0001: /*
0002: * JacORB - a free Java ORB
0003: *
0004: * Copyright (C) 1997-2004 Gerald Brose.
0005: *
0006: * This library is free software; you can redistribute it and/or
0007: * modify it under the terms of the GNU Library General Public
0008: * License as published by the Free Software Foundation; either
0009: * version 2 of the License, or (at your option) any later version.
0010: *
0011: * This library is distributed in the hope that it will be useful,
0012: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
0014: * Library General Public License for more details.
0015: *
0016: * You should have received a copy of the GNU Library General Public
0017: * License along with this library; if not, write to the Free
0018: * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
0019: */
0020:
0021: package org.jacorb.idl;
0022:
0023: import org.jacorb.idl.runtime.*;
0024:
0025: import java.util.*;
0026:
0027: /**
0028: * This class implements a scanner (aka lexical analyzer or
0029: * lexer) for IDL. The scanner reads characters from a global input
0030: * stream and returns integers corresponding to the terminal number
0031: * of the next token. Once the end of input is reached the EOF token
0032: * is returned on every subsequent call.<p>
0033: *
0034: * All symbol constants are defined in sym.java which is generated by
0035: * JavaCup from parser.cup.<p>
0036: *
0037: * In addition to the scanner proper (called first via init() then
0038: * with next_token() to get each token) this class provides simple
0039: * error and warning routines and keeps a count of errors and
0040: * warnings that is publicly accessible. It also provides basic
0041: * preprocessing facilities, i.e. it handles preprocessor
0042: * directives such as #define, #undef, #include, etc., although it
0043: * does not provide full C++ preprocessing.
0044: *
0045: * This class is "static" (i.e., it has only static members and methods).
0046: *
0047: * @version $Id: lexer.java,v 1.53 2006/10/13 19:56:48 andre.spiegel Exp $
0048: * @author Gerald Brose
0049: *
0050: */
0051:
0052: public class lexer {
/** Logger obtained from the parser; every diagnostic of this class goes through it. */
private static org.apache.log.Logger logger = parser.getLogger();

/** Lookahead: the character currently under inspection and the one after it. */
protected static int next_char, next_char2;

/** Sentinel value that marks the end of the input. */
protected static final int EOF_CHAR = -1;

/**
 * IDL keywords (and a few multi-character operators), keyed by their
 * exact spelling. The values are Integer objects holding the symbol
 * number. Scanned words are collected like identifiers first and are
 * looked up here just before a token is returned.
 */
protected static Hashtable keywords = new Hashtable();

/**
 * Lower-cased keyword spelling mapped to the case sensitive spelling
 * used as key in {@link #keywords}. Used for detecting identifiers
 * that collide with a keyword when case is ignored.
 */
protected static Hashtable keywords_lower_case = new Hashtable();

/** Names reserved in Java; only the keys carry information. */
protected static Hashtable java_keywords = new Hashtable();

/**
 * Unambiguous single character tokens. Keys are Integer objects
 * holding the numerical value of the character, values are Integer
 * objects holding the symbol number.
 */
protected static Hashtable char_symbols = new Hashtable(25);

/** Preprocessor symbols: symbol name mapped to its replacement text. */
protected static Hashtable defines = new Hashtable();

/** Set by the conditional directives; false while a branch that is not taken is being read. */
protected static boolean conditionalCompilation = true;

/** Values of conditionalCompilation saved by nested #if/#ifdef/#ifndef directives. */
private static Stack ifStack = new Stack();

/** Token stack; it is only cleared in this part of the file, its users are elsewhere. */
private static Stack tokenStack = new Stack();

/** Line number reported in diagnostics. */
protected static int current_line = 1;

/** Text of the current line read so far, reported in diagnostics. */
protected static StringBuffer line = new StringBuffer();

/** Character position within the current line. */
protected static int current_position = 1;

/** Have we already read a '"' ? */
protected static boolean in_string = false;

/** Are we processing a wide char or string ? */
protected static boolean wide = false;

/** Number of errors reported so far. */
static int error_count = 0;

/** Number of warnings reported so far. */
public static int warning_count = 0;

/** The pragma prefix that is currently in effect. */
public static String currentPragmaPrefix = "";

/** Name of the current file. */
public static String currentFile = "";
0129:
0130: /** reset the scanner state */
0131:
0132: public static void reset() {
0133: current_position = 1;
0134: error_count = 0;
0135: warning_count = 0;
0136: currentPragmaPrefix = "";
0137: line = new StringBuffer();
0138: ifStack.removeAllElements();
0139: tokenStack.removeAllElements();
0140: defines.clear();
0141: }
0142:
0143: /**
0144: * Initialize the scanner. This sets up the keywords and char_symbols
0145: * tables and reads the first two characters of lookahead.
0146: *
0147: * "Object" is listed as reserved in the OMG spec.
0148: * "int" is not, but I reserved it to bar its usage as a legal integer
0149: * type.
0150: */
0151:
0152: public static void init() throws java.io.IOException {
0153: /* set up standard symbols */
0154: defines.put("JACORB_IDL_1_4", "");
0155:
0156: /* set up the keyword table */
0157:
0158: keywords.put("abstract", new Integer(sym.ABSTRACT));
0159: keywords.put("any", new Integer(sym.ANY));
0160: keywords.put("attribute", new Integer(sym.ATTRIBUTE));
0161: keywords.put("boolean", new Integer(sym.BOOLEAN));
0162: keywords.put("case", new Integer(sym.CASE));
0163: keywords.put("char", new Integer(sym.CHAR));
0164: keywords.put("const", new Integer(sym.CONST));
0165: keywords.put("context", new Integer(sym.CONTEXT));
0166: keywords.put("custom", new Integer(sym.CUSTOM));
0167: keywords.put("default", new Integer(sym.DEFAULT));
0168: keywords.put("double", new Integer(sym.DOUBLE));
0169: keywords.put("enum", new Integer(sym.ENUM));
0170: keywords.put("exception", new Integer(sym.EXCEPTION));
0171: keywords.put("factory", new Integer(sym.FACTORY));
0172: keywords.put("FALSE", new Integer(sym.FALSE));
0173: keywords.put("fixed", new Integer(sym.FIXED));
0174: keywords.put("float", new Integer(sym.FLOAT));
0175: keywords.put("getraises", new Integer(sym.GETRAISES));
0176: keywords.put("in", new Integer(sym.IN));
0177: keywords.put("inout", new Integer(sym.INOUT));
0178: keywords.put("interface", new Integer(sym.INTERFACE));
0179: keywords.put("local", new Integer(sym.LOCAL));
0180: keywords.put("long", new Integer(sym.LONG));
0181: keywords.put("module", new Integer(sym.MODULE));
0182: keywords.put("native", new Integer(sym.NATIVE));
0183: keywords.put("Object", new Integer(sym.OBJECT));
0184: keywords.put("octet", new Integer(sym.OCTET));
0185: keywords.put("oneway", new Integer(sym.ONEWAY));
0186: keywords.put("out", new Integer(sym.OUT));
0187: keywords.put("private", new Integer(sym.PRIVATE));
0188: keywords.put("public", new Integer(sym.PUBLIC));
0189: keywords.put("pseudo", new Integer(sym.PSEUDO));
0190: keywords.put("raises", new Integer(sym.RAISES));
0191: keywords.put("readonly", new Integer(sym.READONLY));
0192: keywords.put("sequence", new Integer(sym.SEQUENCE));
0193: keywords.put("setraises", new Integer(sym.SETRAISES));
0194: keywords.put("short", new Integer(sym.SHORT));
0195: keywords.put("string", new Integer(sym.STRING));
0196: keywords.put("struct", new Integer(sym.STRUCT));
0197: keywords.put("supports", new Integer(sym.SUPPORTS));
0198: keywords.put("switch", new Integer(sym.SWITCH));
0199: keywords.put("TRUE", new Integer(sym.TRUE));
0200: keywords.put("truncatable", new Integer(sym.TRUNCATABLE));
0201: keywords.put("typedef", new Integer(sym.TYPEDEF));
0202: keywords.put("unsigned", new Integer(sym.UNSIGNED));
0203: keywords.put("union", new Integer(sym.UNION));
0204: keywords.put("ValueBase", new Integer(sym.VALUEBASE));
0205: keywords.put("valuetype", new Integer(sym.VALUETYPE));
0206: keywords.put("void", new Integer(sym.VOID));
0207: keywords.put("wchar", new Integer(sym.WCHAR));
0208: keywords.put("wstring", new Integer(sym.WSTRING));
0209:
0210: keywords.put("::", new Integer(sym.DBLCOLON));
0211: keywords.put("<<", new Integer(sym.LSHIFT));
0212: keywords.put(">>", new Integer(sym.RSHIFT));
0213: keywords.put("L\"", new Integer(sym.LDBLQUOTE));
0214:
0215: // setup the mapping of lower case keywords to case sensitive
0216: // keywords
0217:
0218: for (java.util.Enumeration e = keywords.keys(); e
0219: .hasMoreElements();) {
0220: String keyword = (String) e.nextElement();
0221: String keyword_lower_case = keyword.toLowerCase();
0222: keywords_lower_case.put(keyword_lower_case, keyword);
0223: }
0224:
0225: /* set up the table of single character symbols */
0226: char_symbols.put(new Integer(';'), new Integer(sym.SEMI));
0227: char_symbols.put(new Integer(','), new Integer(sym.COMMA));
0228: char_symbols.put(new Integer('*'), new Integer(sym.STAR));
0229: char_symbols.put(new Integer('.'), new Integer(sym.DOT));
0230: char_symbols.put(new Integer(':'), new Integer(sym.COLON));
0231: char_symbols.put(new Integer('='), new Integer(sym.EQUALS));
0232: char_symbols.put(new Integer('+'), new Integer(sym.PLUS));
0233: char_symbols.put(new Integer('-'), new Integer(sym.MINUS));
0234: char_symbols.put(new Integer('{'), new Integer(sym.LCBRACE));
0235: char_symbols.put(new Integer('}'), new Integer(sym.RCBRACE));
0236: char_symbols.put(new Integer('('), new Integer(sym.LPAREN));
0237: char_symbols.put(new Integer(')'), new Integer(sym.RPAREN));
0238: char_symbols.put(new Integer('['), new Integer(sym.LSBRACE));
0239: char_symbols.put(new Integer(']'), new Integer(sym.RSBRACE));
0240: char_symbols.put(new Integer('<'), new Integer(sym.LESSTHAN));
0241: char_symbols
0242: .put(new Integer('>'), new Integer(sym.GREATERTHAN));
0243: char_symbols.put(new Integer('\''), new Integer(sym.QUOTE));
0244: char_symbols.put(new Integer('\"'), new Integer(sym.DBLQUOTE));
0245: char_symbols.put(new Integer('\\'), new Integer(sym.BSLASH));
0246: char_symbols.put(new Integer('^'), new Integer(sym.CIRCUM));
0247: char_symbols.put(new Integer('&'), new Integer(sym.AMPERSAND));
0248: char_symbols.put(new Integer('/'), new Integer(sym.SLASH));
0249: char_symbols.put(new Integer('%'), new Integer(sym.PERCENT));
0250: char_symbols.put(new Integer('~'), new Integer(sym.TILDE));
0251: char_symbols.put(new Integer('|'), new Integer(sym.BAR));
0252: char_symbols.put(new Integer(' '), new Integer(sym.SPACE));
0253:
0254: /* set up reserved Java names */
0255:
0256: java_keywords.put("abstract", "");
0257: java_keywords.put("boolean", "");
0258: java_keywords.put("break", "");
0259: java_keywords.put("byte", "");
0260: java_keywords.put("case", "");
0261: java_keywords.put("catch", "");
0262: java_keywords.put("char", "");
0263: java_keywords.put("class", "");
0264: java_keywords.put("const", "");
0265: java_keywords.put("continue", "");
0266: java_keywords.put("default", "");
0267: java_keywords.put("do", "");
0268: java_keywords.put("double", "");
0269: java_keywords.put("else", "");
0270: java_keywords.put("extends", "");
0271: java_keywords.put("false", "");
0272: java_keywords.put("final", "");
0273: java_keywords.put("finally", "");
0274: java_keywords.put("float", "");
0275: java_keywords.put("for", "");
0276: java_keywords.put("goto", "");
0277: java_keywords.put("if", "");
0278: java_keywords.put("implements", "");
0279: java_keywords.put("import", "");
0280: java_keywords.put("instanceof", "");
0281: java_keywords.put("int", "");
0282: java_keywords.put("interface", "");
0283: java_keywords.put("long", "");
0284: java_keywords.put("native", "");
0285: java_keywords.put("new", "");
0286: java_keywords.put("null", "");
0287: java_keywords.put("package", "");
0288: java_keywords.put("private", "");
0289: java_keywords.put("protected", "");
0290: java_keywords.put("public", "");
0291: java_keywords.put("return", "");
0292: java_keywords.put("short", "");
0293: java_keywords.put("static", "");
0294: java_keywords.put("super", "");
0295: java_keywords.put("switch", "");
0296: java_keywords.put("synchronized", "");
0297: java_keywords.put("true", "");
0298: java_keywords.put("this", "");
0299: java_keywords.put("throw", "");
0300: java_keywords.put("throws", "");
0301: java_keywords.put("transient", "");
0302: java_keywords.put("try", "");
0303: java_keywords.put("void", "");
0304: java_keywords.put("volatile", "");
0305: java_keywords.put("while", "");
0306:
0307: java_keywords.put("clone", "");
0308: java_keywords.put("equals", "");
0309: java_keywords.put("finalize", "");
0310: java_keywords.put("getClass", "");
0311: java_keywords.put("hashCode", "");
0312: java_keywords.put("notify", "");
0313: java_keywords.put("notifyAll", "");
0314: java_keywords.put("toString", "");
0315: java_keywords.put("wait", "");
0316:
0317: /* stack needs a topmost value */
0318: ifStack.push(new Boolean(true));
0319:
0320: /* read two characters of lookahead */
0321:
0322: try {
0323: next_char = GlobalInputStream.read();
0324: } catch (Exception e) {
0325: org.jacorb.idl.parser.fatal_error("Cannot read from file "
0326: + GlobalInputStream.currentFile().getAbsolutePath()
0327: + ", please check file name.", null);
0328: }
0329:
0330: if (next_char == EOF_CHAR)
0331: next_char2 = EOF_CHAR;
0332: else
0333: next_char2 = GlobalInputStream.read();
0334: }
0335:
0336: public static void define(String symbol, String value) {
0337: if (logger.isDebugEnabled())
0338: logger.debug("Defining: " + symbol + " as " + value);
0339: defines.put(symbol, value);
0340: }
0341:
0342: public static void undefine(String symbol) {
0343: if (logger.isDebugEnabled())
0344: logger.debug("Un-defining: " + symbol);
0345: defines.remove(symbol);
0346: }
0347:
0348: public static String defined(String symbol) {
0349: return (String) defines.get(symbol);
0350: }
0351:
0352: /**
0353: * record information about the last lexical scope so that it can be
0354: * restored later
0355: */
0356:
0357: public static int currentLine() {
0358: return current_line;
0359: }
0360:
0361: /**
0362: * return the current reading position
0363: */
0364:
0365: public static PositionInfo getPosition() {
0366: return new PositionInfo(current_line, current_position,
0367: currentPragmaPrefix, line.toString(), GlobalInputStream
0368: .currentFile());
0369: }
0370:
0371: public static void restorePosition(PositionInfo p) {
0372: current_line = p.line_no;
0373: currentPragmaPrefix = p.pragma_prefix;
0374: current_position = 0;
0375: }
0376:
0377: /**
0378: * Advance the scanner one character in the input stream. This moves
0379: * next_char2 to next_char and then reads a new next_char2.
0380: */
0381:
0382: protected static void advance() throws java.io.IOException {
0383: int old_char;
0384:
0385: old_char = next_char;
0386: next_char = next_char2;
0387: next_char2 = GlobalInputStream.read();
0388:
0389: line.append((char) old_char);
0390:
0391: /* count this */
0392:
0393: current_position++;
0394: if (old_char == '\n') {
0395: current_line++;
0396: current_position = 1;
0397: line = new StringBuffer();
0398: }
0399: }
0400:
0401: /**
0402: * Emit an error message. The message will be marked with both the
0403: * current line number and the position in the line. Error messages
0404: * are printed on standard error (System.err).
0405: * @param message the message to print.
0406: */
0407: public static void emit_error(String message) {
0408: if (parser.getLogger().isErrorEnabled()) {
0409: logger.error(GlobalInputStream.currentFile()
0410: .getAbsolutePath()
0411: + ", line: "
0412: + current_line
0413: + "("
0414: + current_position
0415: + "): "
0416: + message
0417: + "\n\t"
0418: + line.toString());
0419: }
0420: error_count++;
0421: }
0422:
0423: public static void emit_error(String message, str_token t) {
0424: if (t == null) {
0425: emit_error(message);
0426: } else {
0427: if (parser.getLogger().isErrorEnabled()) {
0428: logger.error(t.fileName + ", line:" + t.line_no + "("
0429: + t.char_pos + "): " + message + "\n\t"
0430: + t.line_val);
0431: }
0432: error_count++;
0433: }
0434: }
0435:
0436: /**
0437: * Emit a warning message. The message will be marked with both the
0438: * current line number and the position in the line. Messages are
0439: * printed on standard error (System.err).
0440: *
0441: * @param message the message to print.
0442: */
0443:
0444: public static void emit_warn(String message) {
0445: if (parser.getLogger().isWarnEnabled()) {
0446: logger.warn(message + " at " + current_line + "("
0447: + current_position + "): \"" + line.toString()
0448: + "\"");
0449: }
0450: warning_count++;
0451: }
0452:
0453: public static void emit_warn(String message, str_token t) {
0454: if (t == null) {
0455: emit_warn(message);
0456: } else {
0457: if (parser.getLogger().isWarnEnabled()) {
0458: logger.warn(" at " + t.fileName + ", line:" + t.line_no
0459: + "(" + t.char_pos + "): " + message + "\n\t"
0460: + t.line_val);
0461: }
0462:
0463: warning_count++;
0464: }
0465: }
0466:
0467: /**
0468: * Determine if a character is ok to start an id.
0469: * @param ch the character in question.
0470: */
0471: protected static boolean id_start_char(int ch) {
0472: return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
0473: || (ch == '_');
0474: }
0475:
0476: /**
0477: * Determine if a character is ok for the middle of an id.
0478: * @param ch the character in question.
0479: */
0480: protected static boolean id_char(int ch) {
0481: return id_start_char(ch) || (ch == '_')
0482: || (ch >= '0' && ch <= '9');
0483: }
0484:
0485: /**
0486: * Try to look up a single character symbol, returns -1 for not found.
0487: * @param ch the character in question.
0488: */
0489:
0490: protected static int find_single_char(int ch) {
0491: Integer result;
0492:
0493: result = (Integer) char_symbols.get(new Integer((char) ch));
0494: if (result == null)
0495: return -1;
0496: else
0497: return result.intValue();
0498: }
0499:
0500: /**
0501: * Handle swallowing up a comment. Both old style C and new style C++
0502: * comments are handled.
0503: */
0504: protected static void swallow_comment() throws java.io.IOException {
0505: /* next_char == '/' at this point */
0506:
0507: /* is it a traditional comment */
0508: if (next_char2 == '*') {
0509: /* swallow the opener */
0510: advance();
0511: advance();
0512:
0513: /* swallow the comment until end of comment or EOF */
0514: for (;;) {
0515: /* if its EOF we have an error */
0516: if (next_char == EOF_CHAR) {
0517: emit_error(
0518: "Specification file ends inside a comment",
0519: null);
0520: return;
0521: }
0522:
0523: /* if we can see the closer we are done */
0524: if (next_char == '*' && next_char2 == '/') {
0525: advance();
0526: advance();
0527: return;
0528: }
0529:
0530: /* otherwise swallow char and move on */
0531: advance();
0532: }
0533: }
0534:
0535: /* is its a new style comment */
0536: if (next_char2 == '/') {
0537: /* swallow the opener */
0538: advance();
0539: advance();
0540:
0541: /* swallow to '\n', '\f', or EOF */
0542: while (next_char != '\n' && next_char != '\f'
0543: && next_char != '\r' && next_char != EOF_CHAR) {
0544: advance();
0545: }
0546:
0547: return;
0548: }
0549:
0550: /* shouldn't get here, but... if we get here we have an error */
0551: emit_error("Malformed comment in specification -- ignored",
0552: null);
0553: advance();
0554: }
0555:
0556: /**
0557: * Preprocessor directives are handled here.
0558: */
0559:
0560: protected static void preprocess() throws java.io.IOException {
0561: for (;;) {
0562: /* if its EOF we have an error */
0563: if (next_char == EOF_CHAR) {
0564: emit_error(
0565: "Specification file ends inside a preprocessor directive",
0566: null);
0567: return;
0568: } else if (next_char != '#') {
0569: emit_error("expected #, got " + (char) next_char
0570: + " instead!", null);
0571: } else
0572: advance(); // skip '#'
0573:
0574: // the following is done to allow for # ifdef sloppiness
0575: while ((' ' == next_char) || ('\t' == next_char))
0576: advance();
0577:
0578: String dir = get_string();
0579:
0580: if (dir.equals("include")) {
0581: if (!conditionalCompilation)
0582: return;
0583: // Swallow between include and < or "
0584: swallow_whitespace();
0585: boolean useIncludePath = (next_char == '<');
0586: advance(); // skip `\"' or '<'
0587:
0588: String fname = get_string();
0589:
0590: if (useIncludePath && (next_char != '>'))
0591: emit_error("Syntax error in #include directive, expecting '>'");
0592: else if (!useIncludePath && (next_char != '\"'))
0593: emit_error("Syntax error in #include directive, expecting \"");
0594:
0595: /* swallow to '\n', '\f', or EOF */
0596: while (next_char != '\n' && next_char != '\f'
0597: && next_char != '\r' && next_char != EOF_CHAR) {
0598: advance();
0599: }
0600: GlobalInputStream.include(fname, next_char2,
0601: useIncludePath);
0602: current_line = 0;
0603: advance();
0604: advance();
0605: return;
0606: } else if (dir.equals("define")) {
0607: if (!conditionalCompilation)
0608: return;
0609: swallow_whitespace();
0610: String name = get_string();
0611: StringBuffer text = new StringBuffer();
0612: if (next_char == ' ') {
0613: advance();
0614: }
0615: while (next_char != '\n') {
0616: if (next_char == '\\') {
0617: advance();
0618: advance();
0619: }
0620: text.append((char) next_char);
0621: advance();
0622: }
0623: define(name, text.toString());
0624: } else if (dir.equals("error")) {
0625: if (!conditionalCompilation)
0626: return;
0627: advance(); // skip ' '
0628: String name = get_string();
0629: emit_error(name);
0630: } else if (dir.equals("undef")) {
0631: // Undefining symbol
0632: if (!conditionalCompilation)
0633: return;
0634: swallow_whitespace();
0635: String name = get_string();
0636: undefine(name);
0637: } else if (dir.equals("if") || dir.equals("elif")) {
0638: if (!dir.equals("elif")) {
0639: ifStack.push(new Boolean(conditionalCompilation));
0640: if (!conditionalCompilation)
0641: return;
0642: }
0643:
0644: swallow_whitespace();
0645:
0646: // the following snippet distinguishes between #if defined
0647: // and #if !defined
0648:
0649: boolean straightDefined = true;
0650: if ('!' == next_char) {
0651: advance();
0652: straightDefined = false;
0653: }
0654:
0655: String defineStr = get_string_no_paren();
0656:
0657: if (defineStr.equals("defined")) {
0658: swallow_whitespace();
0659:
0660: boolean brackets = ('(' == next_char);
0661: if (brackets) {
0662: advance(); // skip '('
0663: swallow_whitespace(); // any whitespace after ( ? skip it
0664: }
0665:
0666: String name = get_string_no_paren();
0667:
0668: if (brackets) {
0669: swallow_whitespace();
0670: if (logger.isDebugEnabled())
0671: logger.debug("next char: " + next_char);
0672:
0673: if (')' != next_char) {
0674: emit_error(
0675: "Expected ) terminating #if defined",
0676: null);
0677: return;
0678: }
0679: advance();
0680: }
0681:
0682: if (straightDefined)
0683: conditionalCompilation = (null != defined(name));
0684: else
0685: conditionalCompilation = (null == defined(name));
0686: } else if (defineStr.equals("0")) {
0687: conditionalCompilation = false;
0688: } else if (defineStr.equals("1")) {
0689: conditionalCompilation = true;
0690: } else {
0691: emit_error("Expected \"defined\" following #if: "
0692: + dir, null);
0693: return;
0694: }
0695: } else if (dir.equals("ifdef")) {
0696: ifStack.push(new Boolean(conditionalCompilation));
0697: if (!conditionalCompilation)
0698: return;
0699: swallow_whitespace();
0700: String name = get_string();
0701: conditionalCompilation = (defined(name) != null);
0702: } else if (dir.equals("ifndef")) {
0703: ifStack.push(new Boolean(conditionalCompilation));
0704: if (!conditionalCompilation)
0705: return;
0706: swallow_whitespace();
0707: String name = get_string();
0708: conditionalCompilation = (defined(name) == null);
0709: } else if (dir.equals("else")) {
0710: if (((Boolean) ifStack.peek()).booleanValue())
0711: conditionalCompilation = !conditionalCompilation;
0712: } else if (dir.equals("endif")) {
0713: boolean b = ((Boolean) ifStack.pop()).booleanValue();
0714: conditionalCompilation = b;
0715: } else if (dir.equals("pragma")) {
0716: if (!conditionalCompilation)
0717: return;
0718: swallow_whitespace();
0719:
0720: String name = get_string();
0721: if (name.equals("prefix")) {
0722: advance();
0723: currentPragmaPrefix = get_string();
0724: } else if (name.equals("version")) {
0725: advance(); // skip ' '
0726: String vname = get_string();
0727: advance(); // skip ' '
0728: String version = get_string();
0729: String existingVersion = (String) parser
0730: .currentScopeData().versionMap.get(vname);
0731: if (existingVersion == null) {
0732: // Set version
0733: parser.currentScopeData().versionMap.put(vname,
0734: version);
0735: } else {
0736: // Check for version change
0737: if (!existingVersion.equals(version)) {
0738: emit_error(
0739: "Version re-declaration with different value: #pragma version "
0740: + version, null);
0741: }
0742: }
0743: String iname = (String) parser.currentScopeData().idMap
0744: .get(vname);
0745: if (iname != null) {
0746: if (version.equals(iname.substring(1 + iname
0747: .lastIndexOf(':'))) == false) {
0748: emit_error(
0749: "Declaring version with different version to already declared ID for "
0750: + name, null);
0751: }
0752: }
0753: } else if (name.equals("ID")) {
0754: advance(); // skip ' '
0755: String iname = get_string();
0756: advance(); // skip ' '
0757: String id = get_string();
0758: String existingID = (String) parser
0759: .currentScopeData().idMap.get(iname);
0760: if (existingID == null) {
0761: // Set id
0762: parser.currentScopeData().idMap.put(iname, id);
0763: } else {
0764: // Check for id change
0765: if (!existingID.equals(id)) {
0766: emit_error(
0767: "ID re-declaration with different value: #pragma id "
0768: + id, null);
0769: }
0770: }
0771: if (parser.currentScopeData().versionMap.get(iname) != null) {
0772: if (((String) parser.currentScopeData().versionMap
0773: .get(iname)).equals(id.substring(1 + id
0774: .lastIndexOf(':'))) == false) {
0775: emit_error(
0776: "Declaring ID with different version to already declared version for "
0777: + name, null);
0778: }
0779: }
0780: } else if (name.equals("inhibit_code_generation")) {
0781: /* proprietary pragma of the JacORB IDL compiler */
0782: parser.setInhibitionState(true);
0783: // do something with it
0784: } else {
0785: emit_warn("Unknown pragma, ignoring: #pragma "
0786: + name, null);
0787: }
0788: } else {
0789: emit_error(
0790: "Unrecognized preprocessor directive " + dir,
0791: null);
0792: }
0793:
0794: /* swallow to '\n', '\f', or EOF */
0795: while (next_char != '\n' && next_char != '\f'
0796: && next_char != '\r' && next_char != EOF_CHAR) {
0797: advance();
0798: }
0799: return;
0800: }
0801: }
0802:
0803: // the following is used for parsing the #if defined(...) construct
0804:
0805: private static String get_string_no_paren()
0806: throws java.io.IOException {
0807: StringBuffer sb = new StringBuffer();
0808: char c = (char) next_char;
0809: while (c != ' ' && c != '\t' && c != '\r' && c != '\n'
0810: && c != '\f' && c != EOF_CHAR && c != '\"' && c != '<'
0811: && c != '>' && c != '(' && c != ')') {
0812: sb.append(c);
0813: advance();
0814: c = (char) next_char;
0815: }
0816: return sb.toString();
0817: }
0818:
0819: private static String get_string() throws java.io.IOException {
0820:
0821: StringBuffer sb = new StringBuffer("");
0822:
0823: if (next_char == '\"') {
0824: advance();
0825: while (next_char != '\"') {
0826: if (next_char == EOF_CHAR)
0827: emit_error("Unexpected EOF in string");
0828: sb.append((char) next_char);
0829: advance();
0830: }
0831: } else {
0832: while (next_char != ' ' && next_char != '\t'
0833: && next_char != '\r' && next_char != '\n'
0834: && next_char != '\f' && next_char != EOF_CHAR
0835: && next_char != '\"' && next_char != '<'
0836: && next_char != '>') {
0837: sb.append((char) next_char);
0838: advance();
0839: }
0840: }
0841: return sb.toString();
0842: }
0843:
0844: /**
0845: * Process an identifier.
0846: * <P>
0847: * Identifiers begin with a letter, underscore, or dollar sign,
0848: * which is followed by zero or more letters, numbers,
0849: * underscores or dollar signs. This routine returns a str_token
0850: * suitable for return by the scanner or null, if the string that
0851: * was read expanded to a symbol that was #defined. In this case,
0852: * the symbol is expanded in place
0853: */
0854:
0855: protected static token do_symbol() throws java.io.IOException {
0856: StringBuffer result = new StringBuffer();
0857: String result_str;
0858: Integer keyword_num = null;
0859: char buffer[] = new char[1];
0860:
0861: /* next_char holds first character of id */
0862: buffer[0] = (char) next_char;
0863: result.append(buffer, 0, 1);
0864: advance();
0865:
0866: /* collect up characters while they fit in id */
0867: while (id_char(next_char)) {
0868: buffer[0] = (char) next_char;
0869: result.append(buffer, 0, 1);
0870: advance();
0871: }
0872:
0873: /* extract a string */
0874: result_str = result.toString();
0875:
0876: /* try to look it up as a defined symbol... */
0877:
0878: String text = defined(result_str);
0879: if (text != null) {
0880: char[] next = { (char) next_char, (char) next_char2 };
0881: GlobalInputStream.insert(text + (new String(next)));
0882: advance(); // restore lookahead
0883: advance(); // restore lookahead
0884: return null;
0885: }
0886:
0887: // check if it's a keyword
0888: keyword_num = (Integer) keywords.get(result_str);
0889: if (keyword_num != null) {
0890: if (isScope(result_str)) {
0891: parser.openScope();
0892: }
0893: return new token(keyword_num.intValue());
0894: }
0895:
0896: // not a keyword, so treat as identifier after verifying
0897: // case sensitivity rules and prefacing with an _
0898: // if it collides with a Java keyword.
0899:
0900: result_str = checkIdentifier(result_str);
0901: if (null != result_str)
0902: return new str_token(sym.ID, result_str, getPosition(),
0903: GlobalInputStream.currentFile().getName());
0904: else
0905: return null;
0906: }
0907:
0908: private static boolean isScope(String keyword) {
0909: return (keyword.equals("module") || keyword.equals("interface")
0910: || keyword.equals("struct")
0911: || keyword.equals("exception") || keyword
0912: .equals("union")
0913: // keyword.equals("valuetype")
0914: );
0915: }
0916:
0917: /**
0918: * Checks whether Identifier str is legal and returns it. If the
0919: * identifier is escaped with a leading underscore, that
0920: * underscore is removed. If a the legal IDL identifier clashes
0921: * with a Java reserved word, an underscore is prepended.
0922: * <BR>
0923: * @param str - the IDL identifier <BR>
0924: * <BR>
0925: * Prints an error msg if the identifier collides with an IDL
0926: * keyword.
0927: */
0928:
0929: public static String checkIdentifier(String str) {
0930:
0931: if (logger.isInfoEnabled())
0932: logger.info("checking identifier " + str);
0933:
0934: /* if it is an escaped identifier, look it up as a keyword,
0935: otherwise remove the underscore. */
0936:
0937: if (str.charAt(0) == '_') {
0938: str = str.substring(1);
0939: } else {
0940: String colliding_keyword = null;
0941:
0942: if (org.jacorb.idl.parser.strict_names) {
0943: // check for name clashes strictly (i.e. case insensitive)
0944: colliding_keyword = (String) keywords_lower_case
0945: .get(str.toLowerCase());
0946: } else {
0947: // check for name clashes only loosely (i.e. case sensitive)
0948: colliding_keyword = (String) keywords.get(str);
0949: }
0950:
0951: if (colliding_keyword != null) {
0952: emit_error("Identifier " + str
0953: + " collides with keyword " + colliding_keyword
0954: + ".");
0955: return null;
0956: }
0957:
0958: }
0959:
0960: /* clashes with a Java reserved word? */
0961: if (needsJavaEscape(str)) {
0962: str = "_" + str;
0963: }
0964:
0965: return str;
0966: }
0967:
0968: /**
0969: * Only the most general name clashes with Java keywords
0970: * are caught here. Identifiers need to be checked again
0971: * at different other places in the compiler!
0972: */
0973:
0974: private static boolean needsJavaEscape(String s) {
0975: return (java_keywords.containsKey(s));
0976: }
0977:
0978: /**
0979: * called during the parse phase to catch clashes with
0980: * Java reserved words.
0981: */
0982:
0983: public static boolean strictJavaEscapeCheck(String s) {
0984: return ((!s.equals("Helper") && s.endsWith("Helper"))
0985: || (!s.equals("Holder") && s.endsWith("Holder"))
0986: || (!s.equals("Operations") && s.endsWith("Operations"))
0987: || (!s.equals("Package") && s.endsWith("Package"))
0988: || (!s.equals("POA") && s.endsWith("POA")) || (!s
0989: .equals("POATie") && s.endsWith("POATie")));
0990: }
0991:
0992: public static boolean needsJavaEscape(Module m) {
0993: String s = m.pack_name;
0994: if (logger.isDebugEnabled())
0995: logger.debug("checking module name " + s);
0996: return (strictJavaEscapeCheck(s));
0997: }
0998:
0999: /**
1000: * Return one token. This is the main external interface to the scanner.
1001: * It consumes sufficient characters to determine the next input token
1002: * and returns it.
1003: */
1004:
1005: public static token next_token() throws java.io.IOException {
1006: parser.set_included(GlobalInputStream.includeState());
1007: token result = real_next_token();
1008: return result;
1009: }
1010:
1011: private static void swallow_whitespace() throws java.io.IOException {
1012: /* look for white space */
1013: while (next_char == ' ' || next_char == '\t'
1014: || next_char == '\n' || next_char == '\f'
1015: || next_char == '\r') {
1016: /* advance past it and try the next character */
1017: advance();
1018: }
1019: }
1020:
1021: /**
1022: * The actual routine to return one token.
1023: *
1024: * @return token
1025: * @throws java.io.IOException
1026: */
1027:
1028: protected static token real_next_token() throws java.io.IOException {
1029: int sym_num;
1030:
1031: /* if we found more than a single token last time, these
1032: tokens were remembered on the tokenStack - return the first
1033: one here */
1034:
1035: if (!tokenStack.empty())
1036: return (token) tokenStack.pop();
1037:
1038: /* else */
1039:
1040: for (;;) {
1041: /* scan input until we return something */
1042: if (!in_string) {
1043: swallow_whitespace();
1044:
1045: /* look for preprocessor directives */
1046: if ((char) next_char == '#') {
1047: preprocess();
1048: continue;
1049: }
1050:
1051: /* look for a comment */
1052: if (next_char == '/'
1053: && (next_char2 == '*' || next_char2 == '/')) {
1054: /* swallow then continue the scan */
1055: swallow_comment();
1056: continue;
1057: }
1058:
1059: if (!conditionalCompilation) {
1060: advance();
1061: if (next_char == EOF_CHAR) {
1062: emit_error("EOF in conditional compilation!",
1063: null);
1064: return null;
1065: } else
1066: continue;
1067: }
1068:
1069: /* look for COLON or DBLCOLON */
1070: if (next_char == ':') {
1071: if (next_char2 == ':') {
1072: advance();
1073: advance();
1074: return new token(sym.DBLCOLON);
1075: } else {
1076: advance();
1077: return new token(sym.COLON);
1078: }
1079: }
1080:
1081: /* leading L for wide strings */
1082: if (next_char == 'L'
1083: && (next_char2 == '\"' || next_char2 == '\'')) {
1084: wide = true;
1085: advance();
1086: if (next_char2 == '\"') {
1087: advance();
1088: in_string = true;
1089: return new token(sym.LDBLQUOTE);
1090: }
1091: // wide char literal may follow, but detecting that
1092: // is done below.
1093: }
1094:
1095: /* look for Shifts */
1096: if (next_char == '<') {
1097: if (next_char2 == '<') {
1098: advance();
1099: advance();
1100: return new token(sym.LSHIFT);
1101: } else {
1102: advance();
1103: return new token(sym.LESSTHAN);
1104: }
1105: }
1106: if (next_char == '>') {
1107: if (next_char2 == '>') {
1108: advance();
1109: advance();
1110: return new token(sym.RSHIFT);
1111: } else {
1112: advance();
1113: return new token(sym.GREATERTHAN);
1114: }
1115: }
1116:
1117: /* leading 0: */
1118: /* Try to scan octal/hexadecimal numbers, might even find a float */
1119: if (next_char == '0') {
1120: long l_val = 0;
1121: long l_val_old = 0;
1122: int radix = 8;
1123: int digit = 0;
1124: advance();
1125:
1126: if (next_char == '.') {
1127: StringBuffer f_string = new StringBuffer("0.");
1128: advance();
1129:
1130: while (next_char >= '0' && next_char <= '9') {
1131: f_string.append((char) next_char);
1132: advance();
1133: }
1134:
1135: float f_val = (new Float(f_string.toString()))
1136: .floatValue();
1137: return new float_token(sym.FLOAT_NUMBER, f_val);
1138: } else {
1139: // See if hexadecimal value
1140:
1141: if (next_char == 'x' || next_char == 'X') {
1142: advance();
1143: radix = 16;
1144: }
1145:
1146: StringBuffer val = new StringBuffer("0");
1147: digit = Character
1148: .digit((char) next_char, radix);
1149: while (digit != -1) {
1150: val.append((char) next_char);
1151: advance();
1152: digit = Character.digit((char) next_char,
1153: radix);
1154: }
1155:
1156: String str = val.toString();
1157: try {
1158: return new int_token(sym.NUMBER, Integer
1159: .parseInt(str, radix));
1160: } catch (NumberFormatException ex) {
1161: try {
1162: return new long_token(sym.LONG_NUMBER,
1163: Long.parseLong(str, radix));
1164: } catch (NumberFormatException ex2) {
1165: emit_error("Invalid octal/hex value: "
1166: + str);
1167: }
1168: }
1169: return null;
1170: }
1171: }
1172:
1173: /* Try to scan integer, floating point or fixed point literals */
1174:
1175: if (isDigit(((char) next_char))
1176: || next_char == '.'
1177: || (next_char == '-' && isDigit(((char) next_char2)))) {
1178: StringBuffer value = new StringBuffer();
1179: StringBuffer fraction = null;
1180: int exp = 0;
1181:
1182: if (next_char == '-') {
1183: value.append((char) next_char);
1184: advance();
1185: }
1186: // Read integer part
1187: while (next_char >= '0' && next_char <= '9') {
1188: value.append((char) next_char);
1189: advance();
1190: }
1191:
1192: // Read fraction
1193: if (next_char == '.') {
1194: fraction = new StringBuffer();
1195: advance();
1196:
1197: while (next_char >= '0' && next_char <= '9') {
1198: fraction.append((char) next_char);
1199: advance();
1200: }
1201: }
1202:
1203: // Read exponent
1204: if (next_char == 'e' || next_char == 'E') {
1205: if (fraction == null)
1206: fraction = new StringBuffer();
1207:
1208: fraction.append('e');
1209: advance();
1210: if (next_char == '-' || next_char == '+') {
1211: fraction.append((char) next_char);
1212: advance();
1213: }
1214:
1215: while (next_char >= '0' && next_char <= '9') {
1216: fraction.append((char) next_char);
1217: advance();
1218: }
1219:
1220: if (fraction.length() == 1) {
1221: emit_error("Empty exponent in float/double.");
1222: continue;
1223: }
1224:
1225: return new float_token(sym.FLOAT_NUMBER, Float
1226: .valueOf(
1227: value.toString() + "."
1228: + fraction.toString())
1229: .floatValue());
1230: }
1231:
1232: if (next_char == 'd' || next_char == 'D') {
1233: advance();
1234: if (fraction == null)
1235: fraction = new StringBuffer();
1236:
1237: java.math.BigDecimal bi = new java.math.BigDecimal(
1238: value.toString() + "."
1239: + fraction.toString());
1240: return new fixed_token(sym.FIXED_NUMBER, bi);
1241:
1242: }
1243:
1244: if (fraction == null) {
1245: /* integer or long */
1246:
1247: token tok = null;
1248: String str = value.toString();
1249:
1250: try {
1251: tok = new int_token(sym.NUMBER, Integer
1252: .parseInt(str));
1253: } catch (NumberFormatException ex) {
1254: try {
1255: tok = new long_token(sym.LONG_NUMBER,
1256: Long.parseLong(str));
1257: } catch (NumberFormatException ex2) {
1258: try {
1259: // Not quite critical yet - lets try stuffing it into
1260: // a bigdecimal for later checking.
1261: tok = new fixed_token(
1262: sym.FIXED_NUMBER,
1263: new java.math.BigDecimal(
1264: str));
1265: } catch (NumberFormatException ex3) {
1266: emit_error("Invalid long value: "
1267: + str);
1268: }
1269: }
1270: }
1271:
1272: return tok;
1273: } else {
1274: try {
1275: float f = Float.valueOf(
1276: value.toString() + "."
1277: + fraction.toString())
1278: .floatValue();
1279: return new float_token(sym.FLOAT_NUMBER, f);
1280: } catch (NumberFormatException nf) {
1281: emit_error("Unexpected symbol: "
1282: + value.toString() + "."
1283: + fraction.toString());
1284: }
1285: }
1286: }
1287:
1288: /* look for a single character symbol */
1289: sym_num = find_single_char(next_char);
1290:
1291: /* upon an opening double quote, return the
1292: sym.DBLQUOTE token and continue scanning in the
1293: in_string branch */
1294:
1295: if ((char) next_char == '\"') {
1296: in_string = true;
1297: advance();
1298: return new token(sym.DBLQUOTE);
1299: }
1300:
1301: if ((char) next_char == '\'') {
1302: advance();
1303:
1304: token t = null;
1305:
1306: if (next_char == '\\') {
1307: // Now need to process escaped character.
1308:
1309: advance();
1310:
1311: if (isDigit((char) next_char)) {
1312: // Octal character
1313: char octal1 = '0';
1314: char octal2 = '0';
1315: char octal3 = (char) next_char;
1316:
1317: if (isDigit((char) next_char2)) {
1318: advance();
1319: octal2 = octal3;
1320: octal3 = (char) next_char;
1321:
1322: if (isDigit((char) next_char2)) {
1323: advance();
1324: octal1 = octal2;
1325: octal2 = octal3;
1326: octal3 = (char) next_char;
1327: }
1328: }
1329:
1330: t = new char_token(sym.CH, (char) Integer
1331: .parseInt(new String(new char[] {
1332: octal1, octal2, octal3 }),
1333: 8));
1334: } else if ((char) next_char == 'x') {
1335: // Hexadecimal character
1336: advance();
1337:
1338: char hex1 = '0';
1339: char hex2 = (char) next_char;
1340:
1341: if (isHexLetterOrDigit((char) next_char2)) {
1342: advance();
1343: hex1 = hex2;
1344: hex2 = (char) next_char;
1345: } else if ((char) next_char2 != '\'') {
1346: emit_error("Illegal hex character");
1347: return null;
1348: }
1349:
1350: t = new char_token(sym.CH, (char) Integer
1351: .parseInt(new String(new char[] {
1352: hex1, hex2 }), 16));
1353:
1354: } else if ((char) next_char == 'u') {
1355: if (wide == false) {
1356: emit_error("Unicode characters are only legal with wide character");
1357: return null;
1358: } else {
1359: // Hexadecimal character
1360: advance();
1361:
1362: char uni1 = '0';
1363: char uni2 = '0';
1364: char uni3 = '0';
1365: char uni4 = (char) next_char;
1366:
1367: if (isHexLetterOrDigit((char) next_char2)) {
1368: advance();
1369: uni3 = uni4;
1370: uni4 = (char) next_char;
1371:
1372: if (isHexLetterOrDigit((char) next_char2)) {
1373: advance();
1374: uni2 = uni3;
1375: uni3 = uni4;
1376: uni4 = (char) next_char;
1377:
1378: if (isHexLetterOrDigit((char) next_char2)) {
1379: advance();
1380: uni1 = uni2;
1381: uni2 = uni3;
1382: uni3 = uni4;
1383: uni4 = (char) next_char;
1384: } else if ((char) next_char2 != '\'') {
1385: emit_error("Illegal unicode character");
1386: return null;
1387: }
1388: } else if ((char) next_char2 != '\'') {
1389: emit_error("Illegal unicode character");
1390: return null;
1391: }
1392: } else if ((char) next_char2 != '\'') {
1393: emit_error("Illegal unicode character");
1394: return null;
1395: }
1396:
1397: t = new char_token(sym.CH,
1398: (char) Integer.parseInt(
1399: new String(new char[] {
1400: uni1, uni2,
1401: uni3, uni4 }),
1402: 16));
1403: }
1404: } else {
1405: switch (next_char) {
1406: case 'n': {
1407: t = new char_token(sym.CH, '\n');
1408: break;
1409: }
1410: case 't': {
1411: t = new char_token(sym.CH, '\t');
1412: break;
1413: }
1414: case 'v': {
1415: t = new char_token(sym.CH, '\013');
1416: break;
1417: }
1418: case 'b': {
1419: t = new char_token(sym.CH, '\b');
1420: break;
1421: }
1422: case 'r': {
1423: t = new char_token(sym.CH, '\r');
1424: break;
1425: }
1426: case 'f': {
1427: t = new char_token(sym.CH, '\f');
1428: break;
1429: }
1430: case 'a': {
1431: t = new char_token(sym.CH, '\007');
1432: break;
1433: }
1434: case '\\': {
1435: t = new char_token(sym.CH, '\\');
1436: break;
1437: }
1438: case '?': {
1439: t = new char_token(sym.CH, '?');
1440: break;
1441: }
1442: case '0': {
1443: t = new char_token(sym.CH, '\0');
1444: break;
1445: }
1446: case '\'': {
1447: t = new char_token(sym.CH, '\'');
1448: break;
1449: }
1450: case '\"': {
1451: t = new char_token(sym.CH, '\"');
1452: break;
1453: }
1454: default: {
1455: emit_error("Invalid escape symbol \'");
1456: return null;
1457: }
1458: }
1459: }
1460: } else {
1461: t = new char_token(sym.CH, (char) next_char);
1462: }
1463: advance();
1464:
1465: if ((char) next_char == '\'') {
1466: tokenStack.push(new token(sym.QUOTE));
1467: tokenStack.push(t);
1468: advance();
1469: } else {
1470: emit_error("Expecting closing \'");
1471: return null;
1472: }
1473: wide = false;
1474:
1475: return new token(sym.QUOTE);
1476: }
1477:
1478: if (sym_num != -1) {
1479: /* found one -- advance past it and return a token for it */
1480: advance();
1481:
1482: return new token(sym_num);
1483: }
1484:
1485: /* look for an id or keyword */
1486: if (id_start_char(next_char)) {
1487: token t = do_symbol();
1488: if (t != null)
1489: return t;
1490: else
1491: continue;
1492: }
1493:
1494: /* look for EOF */
1495: if (next_char == EOF_CHAR) {
1496: return new token(sym.EOF);
1497: }
1498: } else // in_string
1499: {
1500:
1501: /* empty string ? */
1502: if ((char) next_char == '\"') {
1503: in_string = false;
1504: advance();
1505: return new token(sym.DBLQUOTE);
1506: }
1507:
1508: StringBuffer result = new StringBuffer();
1509: char previous = ' ';
1510:
1511: /* collect up characters while they fit in id */
1512: while (true) {
1513: if (next_char == '\\') {
1514: // Remap those characters that have no equivilant in java
1515: switch (next_char2) {
1516: case 'a': {
1517: result.append("\\007");
1518: previous = 'a';
1519: advance();
1520: break;
1521: }
1522: case 'v': {
1523: result.append("\\013");
1524: previous = 'v';
1525: advance();
1526: break;
1527: }
1528: case '?': {
1529: result.append("?");
1530: previous = '?';
1531: advance();
1532: break;
1533: }
1534: // Replace \xA0 by octal equivilant
1535: case 'x': {
1536: advance();
1537: advance();
1538:
1539: // Now next_char will be A and next_char2 will be 0
1540: String octal = Integer
1541: .toOctalString(Integer
1542: .parseInt(
1543: new String(
1544: new char[] {
1545: (char) next_char,
1546: (char) next_char2 }),
1547: 16));
1548: if (octal.length() != 3) {
1549: if (octal.length() == 1) {
1550: octal = "0" + octal;
1551: }
1552: octal = "0" + octal;
1553: }
1554: result.append("\\" + octal);
1555: previous = (char) next_char2;
1556:
1557: advance();
1558: break;
1559: }
1560: case 'u': {
1561: if (wide == false) {
1562: emit_error("Unicode characters are only legal with wide strings");
1563: return null;
1564: } else {
1565: result.append((char) next_char);
1566: result.append((char) next_char2);
1567: advance();
1568: advance();
1569:
1570: char uni1 = (char) next_char;
1571: char uni2 = '0';
1572: char uni3 = '0';
1573: char uni4 = '0';
1574:
1575: if (isHexLetterOrDigit((char) next_char2)) {
1576: advance();
1577: uni2 = (char) next_char;
1578:
1579: if (isHexLetterOrDigit((char) next_char2)) {
1580: advance();
1581: uni3 = (char) next_char;
1582:
1583: if (isHexLetterOrDigit((char) next_char2)) {
1584: advance();
1585: uni4 = (char) next_char;
1586: } else {
1587: emit_error("Illegal unicode character");
1588: return null;
1589: }
1590: } else {
1591: emit_error("Illegal unicode character");
1592: return null;
1593: }
1594: } else {
1595: emit_error("Illegal unicode character");
1596: return null;
1597: }
1598:
1599: previous = uni4;
1600: result.append(uni1);
1601: result.append(uni2);
1602: result.append(uni3);
1603: result.append(uni4);
1604: }
1605: break;
1606: }
1607: default: {
1608: previous = (char) next_char;
1609: result.append((char) next_char);
1610: }
1611: }
1612: } else {
1613: previous = (char) next_char;
1614: result.append((char) next_char);
1615: }
1616: advance();
1617:
1618: // Handle backslash quote but exit if just quote
1619: if (((char) next_char) == '\"' && previous != '\\') {
1620: break;
1621: }
1622: }
1623: wide = false;
1624:
1625: String s = result.toString();
1626:
1627: /* build and return an id token with an attached string */
1628: return new org.jacorb.idl.str_token(sym.ID, s,
1629: getPosition(), GlobalInputStream.currentFile()
1630: .getName());
1631: }
1632:
1633: /* if we get here, we have an unrecognized character */
1634: emit_warn("Unrecognized character '"
1635: + new Character((char) next_char) + "'("
1636: + next_char + ") -- ignored");
1637:
1638: /* advance past it */
1639: advance();
1640: }
1641: }
1642:
1643: /**
1644: * Returns true if character is US ASCII 0-9
1645: *
1646: * @param c a value of type 'char'
1647: * @return a value of type 'boolean'
1648: */
1649: static boolean isDigit(char c) {
1650: boolean result = false;
1651:
1652: if (c >= '\u0030') {
1653: if (c <= '\u0039') {
1654: // Range 0030 [0] -> 0039 [9]
1655: result = true;
1656: }
1657: }
1658: return result;
1659: }
1660:
1661: /**
1662: * Returns true if character is US ASCII 0-9, a-f, A-F
1663: *
1664: * @param c a value of type 'char'
1665: * @return a value of type 'boolean'
1666: */
1667: private static boolean isHexLetterOrDigit(char c) {
1668: boolean result = false;
1669:
1670: if (c >= '\u0030') {
1671: if (c <= '\u0039') {
1672: // Range 0030 [0] -> 0039 [9]
1673: result = true;
1674: } else {
1675: if (c >= '\u0041') {
1676: if (c <= '\u0046') {
1677: // Range 0041 [A] -> 0046 [F]
1678: result = true;
1679: }
1680: if (c >= '\u0061') {
1681: if (c <= '\u0066') {
1682: // Range 0061 [a] -> 0066 [f]
1683: result = true;
1684: }
1685: }
1686: }
1687: }
1688: }
1689: return result;
1690: }
1691: }
|