001: package fri.patterns.interpreter.parsergenerator;
002:
003: /**
004: Lexer-Parser communication struct. Utility methods defining EPSILON and terminals.
005: Definition of all global constants for parsergenerator packages.
006: <p>
This class defines special token identifiers: "token" (<i>Token.TOKEN</i>) and
"ignored" (<i>Token.IGNORED</i>), which are needed when specifying a syntax with
mixed parser and lexer rules.
010: <p>
011: This class defines special symbols needed to define character sets with <i>Token.UPTO</i>
012: (e.g. A..Z) and intersections with <i>Token.BUTNOT</i> (e.g. `char` - `newline`).
013:
014: @author (c) 2000, Fritz Ritzberger
015: */
016:
public class Token {
    /** This special token symbol means "end of input" to the Parser, which stops calling <i>getNextToken()</i> then. */
    public static final String EPSILON = "\"EoI\"";

    /** Delimiter for literal terminals (double quote). */
    public static final char STRING_QUOTE = '"';

    /** Delimiter for literal terminals (single quote). */
    public static final char CHAR_QUOTE = '\'';

    /** Delimiter for lexer rules (terminal). */
    public static final char COMMAND_QUOTE = '`';

    /** Symbol used to define lexer character sets, e.g. <i>a..z</i>. */
    public static final String UPTO = "..";

    /** Symbol used to define lexer character set intersections ("but not"), e.g. <i>char - "/*" - "//"</i>. */
    public static final String BUTNOT = "-";

    /** Reserved identifier that marks nonterminals the lexer should deliver to parser. */
    public static final String TOKEN = "token";

    /** Reserved identifier that marks tokens not to deliver to parser, e.g. <i>ignored ::= spaces ;</i>. */
    public static final String IGNORED = "ignored";

    /** The character used to mark artificial nonterminal (e.g. made from "prolog?"). Every artificial rule starts with it. */
    public static final String ARTIFICIAL_NONTERMINAL_START_CHARACTER = "_";

    /** The syntax symbol this Token represents. */
    public final String symbol;
    /** The text that was scanned for this Token. */
    public final Object text;
    /** The start and end Address of this Token. */
    public final Range range;

    /**
    Address stores input line number (1-n), column (0-n) and character/byte offset (0-n).
    Equality, hash code and ordering are all based on the offset alone;
    line and column are not compared.
    */
    public static class Address implements Comparable {
        /** The line number (1-n). */
        public final int line;
        /** The column number (0-n). */
        public final int column;
        /** The character/byte offset (0-n). */
        public final int offset;

        /** Creates the default address: line 1, column 0, offset 0. */
        public Address() {
            this(1, 0, 0);
        }

        /**
        Creates an address from the passed values, which are stored unchecked.
        @param line the line number.
        @param column the column number.
        @param offset the character/byte offset.
        */
        public Address(int line, int column, int offset) {
            this.line = line;
            this.column = column;
            this.offset = offset;
        }

        /** @return the address formatted as "line/column". */
        public String toString() {
            return line + "/" + column;
        }

        /**
        @param o the object to compare with, may be null or of any type.
        @return true if passed object is an Address with the same offset, false otherwise
            (never throws for null or foreign types).
        */
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (!(o instanceof Address)) {
                return false;
            }
            return offset == ((Address) o).offset;
        }

        /** @return the offset, which keeps the hash code consistent with <i>equals()</i>. */
        public int hashCode() {
            return offset;
        }

        /**
        Orders addresses by offset.
        @param o the Address to compare with.
        @return -1, 0 or 1 if this offset is smaller than, equal to or bigger than the passed one.
        @throws ClassCastException if passed object is not an Address.
        @throws NullPointerException if passed object is null.
        */
        public int compareTo(Object o) {
            int other = ((Address) o).offset;
            // explicit comparison instead of subtraction: a difference can overflow and flip its sign
            return offset < other ? -1 : (offset == other ? 0 : 1);
        }
    }

    /** Range stores start and end Address of a token. */
    public static class Range implements Comparable {
        /** The start Address of this Token, pointing to the first character/byte. */
        public final Address start;
        /** The end Address of this Token, one after the last character/byte. */
        public final Address end;

        /**
        Creates a range from the passed addresses.
        @param start the start Address, a default Address (1/0, offset 0) is substituted when null.
        @param end the end Address, a default Address (1/0, offset 0) is substituted when null.
        */
        public Range(Address start, Address end) {
            this.start = start != null ? start : new Address();
            this.end = end != null ? end : new Address();
        }

        /**
        @param o the object to compare with, may be null or of any type.
        @return true if passed object is a Range with equal start and end Address, false otherwise
            (never throws for null or foreign types).
        */
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (!(o instanceof Range)) {
                return false;
            }
            Range other = (Range) o;
            return start.equals(other.start) && end.equals(other.end);
        }

        /** @return the sum of the hash codes of start and end Address. */
        public int hashCode() {
            return start.hashCode() + end.hashCode();
        }

        /** @return the range formatted as "start-end", i.e. "line/column-line/column". */
        public String toString() {
            return start + "-" + end;
        }

        /**
        Orders ranges by their start Address, ranges with equal start are ordered by their end Address.
        The result is zero exactly when <i>equals()</i> returns true.
        @param o the Range to compare with.
        @return a negative value, 0 or a positive value if this Range sorts before, equal to or after the passed one.
        @throws ClassCastException if passed object is not a Range.
        @throws NullPointerException if passed object is null.
        */
        public int compareTo(Object o) {
            Range other = (Range) o;
            // Adding both comparison results would report 0 for different ranges (e.g. 0-10 and 4-6),
            // so the end Address is consulted only as a tie-breaker.
            int byStart = start.compareTo(other.start);
            return byStart != 0 ? byStart : end.compareTo(other.end);
        }
    }

    /**
    Creates a token from the passed values, which are stored as they are (no null substitution).
    @param symbol the syntax symbol this Token represents.
    @param text the text that was scanned for this Token.
    @param range the start and end Address of this Token.
    */
    public Token(String symbol, Object text, Range range) {
        this.symbol = symbol;
        this.text = text;
        this.range = range;
    }

    /**
    Epsilon means end of input, EOF, no more bytes available.
    @param token the Token to check, may be null.
    @return true if passed token is not null and its symbol is the EPSILON-symbol.
    */
    public static boolean isEpsilon(Token token) {
        return token != null && isEpsilon(token.symbol);
    }

    /**
    Epsilon means end of input, no more bytes available.
    @param symbol the symbol to check, may be null.
    @return true if Token symbol not null and is the EPSILON-symbol.
    */
    public static boolean isEpsilon(String symbol) {
        // Compare by value: a symbol built at runtime is not the same String instance as the constant.
        return EPSILON.equals(symbol);
    }

    /**
    Distinction of 'terminals' and nonterminals:
    terminals are either starting with digit or are enclosed in quotes '"` or equal to EPSILON.
    @param symbol the symbol to check, may be null or empty.
    @return true if passed symbol is a terminal, false if it is a nonterminal, null or empty.
    */
    public static boolean isTerminal(String symbol) {
        if (symbol == null || symbol.length() == 0) {
            return false; // nothing to inspect, charAt(0) would throw
        }
        char first = symbol.charAt(0);
        return first == STRING_QUOTE
            || first == CHAR_QUOTE
            || first == COMMAND_QUOTE
            || Character.isDigit(first)
            || isEpsilon(symbol);
    }

}
|