001: /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
002: * This file is part of Beaver Parser Generator. *
003: * Copyright (C) 2003,2004 Alexander Demenchuk <alder@softanvil.com>. *
004: * All rights reserved. *
005: * See the file "LICENSE" for the terms and conditions for copying, *
006: * distribution and modification of Beaver. *
007: * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
008:
009: package beaver;
010:
011: import java.io.ByteArrayInputStream;
012: import java.io.DataInputStream;
013: import java.io.IOException;
014: import java.io.InputStream;
015: import java.util.zip.InflaterInputStream;
016:
017: /**
018: * Parsing Tables
019: */
020: public final class ParsingTables {
021: /** A table with all actions */
022: private final short[] actions;
023:
024: /**
025: * A table containing the lookahead for each entry in "actions" table.
026: * Used to detect "collisions".
027: */
028: final short[] lookaheads;
029:
030: /**
031: * For each state, the offset into "actions" table that is used to find action for a terminal
032: * that has been fetched from the scanner.
033: */
034: final int[] actn_offsets;
035:
036: /**
037: * For each state, the offset into "actions" table that is used to find a next parser's state
038: * using a nonterminal that has been created by a reduced production.
039: */
040: private final int[] goto_offsets;
041:
042: /** Default action for each state */
043: private final short[] default_actions;
044:
045: /**
046: * A table with encoded production information.
047: * <p/>
048: * Each slot in this table is a "structure":
049: * <pre>
050: * short lhs_symbol_id ; // Symbol on the left-hand side of the production
051: * short rhs_length ; // Number of right-hand side symbols in the production
052: * </pre>
053: * where lhs_symbol_id uses high 16 bit of this structure, and rhs_length - lower 16 bits
054: */
055: final int[] rule_infos;
056:
057: /** ID of the "error" nonterminal */
058: final short error_symbol_id;
059:
060: /** Indicates whether action tables were compressed. */
061: final boolean compressed;
062:
063: /** Number of terminal symbols. */
064: final int n_term;
065:
066: public ParsingTables(Class impl_class) {
067: this (getSpecAsResourceStream(impl_class));
068: }
069:
070: /**
071: * Ensures that parser tables are loaded.
072: *
073: * @param impl_class class of the instance of the Parser
074: */
075: public ParsingTables(String spec) {
076: this (new ByteArrayInputStream(decode(spec)));
077: }
078:
079: private ParsingTables(InputStream in) {
080: try {
081: DataInputStream data = new DataInputStream(
082: new InflaterInputStream(in));
083: try {
084: int len = data.readInt();
085: actions = new short[len];
086: for (int i = 0; i < len; i++) {
087: actions[i] = data.readShort();
088: }
089: lookaheads = new short[len];
090: for (int i = 0; i < len; i++) {
091: lookaheads[i] = data.readShort();
092: }
093:
094: len = data.readInt();
095: actn_offsets = new int[len];
096: for (int i = 0; i < len; i++) {
097: actn_offsets[i] = data.readInt();
098: }
099: goto_offsets = new int[len];
100: for (int i = 0; i < len; i++) {
101: goto_offsets[i] = data.readInt();
102: }
103:
104: len = data.readInt();
105: compressed = len != 0;
106: if (compressed) {
107: default_actions = new short[len];
108: for (int i = 0; i < len; i++) {
109: default_actions[i] = data.readShort();
110: }
111: } else {
112: default_actions = null;
113: }
114:
115: int min_nt_id = Integer.MAX_VALUE;
116: len = data.readInt();
117: rule_infos = new int[len];
118: for (int i = 0; i < len; i++) {
119: rule_infos[i] = data.readInt();
120: min_nt_id = Math.min(min_nt_id,
121: rule_infos[i] >>> 16);
122: }
123: n_term = min_nt_id;
124:
125: error_symbol_id = data.readShort();
126: } finally {
127: data.close();
128: }
129: } catch (IOException e) {
130: throw new IllegalStateException(
131: "cannot initialize parser tables: "
132: + e.getMessage());
133: }
134: }
135:
136: /**
137: * Scans lookaheads expected in a given state for a terminal symbol.
138: * Used in error recovery when an unexpected terminal is replaced with one that is expected.
139: *
140: * @param state in which error occured
141: * @return ID of the expected terminal symbol or -1 if there is none
142: */
143: final short findFirstTerminal(int state) {
144: int offset = actn_offsets[state];
145: for (short term_id = offset < 0 ? (short) -offset : 0; term_id < n_term; term_id++) {
146: int index = offset + term_id;
147: if (index >= lookaheads.length)
148: break;
149: if (lookaheads[index] == term_id)
150: return term_id;
151: }
152: return -1;
153: }
154:
155: /**
156: * Find the appropriate action for a parser in a given state with a specified terminal look-ahead.
157: *
158: * @param state of a parser
159: * @param lookahead
160: * @return parser action
161: */
162: final short findParserAction(int state, short lookahead) {
163: int index = actn_offsets[state];
164: if (index != UNUSED_OFFSET) {
165: index += lookahead;
166: if (0 <= index && index < actions.length
167: && lookaheads[index] == lookahead) {
168: return actions[index];
169: }
170: }
171: return compressed ? default_actions[state] : 0;
172: }
173:
174: /**
175: * Find the appropriate action for a parser in a given state with a specified nonterminal look-ahead.
176: * In this case the only possible outcomes are either a state to shift to or an accept action.
177: *
178: * @param state of a parser
179: * @param lookahead
180: * @return parser action
181: */
182: final short findNextState(int state, short lookahead) {
183: int index = goto_offsets[state];
184: if (index != UNUSED_OFFSET) {
185: index += lookahead;
186: if (0 <= index && index < actions.length
187: && lookaheads[index] == lookahead) {
188: return actions[index];
189: }
190: }
191: return compressed ? default_actions[state] : 0;
192: }
193:
194: static final int UNUSED_OFFSET = Integer.MIN_VALUE;
195:
196: static byte[] decode(String spec) {
197: char[] chars = spec.toCharArray();
198: if (chars.length % 4 != 0)
199: throw new IllegalArgumentException("corrupted encoding");
200: int len = chars.length / 4 * 3;
201: byte[] bytes = new byte[chars[chars.length - 1] == '=' ? chars[chars.length - 2] == '=' ? len - 2
202: : len - 1
203: : len];
204:
205: len -= 3;
206: int ci = 0, bi = 0;
207: while (bi < len) {
208: int acc = decode(chars[ci++]) << 18
209: | decode(chars[ci++]) << 12
210: | decode(chars[ci++]) << 6 | decode(chars[ci++]);
211: bytes[bi++] = (byte) (acc >> 16);
212: bytes[bi++] = (byte) (acc >> 8 & 0xFF);
213: bytes[bi++] = (byte) (acc & 0xFF);
214: }
215: int acc = decode(chars[ci++]) << 18 | decode(chars[ci++]) << 12
216: | decode(chars[ci++]) << 6 | decode(chars[ci++]);
217: bytes[bi++] = (byte) (acc >> 16);
218: if (bi < bytes.length) {
219: bytes[bi++] = (byte) (acc >> 8 & 0xFF);
220: if (bi < bytes.length) {
221: bytes[bi++] = (byte) (acc & 0xFF);
222: }
223: }
224: return bytes;
225: }
226:
227: static int decode(char c) {
228: if (c <= '9') {
229: if (c >= '0')
230: return c - '0';
231: if (c == '#')
232: return 62;
233: if (c == '$')
234: return 63;
235: } else if (c <= 'Z') {
236: if (c >= 'A')
237: return c - 'A' + 10;
238: if (c == '=')
239: return 0;
240: } else if ('a' <= c && c <= 'z')
241: return c - 'a' + 36;
242: throw new IllegalStateException("illegal encoding character '"
243: + c + "'");
244: }
245:
246: static InputStream getSpecAsResourceStream(Class impl_class) {
247: String name = impl_class.getName();
248: name = name.substring(name.lastIndexOf('.') + 1) + ".spec";
249: InputStream spec_stream = impl_class.getResourceAsStream(name);
250: if (spec_stream == null)
251: throw new IllegalStateException(
252: "parser specification not found");
253: return spec_stream;
254: }
255: }
|