001: package fri.patterns.interpreter.parsergenerator.lexer;
002:
003: import java.io.*;
004: import java.util.*;
005: import fri.util.collections.AggregatingHashtable;
006: import fri.patterns.interpreter.parsergenerator.Token;
007:
008: /**
009: Strategy is the way how alternative concurrent character consumers are applied to input.
010: It is used by <i>LexerImpl</i> and <i>ConsumerAlternatives</i>.
011: There are consumers that have a fixed start character and others that have not.
012: Consumers have different length of fixed start sequences.
013: Some consumers have a fixed length to scan, others (with repeatable rules) have not.
014: Consumers differ in their character sets, some having many possible characters,
015: others lesser, some sets might overlap those of other consumers.
016: Last but not least the Parser gives hints about currently expected tokens.
017: <p />
018: Following is the strategy implemented here:
019: <ul>
020: <li>Divide consumers in two groups: those with fixed start character and those without.</li>
021: <li>Sort both consumers groups by
022: <ol>
023: <li>by the variance of their start character (which may be a set), the less the better</li>
024: <li>their start sequence length (ends before first repeatable character), the longer the better</li>
025: <li>by their fixed start length (ends before first character set), the longer the better.</li>
026: </ol>
027: Sorting is done by <i>Consumer implements Comparable</i>
028: </li>
029: <li>The lookahead character chooses consumers with fixed start character.</li>
030: <li>If one matches, overlapping consumers without fixed start character are tried, when one scans longer, it wins.</li>
031: <li>If none matches, consumers without fixed start character are tried, by sort order.</li>
032: <li>If one matches, overlapping consumers without fixed start character are tried, when one scans longer, it wins.</li>
033: </ul>
034:
035: @author Fritz Ritzberger
036: */
037:
038: public class Strategy implements Serializable {
039: private boolean inited;
040: private AggregatingHashtable itemsWithStartChar = new AggregatingHashtable();
041: private List itemsWithoutStartChar = new ArrayList();
042: private AggregatingHashtable competitiveGroups = new AggregatingHashtable();
043: private boolean competeForLongestInput = true;
044:
045: public void setCompeteForLongestInput(boolean competeForLongestInput) {
046: this .competeForLongestInput = competeForLongestInput;
047: }
048:
049: /** Adds a Consumer to list of possible consumers. This consumer will read input to be ignored by the parser. */
050: public void addIgnoringConsumer(String symbol, Consumer cc) {
051: addConsumer(symbol, cc);
052: }
053:
054: /** Adds a Consumer to list of possible consumers producing valid tokens. */
055: public void addTokenConsumer(String symbol, Consumer cc) {
056: addConsumer(symbol, cc);
057: }
058:
059: /** Returns true if the passed terminal is already in list. */
060: public boolean hasTerminal(String terminal) {
061: for (Enumeration e = new ItemEnumerator(); e.hasMoreElements();)
062: if (((Item) e.nextElement()).getSymbol().equals(terminal))
063: return true;
064: return false;
065: }
066:
067: private void addConsumer(String symbol, Consumer cc) {
068: Item item = new Item(symbol, cc);
069: Character c = item.consumer.getStartCharacter();
070: if (c != null)
071: itemsWithStartChar.put(c, item);
072: else
073: itemsWithoutStartChar.add(item);
074: }
075:
076: /** Liefert die Items, die dem uebergebenen Start-Zeichen entsprechen wuerden. */
077: private List getItemsWithStartCharacter(int c) {
078: init();
079: return (List) itemsWithStartChar.get(new Character((char) c));
080: }
081:
082: /** Liefert die Items, die kein bestimmtes Start-Zeichen definieren. */
083: private List getItemsWithoutStartCharacter() {
084: init();
085: return itemsWithoutStartChar;
086: }
087:
088: // separate consumers with or without fixed start character, build hashtable for start characters
089: private void init() {
090: if (inited == false) {
091: inited = true;
092: // sort items with start character
093: for (Iterator it = itemsWithStartChar.entrySet().iterator(); it
094: .hasNext();) {
095: Map.Entry entry = (Map.Entry) it.next();
096: Collections.sort((List) entry.getValue());
097: }
098: // sort items without start character
099: Collections.sort(itemsWithoutStartChar);
100: }
101: }
102:
103: private List initCompetitors(Item candidate) {
104: List competitors = (List) competitiveGroups.get(candidate);
105: if (competitors != null) // already inited
106: if (competitors.get(0) instanceof Item) // valid competitor list
107: return competitors;
108: else
109: // was dummy object
110: return null;
111:
112: for (Enumeration e = new ItemEnumerator(); e.hasMoreElements();) { // search a competitor among all items
113: Item item = (Item) e.nextElement();
114: if (item != candidate
115: && item.consumer.overlaps(candidate.consumer)) {
116: //System.err.println("Found competitive consumers: candidate = "+candidate.consumer+", competitor = "+item.consumer);
117: competitiveGroups.put(candidate, item);
118: }
119: }
120:
121: competitors = (List) competitiveGroups.get(candidate);
122: if (competitors == null) // no competitors were found, mark candidate with dummy object
123: competitiveGroups.put(candidate, new Byte((byte) 0));
124:
125: return competitors;
126: }
127:
128: /**
129: Liefert null wenn kein consumer den input lesen kann, sonst den laengstmoeglichen gescannten Text.
130: @param input the input to read from
131: @param lookahead the first byte or character from input
132: @param expectedTokenSymbols expected token symbols (in key enumeration), can be null
133: */
134: public Item consume(InputText input, int lookahead,
135: Map expectedTokenSymbols) throws IOException {
136: // try all scan-items by sort order, indexed ones first
137: List[] allItems = new List[] {
138: getItemsWithStartCharacter(lookahead),
139: getItemsWithoutStartCharacter() };
140:
141: for (int j = 0; j < allItems.length; j++) {
142: List items = allItems[j];
143:
144: if (items != null) {
145: for (int i = 0; i < items.size(); i++) {
146: Item item = (Item) items.get(i);
147:
148: if (expectedTokenSymbols == null
149: || expectedTokenSymbols.get(item
150: .getSymbol()) != null) {
151: int startMark = input.getMark(); // save start position for concurrent consumers
152: ResultTree result = item.consume(input);
153:
154: if (result != null) { // consumer succeeded
155: if (competeForLongestInput) {
156: List competitors = initCompetitors(item);
157: if (competitors != null) {
158: int bestMark = input.getMark();
159: input.setMark(startMark);
160: item = checkCompetitiveGroups(
161: result, item, input,
162: competitors, bestMark,
163: expectedTokenSymbols); // returns item that scans longest
164: }
165: }
166: return item;
167: }
168: }
169: } // end for all items
170: } // end if item != null
171: } // end for both item groups
172:
173: if (expectedTokenSymbols != null) // when this was a run with hints,
174: return consume(input, lookahead, null); // now try without hints
175:
176: return null;
177: }
178:
179: // loop competitive group of passed Item (if existent), return given Item or an Item that scans longer
180: private Item checkCompetitiveGroups(ResultTree result, Item item,
181: InputText input, List competitors, int bestMark,
182: Map expectedTokenSymbols) throws IOException {
183: int max = bestMark - input.getMark();
184:
185: for (int i = 0; i < competitors.size(); i++) {
186: Item competitor = (Item) competitors.get(i);
187:
188: // let take part only if no expected symbols or is in expected symbols
189: if (expectedTokenSymbols != null
190: && expectedTokenSymbols.get(competitor.getSymbol()) == null)
191: continue;
192:
193: int mark = input.getMark(); // memorize current input mark
194:
195: ResultTree r = competitor.consume(input); // consume
196: int len = input.getMark() - mark;
197:
198: if (r != null && len > max) { // scanned longer
199: bestMark = input.getMark();
200: max = len;
201: item = competitor;
202: }
203:
204: input.setMark(mark); // reset for next candidate
205: }
206:
207: input.setMark(bestMark); // set mark forward to best scan result
208:
209: return item;
210: }
211:
212: /** Returns a human readable representation of the lists and maps within this strategy. */
213: public String toString() {
214: init();
215: StringBuffer sb = new StringBuffer();
216: sb.append(" Indexed list is: " + itemsWithStartChar.size()
217: + "\n");
218: for (Iterator it = itemsWithStartChar.entrySet().iterator(); it
219: .hasNext();) {
220: Map.Entry entry = (Map.Entry) it.next();
221: sb.append(" " + entry.getKey() + " -> "
222: + entry.getValue() + "\n");
223: }
224: sb.append(" Sorted unindexed list is: "
225: + itemsWithoutStartChar.size() + "\n");
226: for (int i = 0; i < itemsWithoutStartChar.size(); i++) {
227: sb.append(" " + itemsWithoutStartChar.get(i) + "\n");
228: }
229: return sb.toString();
230: }
231:
232: /**
233: The List item wrapper for toplevel consumers. Items can be sorted by their relevance.
234: They encapsulate a consumer, its token symbol and the consumed lexer result.
235: */
236: public static class Item implements Comparable, Serializable {
237: private String symbol;
238: private Consumer consumer;
239: private transient ResultTree result;
240:
241: public Item(String symbol, Consumer consumer) {
242: if (consumer == null)
243: throw new IllegalArgumentException(
244: "Got no character consumer for symbol "
245: + symbol);
246: if (symbol == null)
247: throw new IllegalArgumentException(
248: "Got no symbol for consumer " + consumer);
249:
250: this .symbol = symbol;
251: this .consumer = consumer;
252: }
253:
254: /** Consumes from input by delegating to contained consumer, stores the result. */
255: public ResultTree consume(InputText input) throws IOException {
256: return result = consumer.consume(input);
257: }
258:
259: /** Returns the lexer item symbol, enclosed in `backquotes` when not a literal. */
260: public String getSymbol() {
261: return symbol;
262: }
263:
264: /** Returns the token symbol, always enclosed in some quotes. */
265: public String getTokenIdentifier() {
266: return Token.isTerminal(symbol) ? symbol
267: : Token.COMMAND_QUOTE + symbol
268: + Token.COMMAND_QUOTE;
269: }
270:
271: public ResultTree getResultTree() { // this is for ConsumerAlternatives
272: return result;
273: }
274:
275: public String toString() {
276: return "{" + symbol + "} " + consumer.toString();
277: }
278:
279: /** Implements Comparable: delegates to character consumer. If they are equal, "terminals" are preferred. */
280: public int compareTo(Object o) {
281: Consumer cc1 = consumer;
282: Consumer cc2 = ((Item) o).consumer;
283: int i;
284: if ((i = cc1.compareTo(cc2)) != 0)
285: return i;
286: return Token.isTerminal(symbol) ? -1 : 1;
287: }
288:
289: /** Compares the contained consumer with other via "==". */
290: public boolean equals(Object o) {
291: return ((Item) o).consumer == consumer;
292: }
293:
294: /** Returns the consumers hashcode. */
295: public int hashCode() {
296: return consumer.hashCode();
297: }
298: }
299:
300: // enumeration over all strategy items (toplevel consumers)
301: private class ItemEnumerator implements Enumeration {
302: private Iterator it, it1, it2;
303:
304: public ItemEnumerator() {
305: it = itemsWithStartChar.entrySet().iterator();
306: it1 = it.hasNext() ? ((List) ((Map.Entry) it.next())
307: .getValue()).iterator() : null;
308: it2 = itemsWithoutStartChar.iterator();
309: }
310:
311: public boolean hasMoreElements() {
312: return it.hasNext() || it1 != null && it1.hasNext()
313: || it2.hasNext();
314: }
315:
316: public Object nextElement() {
317: if (it1 != null)
318: if (it1.hasNext())
319: return it1.next();
320: else if (it.hasNext())
321: return (it1 = ((List) ((Map.Entry) it.next())
322: .getValue()).iterator()).next();
323:
324: if (it2.hasNext())
325: return it2.next();
326:
327: throw new IllegalStateException(
328: "Do not call nextElement() when hasMoreElements() returned false!");
329: }
330: }
331:
332: }
|