001: package fri.patterns.interpreter.parsergenerator.lexer;
002:
003: import java.util.*;
004: import java.io.IOException;
005: import java.io.Serializable;
006: import fri.patterns.interpreter.parsergenerator.Token;
007: import fri.patterns.interpreter.parsergenerator.syntax.Rule;
008:
009: /**
010: Consuming characters (or bytes) means reading from an input stream as long
011: as the read characters match the pattern the consumer was built for.
012: A consumer can be built with a fixed string, a character set, other
013: character consumers, or a mixed sequence of those. It can be a list of
014: alternating (parallel) consumers. It can be set repeatable and nullable.
015: A character consumer succeeds when he could apply all consumers or
016: patterns/sets stored in its sequence when reading input. It fails and
017: unreads all characters when one non-nullable consumer in its sequence fails.
018:
019: @author (c) 2002, Fritz Ritzberger
020: */
021:
022: class Consumer implements Comparable, Serializable {
023: private List sequence = new ArrayList();
024: private List constraints;
025: private boolean nullable = false;
026: private boolean repeatable = false;
027: protected Rule rule;
028: protected int fixedLength = -1, startLength = -1, variance = -1;
029:
030: /** Empty consumer knowing its rule. This could become a Lexer toplevel consumer. */
031: Consumer(Rule rule) {
032: this .rule = rule;
033: }
034:
035: /** Consumer with some start character or fixed string, without rule. */
036: Consumer(String charOrString) {
037: append(charOrString);
038: }
039:
040: /** Internal constructor needed in LexerBuilder. */
041: protected Consumer() {
042: }
043:
044: /** Append a String or character to sequence. */
045: public void append(String charOrString) {
046: Object o = sequence.size() > 0 ? sequence
047: .get(sequence.size() - 1) : null; // check preceding
048: if (o instanceof String) { // has a string preceding
049: String s = (String) o;
050: s = s + charOrString; // append to it
051: sequence.set(sequence.size() - 1, s);
052: } else {
053: sequence.add(charOrString);
054: }
055: }
056:
057: /** Add a character consumer reference. */
058: public void append(Reference subConsumer) {
059: sequence.add(subConsumer);
060: }
061:
062: /** Add a character consumer. */
063: public void append(Consumer subConsumer) {
064: sequence.add(subConsumer);
065: }
066:
067: /** Add a character set by its high character. Low character is previously appended one. */
068: public void appendSet(String high) throws LexerException {
069: String low = (String) sequence.get(sequence.size() - 1); // throws ClassCastException if not String
070: if (low.length() > 1) { // low character was appended to previous string
071: int i = low.length() - 1;
072: sequence.set(sequence.size() - 1, low.substring(0, i)); // cut last character
073: sequence.add(new CharacterSet("" + low.charAt(i), high)); // append set
074: } else {
075: sequence.set(sequence.size() - 1, new CharacterSet(low,
076: high));
077: }
078: }
079:
080: /** Passed Consumer will constrain every character of this consumer. */
081: public void subtract(Consumer constraint) {
082: if (constraints == null)
083: constraints = new ArrayList();
084: constraints.add(constraint);
085: }
086:
087: /** Passed reference will constrain every character of this consumer. */
088: public void subtract(Reference constraint) {
089: if (constraints == null)
090: constraints = new ArrayList();
091: constraints.add(constraint);
092: }
093:
094: /** Resolve all references to real consumers after build. */
095: public void resolveConsumerReferences(Map charConsumers,
096: Map doneList) throws LexerException {
097: if (doneList.get(this ) != null)
098: return;
099: doneList.put(this , this );
100:
101: List[] varr = new List[] { sequence, getAlternatives(),
102: constraints };
103: for (int j = 0; j < varr.length; j++) {
104: List v = varr[j];
105:
106: if (v != null) {
107: for (int i = 0; v != null && i < v.size(); i++) {
108: Object o = v.get(i);
109:
110: if (o instanceof Reference) {
111: //System.err.println("Found consumer reference "+o+" within "+rule);
112: Reference cr = (Reference) o;
113: Object cc = charConsumers.get(cr.nonterminal);
114:
115: if (cc == null)
116: throw new LexerException(
117: "Consumer-Reference not found: "
118: + cr.nonterminal);
119:
120: v.set(i, cc);
121: } else if (o instanceof Consumer) {
122: Consumer cc = (Consumer) o;
123: cc.resolveConsumerReferences(charConsumers,
124: doneList);
125: }
126: }
127: }
128: }
129: }
130:
131: public void setNullable() {
132: nullable = true;
133: }
134:
135: public boolean isNullable() {
136: return nullable;
137: }
138:
139: public void setRepeatable() {
140: repeatable = true;
141: }
142:
143: public boolean isRepeatable() {
144: return repeatable;
145: }
146:
147: /** Always returns null as this consumer holds no alternatives. */
148: public List getAlternatives() {
149: return null;
150: }
151:
152: /** Implements Comparable: Sort alternatives by precedence: - getStartVariance(), + getStartLength(), + getFixedLength(). */
153: public int compareTo(Object o) {
154: Consumer cc = (Consumer) o;
155: int i;
156: i = getStartVariance() - cc.getStartVariance(); // be as exact as possible
157: if (i != 0)
158: return i;
159: i = cc.getStartLength() - getStartLength(); // be as significant as possible
160: if (i != 0)
161: return i;
162: i = cc.getFixedLength() - getFixedLength(); // be long as possible
163: if (i != 0)
164: return i;
165: return 0;
166: }
167:
168: /** Returns the first character of this consumer, or null if starting with a set. */
169: public Character getStartCharacter() {
170: Object o = sequence.get(0);
171: //System.err.println("getStartCharacter from sequence "+o);
172: if (o instanceof Consumer)
173: return ((Consumer) o).getStartCharacter();
174: else if (o instanceof CharacterSet)
175: return null;
176: else
177: return new Character(((String) o).charAt(0));
178: }
179:
180: /** Returns the count of possible start characters. For fixed start character this is 1. */
181: public int getStartVariance() {
182: if (this .variance > 0)
183: return this .variance;
184:
185: if (getStartCharacter() != null)
186: return this .variance = 1;
187:
188: int variance;
189: Object o = sequence.get(0);
190: if (o instanceof Consumer)
191: variance = ((Consumer) o).getStartVariance();
192: else if (o instanceof CharacterSet)
193: variance = ((CharacterSet) o).getVariance();
194: else
195: throw new IllegalStateException(
196: "No fixed start character, no character set, where am i?");
197:
198: for (int i = 0; constraints != null && i < constraints.size(); i++)
199: variance -= ((Consumer) constraints.get(i))
200: .getStartVariance();
201:
202: return this .variance = variance;
203: }
204:
205: /** The fixed length ends before the first found character set (like "0..9") or repeatable or nullable consumer. */
206: public int getFixedLength() {
207: if (fixedLength >= 0) // never call toString() before all sequences have arrived
208: return fixedLength;
209: fixedLength = getSomeLength(false, new ArrayList());
210: return fixedLength;
211: }
212:
213: /** The start length ends before the first found repeatable or nullable consumer (like "chars*"), but not before a character set. */
214: public int getStartLength() {
215: if (startLength >= 0) // never call toString() before all sequences have arrived
216: return startLength;
217: List reason = new ArrayList();
218: startLength = getSomeLength(true, reason);
219: //System.err.println("found start length "+startLength+" for rule "+rule+", sequence "+sequence+", reason "+reason);
220: return startLength;
221: }
222:
223: protected int getSomeLength(boolean exploreStartLength,
224: List breakIndicator) {
225: int len = 0;
226:
227: for (int i = 0; breakIndicator.size() <= 0
228: && i < sequence.size(); i++) {
229: Object o = sequence.get(i);
230:
231: if (o instanceof Consumer) {
232: Consumer cc = (Consumer) o;
233:
234: if (cc.isNullable()) { // fixed start length ends here
235: breakIndicator.add("nullable"); // make parent terminate
236: return len;
237: } else if (cc.isRepeatable()) { // fixed start length ends here
238: len += cc.getSomeLength(exploreStartLength,
239: breakIndicator);
240: breakIndicator.add("repeatable"); // make parent terminate
241: return len;
242: }
243:
244: len += cc.getSomeLength(exploreStartLength,
245: breakIndicator);
246: } else if (o instanceof CharacterSet) {
247: if (exploreStartLength) {
248: breakIndicator.add("set"); // make parent terminate
249: return len;
250: }
251: len += 1; // would match exactly one character
252: } else { // must be String
253: len += ((String) o).length(); // matches a string of same length
254: }
255: }
256:
257: return len;
258: }
259:
260: /** Return contained consumer when having only one and no constraints. This is called immediately after construction. */
261: Consumer optimize() {
262: if (constraints == null && sequence.size() == 1
263: && sequence.get(0) instanceof Consumer) {
264: // give up this formal-only consumer, take rule to sub-consumer
265: Consumer cc = (Consumer) sequence.get(0);
266: cc.rule = rule;
267: return cc;
268: }
269: return this ;
270: }
271:
272: /**
273: Returns true if the rule of this consumer matches the passed left recursive rule.
274: E.g. passing "nonterm ::= nonterm something" would match "nonterm ::= something".
275: */
276: boolean matchesRepeatableRule(Rule rule) {
277: if (rule.rightSize() != this .rule.rightSize() + 1)
278: return false;
279: for (int i = 0; i < this .rule.rightSize(); i++)
280: if (this .rule.getRightSymbol(i).equals(
281: rule.getRightSymbol(i + 1)) == false)
282: return false;
283: return true;
284: }
285:
286: // consume methods
287:
288: /** Passes the factory for Strategy objects to all contained ConsumerAlternatives. */
289: public void setStrategyFactoryMethod(
290: StrategyFactoryMethod strategyFactoryMethod) {
291: for (int i = 0; i < sequence.size(); i++) {
292: Object c = sequence.get(i);
293: if (c instanceof Consumer) {
294: ((Consumer) c)
295: .setStrategyFactoryMethod(strategyFactoryMethod);
296: }
297: }
298: }
299:
300: /**
301: Reads from input. Returns null if input did not match, else a result tree containing the text.
302: Ensures that a nullable consumer never returns null and a repeatable consumer repeats.
303: @param input Input object where to read from.
304: @return null if no match, else scanned input as a result tree.
305: */
306: public ResultTree consume(InputText input) throws IOException {
307: // prepare scanning for results
308: ResultTree result = isRepeatable() ? ensureResultTree(null)
309: : null; // prepare a list container when repeatable
310: ResultTree r;
311: Token.Address start = new Token.Address(input.getScanLine(),
312: input.getScanColumn(), input.getScanOffset());
313:
314: // consume input and optionally loop when having a repeatable rule
315: do {
316: r = consumeInternal(input);
317: if (r != null && r.hasText())
318: result = (result == null) ? r : result.addChild(r);
319: } while (r != null && isRepeatable());
320:
321: // check the result tree element
322: if (result != null && isRepeatable()
323: && result.getChildCount() <= 0)
324: result = null;
325:
326: if (result == null && isNullable()) // having read no input should not break loop of caller
327: result = ensureResultTree(null); // return empty result element
328:
329: // add range when there was a result
330: if (result != null) {
331: Token.Address end = new Token.Address(input.getScanLine(),
332: input.getScanColumn(), input.getScanOffset());
333: result.setRange(new Token.Range(start, end));
334: }
335:
336: return result;
337: }
338:
339: /**
340: Consumes characters from Input and stores it into returned result tree.
341: Returns null if nothing has been consumed.
342: */
343: protected ResultTree consumeInternal(InputText input)
344: throws IOException {
345: ResultTree result = null;
346: boolean ok = true;
347: int mark = input.getMark();
348: Token.Address start = new Token.Address(input.getScanLine(),
349: input.getScanColumn(), input.getScanOffset());
350:
351: // all members of this sequence must match
352: for (int i = 0; ok && i < sequence.size(); i++) {
353:
354: // first check constraint consumers negatively
355: for (int j = 0; ok && constraints != null
356: && j < constraints.size(); j++) {
357: Consumer cc = (Consumer) constraints.get(j);
358:
359: int mark1 = input.getMark(); // set mark as constraint-consumer is no permitted consumer
360: ok = (cc.consumeInternal(input) == null); // ok when constraint failed to read
361: input.setMark(mark1); // reset to mark
362: }
363:
364: if (ok) { // no constraint succeeded, now check positively
365: Object o = sequence.get(i);
366:
367: if (o instanceof Consumer) { // match next consumer
368: Consumer cc = (Consumer) o;
369: ResultTree r = cc.consume(input);
370:
371: if (r == null)
372: ok = false;
373: else
374: result = ensureResultTree(result).addChild(r);
375: } else if (o instanceof CharacterSet) { // match character set
376: CharacterSet charSet = (CharacterSet) o;
377: int ic = input.read();
378: char c = (char) ic;
379: if (ic == Input.EOF || charSet.includes(c) == false)
380: ok = false;
381: else
382: result = ensureResultTree(result).append(c);
383: } else { // match a literal terminal String
384: String s = (String) o;
385: int j = 0;
386:
387: do {
388: int ic = input.read();
389: ok = (ic != Input.EOF && ((char) ic) == s
390: .charAt(j));
391: j++;
392: } while (ok && j < s.length());
393:
394: if (ok)
395: result = ensureResultTree(result).append(s);
396:
397: } // end possible sequence objects
398:
399: } // end if ok (not constrained)
400: } // end for all sequences
401:
402: if (ok && result != null && result.hasText()) { // sequence did match and read non-null text
403: Token.Address end = new Token.Address(input.getScanLine(),
404: input.getScanColumn(), input.getScanOffset());
405: result.setRange(new Token.Range(start, end));
406: //System.err.println("Consumer success, free memory: "+Runtime.getRuntime().freeMemory()+", "+rule);
407: return result; // null-text will be considered by consume() that knows about nullable rules
408: }
409:
410: input.setMark(mark); // start again from initial mark
411:
412: //System.err.println("Consumer failed: "+rule);
413: return null; // there was no match
414: }
415:
416: private ResultTree ensureResultTree(ResultTree result) {
417: if (result == null)
418: result = new ResultTree(rule);
419: return result;
420: }
421:
422: /** String representation of consumer: hashcode, sequence and constraints. */
423: public String toString() {
424: return hashCode()
425: + "("
426: + getStartVariance()
427: + ","
428: + getStartLength()
429: + ","
430: + getFixedLength()
431: + ")> "
432: + toStringBase()
433: + (isNullable() && isRepeatable() ? " *"
434: : isNullable() ? " ?" : isRepeatable() ? " +"
435: : "");
436: }
437:
438: /** Returns the base string for toString() method. */
439: protected String toStringBase() {
440: StringBuffer sb = new StringBuffer();
441: listToString(sequence, sb, " ", false);
442: listToString(constraints, sb, " - ", true);
443: return sb.toString();
444: }
445:
446: /** Converts a list into String, for toString() method. */
447: protected void listToString(List list, StringBuffer sb,
448: String separator, boolean separatorAtFirst) {
449: for (int i = 0; list != null && i < list.size(); i++) {
450: Object o = list.get(i);
451:
452: if (separatorAtFirst || i > 0)
453: sb.append(separator);
454:
455: if (o instanceof Consumer)
456: sb.append("[" + o.hashCode() + "]");
457: else
458: // String or CharacterSet
459: sb.append(o.toString());
460: }
461: }
462:
463: /**
464: Returns true if the passed consumer could be concurrent with this.
465: This method does not consider constraints, as these could be complex.
466: */
467: public boolean overlaps(Consumer cc) {
468: Object o1 = sequence.get(0);
469:
470: if (o1 instanceof Consumer) { // drill down
471: return ((Consumer) o1).overlaps(cc);
472: } else { // primitive found
473: if (cc.sequence.size() <= 0) // must be ConsumerAlternatives
474: return cc.overlaps(this );
475:
476: Object o2 = cc.sequence.get(0);
477:
478: if (o2 instanceof Consumer) { // drill down
479: return ((Consumer) o2).overlaps(this );
480: } else if (o1 instanceof CharacterSet) {
481: if (o2 instanceof CharacterSet)
482: return ((CharacterSet) o1)
483: .overlaps((CharacterSet) o2);
484: else if (o2 instanceof String)
485: return ((CharacterSet) o1).includes(((String) o2)
486: .charAt(0));
487: } else if (o2 instanceof CharacterSet) {
488: if (o1 instanceof String)
489: return ((CharacterSet) o2).includes(((String) o1)
490: .charAt(0));
491: }
492:
493: String seq1 = (String) o1;
494: String seq2 = (String) o2;
495: return seq1.charAt(0) == seq2.charAt(0);
496: }
497: }
498:
499: // helper classes
500:
501: /** Marker class for references that must be resolved after building consumers. */
502: public static class Reference {
503: String nonterminal;
504:
505: Reference(String nonterminal) {
506: this .nonterminal = nonterminal;
507: }
508:
509: public String toString() {
510: return nonterminal;
511: }
512: }
513:
514: private static class CharacterSet implements Serializable {
515: private String stringRepres;
516: private char firstChar, lastChar;
517:
518: CharacterSet(String first, String last) throws LexerException {
519: this .firstChar = first.charAt(0);
520: this .lastChar = last.charAt(0);
521:
522: if (firstChar >= lastChar)
523: throw new LexerException(
524: "First character is bigger equal last: "
525: + toString());
526: }
527:
528: public char getFirstChar() {
529: return firstChar;
530: }
531:
532: public char getLastChar() {
533: return lastChar;
534: }
535:
536: /** Returns the number of contained characters. */
537: public int getVariance() {
538: return lastChar - firstChar;
539: }
540:
541: /** Returns true if passed character is contained in this set. */
542: public boolean includes(char c) {
543: return c >= firstChar && c <= lastChar;
544: }
545:
546: /** Returns true if passed character set contains characters contained in this set. */
547: public boolean overlaps(CharacterSet other) {
548: return other.includes(firstChar)
549: || other.includes(lastChar)
550: || includes(other.firstChar)
551: || includes(other.lastChar);
552: }
553:
554: public String toString() {
555: if (stringRepres == null) {
556: if (Character.isISOControl(firstChar)
557: || Character.isISOControl(lastChar))
558: this .stringRepres = Integer.toHexString(firstChar)
559: + ".." + Integer.toHexString(lastChar);
560: else
561: this .stringRepres = firstChar + ".." + lastChar;
562: }
563: return stringRepres;
564: }
565: }
566:
567: }
|