001: /*
002: [The "BSD licence"]
003: Copyright (c) 2005-2006 Terence Parr
004: All rights reserved.
005:
006: Redistribution and use in source and binary forms, with or without
007: modification, are permitted provided that the following conditions
008: are met:
009: 1. Redistributions of source code must retain the above copyright
010: notice, this list of conditions and the following disclaimer.
011: 2. Redistributions in binary form must reproduce the above copyright
012: notice, this list of conditions and the following disclaimer in the
013: documentation and/or other materials provided with the distribution.
014: 3. The name of the author may not be used to endorse or promote products
015: derived from this software without specific prior written permission.
016:
017: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
018: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
021: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
022: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
023: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
024: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
026: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: */
028: package org.antlr.misc;
029:
030: import org.antlr.analysis.Label;
031: import org.antlr.tool.Grammar;
032:
033: import java.util.*;
034:
035: /** A set of integers that relies on ranges being common to do
036: * "run-length-encoded" like compression (if you view an IntSet like
037: * a BitSet with runs of 0s and 1s). Only ranges are recorded so that
038: * a few ints up near value 1000 don't cause massive bitsets, just two
039: * integer intervals.
040: *
041: * element values may be negative. Useful for sets of EPSILON and EOF.
042: *
043: * 0..9 char range is index pair ['\u0030','\u0039'].
044: * Multiple ranges are encoded with multiple index pairs. Isolated
045: * elements are encoded with an index pair where both intervals are the same.
046: *
047: * The ranges are ordered and disjoint so that 2..6 appears before 101..103.
048: */
049: public class IntervalSet implements IntSet {
050: /** The list of sorted, disjoint intervals. */
051: protected List intervals;
052:
053: /** Create a set with no elements */
054: public IntervalSet() {
055: intervals = new ArrayList(2); // most sets are 1 or 2 elements
056: }
057:
058: /** Create a set with a single element, el. */
059: public static IntervalSet of(int a) {
060: IntervalSet s = new IntervalSet();
061: s.add(a);
062: return s;
063: }
064:
065: /** Create a set with all ints within range [a..b] (inclusive) */
066: public static IntervalSet of(int a, int b) {
067: IntervalSet s = new IntervalSet();
068: s.add(a, b);
069: return s;
070: }
071:
072: /** Add a single element to the set. An isolated element is stored
073: * as a range el..el.
074: */
075: public void add(int el) {
076: add(el, el);
077: }
078:
079: /** Add interval; i.e., add all integers from a to b to set.
080: * If b<a, do nothing.
081: * Keep list in sorted order (by left range value).
082: * If overlap, combine ranges. For example,
083: * If this is {1..5, 10..20}, adding 6..7 yields
084: * {1..5, 6..7, 10..20}. Adding 4..8 yields {1..8, 10..20}.
085: */
086: public void add(int a, int b) {
087: add(Interval.create(a, b));
088: }
089:
090: protected void add(Interval addition) {
091: //System.out.println("add "+addition+" to "+intervals.toString());
092: if (addition.b < addition.a) {
093: return;
094: }
095: // find position in list
096: // Use iterators as we modify list in place
097: for (ListIterator iter = intervals.listIterator(); iter
098: .hasNext();) {
099: Interval r = (Interval) iter.next();
100: if (addition.equals(r)) {
101: return;
102: }
103: if (addition.adjacent(r) || !addition.disjoint(r)) {
104: // next to each other, make a single larger interval
105: Interval bigger = addition.union(r);
106: iter.set(bigger);
107: // make sure we didn't just create an interval that
108: // should be merged with next interval in list
109: if (iter.hasNext()) {
110: Interval next = (Interval) iter.next();
111: if (bigger.adjacent(next) || !bigger.disjoint(next)) {
112: // if we bump up against or overlap next, merge
113: iter.remove(); // remove this one
114: iter.previous(); // move backwards to what we just set
115: iter.set(bigger.union(next)); // set to 3 merged ones
116: }
117: }
118: return;
119: }
120: if (addition.startsBeforeDisjoint(r)) {
121: // insert before r
122: iter.previous();
123: iter.add(addition);
124: return;
125: }
126: // if disjoint and after r, a future iteration will handle it
127: }
128: // ok, must be after last interval (and disjoint from last interval)
129: // just add it
130: intervals.add(addition);
131: }
132:
133: /*
134: protected void add(Interval addition) {
135: //System.out.println("add "+addition+" to "+intervals.toString());
136: if ( addition.b<addition.a ) {
137: return;
138: }
139: // find position in list
140: //for (ListIterator iter = intervals.listIterator(); iter.hasNext();) {
141: int n = intervals.size();
142: for (int i=0; i<n; i++) {
143: Interval r = (Interval)intervals.get(i);
144: if ( addition.equals(r) ) {
145: return;
146: }
147: if ( addition.adjacent(r) || !addition.disjoint(r) ) {
148: // next to each other, make a single larger interval
149: Interval bigger = addition.union(r);
150: intervals.set(i, bigger);
151: // make sure we didn't just create an interval that
152: // should be merged with next interval in list
153: if ( (i+1)<n ) {
154: i++;
155: Interval next = (Interval)intervals.get(i);
156: if ( bigger.adjacent(next)||!bigger.disjoint(next) ) {
157: // if we bump up against or overlap next, merge
158: intervals.remove(i); // remove next one
159: i--;
160: intervals.set(i, bigger.union(next)); // set to 3 merged ones
161: }
162: }
163: return;
164: }
165: if ( addition.startsBeforeDisjoint(r) ) {
166: // insert before r
167: intervals.add(i, addition);
168: return;
169: }
170: // if disjoint and after r, a future iteration will handle it
171: }
172: // ok, must be after last interval (and disjoint from last interval)
173: // just add it
174: intervals.add(addition);
175: }
176: */
177:
178: public void addAll(IntSet set) {
179: if (set == null) {
180: return;
181: }
182: if (!(set instanceof IntervalSet)) {
183: throw new IllegalArgumentException("can't add non IntSet ("
184: + set.getClass().getName() + ") to IntervalSet");
185: }
186: IntervalSet other = (IntervalSet) set;
187: // walk set and add each interval
188: for (Iterator iter = other.intervals.iterator(); iter.hasNext();) {
189: Interval I = (Interval) iter.next();
190: this .add(I.a, I.b);
191: }
192: }
193:
194: public IntSet complement(int minElement, int maxElement) {
195: return this .complement(IntervalSet.of(minElement, maxElement));
196: }
197:
198: /** Given the set of possible values (rather than, say UNICODE or MAXINT),
199: * return a new set containing all elements in vocabulary, but not in
200: * this. The computation is (vocabulary - this).
201: *
202: * 'this' is assumed to be either a subset or equal to vocabulary.
203: */
204: public IntSet complement(IntSet vocabulary) {
205: if (vocabulary == null) {
206: return null; // nothing in common with null set
207: }
208: if (!(vocabulary instanceof IntervalSet)) {
209: throw new IllegalArgumentException(
210: "can't complement with non IntervalSet ("
211: + vocabulary.getClass().getName() + ")");
212: }
213: IntervalSet vocabularyIS = ((IntervalSet) vocabulary);
214: int maxElement = vocabularyIS.getMaxElement();
215:
216: IntervalSet compl = new IntervalSet();
217: if (intervals.size() == 0) {
218: return compl;
219: }
220: Interval first = (Interval) intervals.get(0);
221: // add a range from 0 to first.a constrained to vocab
222: if (first.a > 0) {
223: IntervalSet s = IntervalSet.of(0, first.a - 1);
224: IntervalSet a = (IntervalSet) s.and(vocabularyIS);
225: compl.addAll(a);
226: }
227: for (int i = 1; i < intervals.size(); i++) { // from 2nd interval .. nth
228: Interval previous = (Interval) intervals.get(i - 1);
229: Interval current = (Interval) intervals.get(i);
230: IntervalSet s = IntervalSet.of(previous.b + 1,
231: current.a - 1);
232: IntervalSet a = (IntervalSet) s.and(vocabularyIS);
233: compl.addAll(a);
234: }
235: Interval last = (Interval) intervals.get(intervals.size() - 1);
236: // add a range from last.b to maxElement constrained to vocab
237: if (last.b < maxElement) {
238: IntervalSet s = IntervalSet.of(last.b + 1, maxElement);
239: IntervalSet a = (IntervalSet) s.and(vocabularyIS);
240: compl.addAll(a);
241: }
242: return compl;
243: }
244:
245: /** Compute this-other via this&~other.
246: * Return a new set containing all elements in this but not in other.
247: * other is assumed to be a subset of this;
248: * anything that is in other but not in this will be ignored.
249: */
250: public IntSet subtract(IntSet other) {
251: // assume the whole unicode range here for the complement
252: // because it doesn't matter. Anything beyond the max of this' set
253: // will be ignored since we are doing this & ~other. The intersection
254: // will be empty. The only problem would be when this' set max value
255: // goes beyond MAX_CHAR_VALUE, but hopefully the constant MAX_CHAR_VALUE
256: // will prevent this.
257: return this .and(((IntervalSet) other).complement(0,
258: Label.MAX_CHAR_VALUE));
259: }
260:
261: /** return a new set containing all elements in this but not in other.
262: * Intervals may have to be broken up when ranges in this overlap
263: * with ranges in other. other is assumed to be a subset of this;
264: * anything that is in other but not in this will be ignored.
265: *
266: * Keep around, but 10-20-2005, I decided to make complement work w/o
267: * subtract and so then subtract can simply be a&~b
268: *
269: public IntSet subtract(IntSet other) {
270: if ( other==null || !(other instanceof IntervalSet) ) {
271: return null; // nothing in common with null set
272: }
273:
274: IntervalSet diff = new IntervalSet();
275:
276: // iterate down both interval lists
277: ListIterator thisIter = this.intervals.listIterator();
278: ListIterator otherIter = ((IntervalSet)other).intervals.listIterator();
279: Interval mine=null;
280: Interval theirs=null;
281: if ( thisIter.hasNext() ) {
282: mine = (Interval)thisIter.next();
283: }
284: if ( otherIter.hasNext() ) {
285: theirs = (Interval)otherIter.next();
286: }
287: while ( mine!=null ) {
288: //System.out.println("mine="+mine+", theirs="+theirs);
289: // CASE 1: nothing in theirs removes a chunk from mine
290: if ( theirs==null || mine.disjoint(theirs) ) {
291: // SUBCASE 1a: finished traversing theirs; keep adding mine now
292: if ( theirs==null ) {
293: // add everything in mine to difference since theirs done
294: diff.add(mine);
295: mine = null;
296: if ( thisIter.hasNext() ) {
297: mine = (Interval)thisIter.next();
298: }
299: }
300: else {
301: // SUBCASE 1b: mine is completely to the left of theirs
302: // so we can add to difference; move mine, but not theirs
303: if ( mine.startsBeforeDisjoint(theirs) ) {
304: diff.add(mine);
305: mine = null;
306: if ( thisIter.hasNext() ) {
307: mine = (Interval)thisIter.next();
308: }
309: }
310: // SUBCASE 1c: theirs is completely to the left of mine
311: else {
312: // keep looking in theirs
313: theirs = null;
314: if ( otherIter.hasNext() ) {
315: theirs = (Interval)otherIter.next();
316: }
317: }
318: }
319: }
320: else {
321: // CASE 2: theirs breaks mine into two chunks
322: if ( mine.properlyContains(theirs) ) {
323: // must add two intervals: stuff to left and stuff to right
324: diff.add(mine.a, theirs.a-1);
325: // don't actually add stuff to right yet as next 'theirs'
326: // might overlap with it
327: // The stuff to the right might overlap with next "theirs".
328: // so it is considered next
329: Interval right = new Interval(theirs.b+1, mine.b);
330: mine = right;
331: // move theirs forward
332: theirs = null;
333: if ( otherIter.hasNext() ) {
334: theirs = (Interval)otherIter.next();
335: }
336: }
337:
338: // CASE 3: theirs covers mine; nothing to add to diff
339: else if ( theirs.properlyContains(mine) ) {
340: // nothing to add, theirs forces removal totally of mine
341: // just move mine looking for an overlapping interval
342: mine = null;
343: if ( thisIter.hasNext() ) {
344: mine = (Interval)thisIter.next();
345: }
346: }
347:
348: // CASE 4: non proper overlap
349: else {
350: // overlap, but not properly contained
351: diff.add(mine.differenceNotProperlyContained(theirs));
352: // update iterators
353: boolean moveTheirs = true;
354: if ( mine.startsBeforeNonDisjoint(theirs) ||
355: theirs.b > mine.b )
356: {
357: // uh oh, right of theirs extends past right of mine
358: // therefore could overlap with next of mine so don't
359: // move theirs iterator yet
360: moveTheirs = false;
361: }
362: // always move mine
363: mine = null;
364: if ( thisIter.hasNext() ) {
365: mine = (Interval)thisIter.next();
366: }
367: if ( moveTheirs ) {
368: theirs = null;
369: if ( otherIter.hasNext() ) {
370: theirs = (Interval)otherIter.next();
371: }
372: }
373: }
374: }
375: }
376: return diff;
377: }
378: */
379:
380: /** TODO: implement this! */
381: public IntSet or(IntSet a) {
382: throw new NoSuchMethodError();
383: }
384:
385: /** Return a new set with the intersection of this set with other. Because
386: * the intervals are sorted, we can use an iterator for each list and
387: * just walk them together. This is roughly O(min(n,m)) for interval
388: * list lengths n and m.
389: */
390: public IntSet and(IntSet other) {
391: if (other == null) { //|| !(other instanceof IntervalSet) ) {
392: return null; // nothing in common with null set
393: }
394:
395: ArrayList myIntervals = (ArrayList) this .intervals;
396: ArrayList theirIntervals = (ArrayList) ((IntervalSet) other).intervals;
397: IntervalSet intersection = null;
398: int mySize = myIntervals.size();
399: int theirSize = theirIntervals.size();
400: int i = 0;
401: int j = 0;
402: // iterate down both interval lists looking for nondisjoint intervals
403: while (i < mySize && j < theirSize) {
404: Interval mine = (Interval) myIntervals.get(i);
405: Interval theirs = (Interval) theirIntervals.get(j);
406: //System.out.println("mine="+mine+" and theirs="+theirs);
407: if (mine.startsBeforeDisjoint(theirs)) {
408: // move this iterator looking for interval that might overlap
409: i++;
410: } else if (theirs.startsBeforeDisjoint(mine)) {
411: // move other iterator looking for interval that might overlap
412: j++;
413: } else if (mine.properlyContains(theirs)) {
414: // overlap, add intersection, get next theirs
415: if (intersection == null) {
416: intersection = new IntervalSet();
417: }
418: intersection.add(mine.intersection(theirs));
419: j++;
420: } else if (theirs.properlyContains(mine)) {
421: // overlap, add intersection, get next mine
422: if (intersection == null) {
423: intersection = new IntervalSet();
424: }
425: intersection.add(mine.intersection(theirs));
426: i++;
427: } else if (!mine.disjoint(theirs)) {
428: // overlap, add intersection
429: if (intersection == null) {
430: intersection = new IntervalSet();
431: }
432: intersection.add(mine.intersection(theirs));
433: // Move the iterator of lower range [a..b], but not
434: // the upper range as it may contain elements that will collide
435: // with the next iterator. So, if mine=[0..115] and
436: // theirs=[115..200], then intersection is 115 and move mine
437: // but not theirs as theirs may collide with the next range
438: // in thisIter.
439: // move both iterators to next ranges
440: if (mine.startsAfterNonDisjoint(theirs)) {
441: j++;
442: } else if (theirs.startsAfterNonDisjoint(mine)) {
443: i++;
444: }
445: }
446: }
447: if (intersection == null) {
448: return new IntervalSet();
449: }
450: return intersection;
451: }
452:
453: /** Is el in any range of this set? */
454: public boolean member(int el) {
455: for (ListIterator iter = intervals.listIterator(); iter
456: .hasNext();) {
457: Interval I = (Interval) iter.next();
458: if (el < I.a) {
459: break; // list is sorted and el is before this interval; not here
460: }
461: if (el >= I.a && el <= I.b) {
462: return true; // found in this interval
463: }
464: }
465: return false;
466: }
467:
468: /** return true if this set has no members */
469: public boolean isNil() {
470: return intervals == null || intervals.size() == 0;
471: }
472:
473: /** If this set is a single integer, return it otherwise Label.INVALID */
474: public int getSingleElement() {
475: if (intervals != null && intervals.size() == 1) {
476: Interval I = (Interval) intervals.get(0);
477: if (I.a == I.b) {
478: return I.a;
479: }
480: }
481: return Label.INVALID;
482: }
483:
484: public int getMaxElement() {
485: if (isNil()) {
486: return Label.INVALID;
487: }
488: Interval last = (Interval) intervals.get(intervals.size() - 1);
489: return last.b;
490: }
491:
492: /** Return minimum element >= 0 */
493: public int getMinElement() {
494: if (isNil()) {
495: return Label.INVALID;
496: }
497: Iterator iter = this .intervals.iterator();
498: while (iter.hasNext()) {
499: Interval I = (Interval) iter.next();
500: int a = I.a;
501: int b = I.b;
502: for (int v = a; v <= b; v++) {
503: if (v >= 0)
504: return v;
505: }
506: }
507: return Label.INVALID;
508: }
509:
510: /** Return a list of Interval objects. */
511: public List getIntervals() {
512: return intervals;
513: }
514:
515: /** Are two IntervalSets equal? Because all intervals are sorted
516: * and disjoint, equals is a simple linear walk over both lists
517: * to make sure they are the same. Interval.equals() is used
518: * by the List.equals() method to check the ranges.
519: */
520: public boolean equals(Object obj) {
521: if (obj == null || !(obj instanceof IntervalSet)) {
522: return false;
523: }
524: IntervalSet other = (IntervalSet) obj;
525: return this .intervals.equals(other.intervals);
526: }
527:
528: public String toString() {
529: return toString(null);
530: }
531:
532: public String toString(Grammar g) {
533: StringBuffer buf = new StringBuffer();
534: if (this .intervals == null || this .intervals.size() == 0) {
535: return "{}";
536: }
537: if (this .intervals.size() > 1) {
538: buf.append("{");
539: }
540: Iterator iter = this .intervals.iterator();
541: while (iter.hasNext()) {
542: Interval I = (Interval) iter.next();
543: int a = I.a;
544: int b = I.b;
545: if (a == b) {
546: if (g != null) {
547: buf.append(g.getTokenDisplayName(a));
548: } else {
549: buf.append(a);
550: }
551: } else {
552: if (g != null) {
553: buf.append(g.getTokenDisplayName(a) + ".."
554: + g.getTokenDisplayName(b));
555: } else {
556: buf.append(a + ".." + b);
557: }
558: }
559: if (iter.hasNext()) {
560: buf.append(", ");
561: }
562: }
563: if (this .intervals.size() > 1) {
564: buf.append("}");
565: }
566: return buf.toString();
567: }
568:
569: public int size() {
570: int n = 0;
571: Iterator iter = this .intervals.iterator();
572: while (iter.hasNext()) {
573: Interval I = (Interval) iter.next();
574: int a = I.a;
575: int b = I.b;
576: n += (b - a + 1);
577: }
578: return n;
579: }
580:
581: public List toList() {
582: List values = new ArrayList();
583: Iterator iter = this .intervals.iterator();
584: while (iter.hasNext()) {
585: Interval I = (Interval) iter.next();
586: int a = I.a;
587: int b = I.b;
588: for (int v = a; v <= b; v++) {
589: values.add(Utils.integer(v));
590: }
591: }
592: return values;
593: }
594:
595: public int[] toArray() {
596: int[] values = new int[size()];
597: Iterator iter = this .intervals.iterator();
598: int i = 0;
599: while (iter.hasNext()) {
600: Interval I = (Interval) iter.next();
601: int a = I.a;
602: int b = I.b;
603: for (int v = a; v <= b; v++) {
604: values[i] = v;
605: i++;
606: }
607: }
608: return values;
609: }
610:
611: public org.antlr.runtime.BitSet toRuntimeBitSet() {
612: org.antlr.runtime.BitSet s = new org.antlr.runtime.BitSet(
613: getMaxElement() + 1);
614: Iterator iter = this .intervals.iterator();
615: int i = 0;
616: while (iter.hasNext()) {
617: Interval I = (Interval) iter.next();
618: int a = I.a;
619: int b = I.b;
620: for (int v = a; v <= b; v++) {
621: s.add(v);
622: i++;
623: }
624: }
625: return s;
626: }
627:
628: public void remove(int el) {
629: throw new NoSuchMethodError(
630: "IntervalSet.remove() unimplemented");
631: }
632:
633: /*
634: protected void finalize() throws Throwable {
635: super.finalize();
636: System.out.println("size "+intervals.size()+" "+size());
637: }
638: */
639: }
|