001: /**
002: * Copyright (c) 2001, Sergey A. Samokhodkin
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without modification,
006: * are permitted provided that the following conditions are met:
007: *
008: * - Redistributions of source code must retain the above copyright notice,
009: * this list of conditions and the following disclaimer.
010: * - Redistributions in binary form
011: * must reproduce the above copyright notice, this list of conditions and the following
012: * disclaimer in the documentation and/or other materials provided with the distribution.
013: * - Neither the name of jregex nor the names of its contributors may be used
014: * to endorse or promote products derived from this software without specific prior
015: * written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
018: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020: * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
021: * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
022: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
023: * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
024: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
025: * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
026: *
027: * @version 1.2_01
028: */package jregex;
029:
030: class Bitset implements UnicodeConstants {
031: private static final Block[][] categoryBits = new Block[CATEGORY_COUNT][BLOCK_COUNT];
032: static {
033: for (int i = Character.MIN_VALUE; i <= Character.MAX_VALUE; i++) {
034: int cat = Character.getType((char) i);
035: int blockNo = (i >> 8) & 0xff;
036: Block b = categoryBits[cat][blockNo];
037: if (b == null)
038: categoryBits[cat][blockNo] = b = new Block();
039: //if(i>32 && i<127)System.out.println((char)i+" -> ["+cat+"]["+blockNo+"].("+i+")");
040: b.set(i & 0xff);
041: }
042: }
043:
044: private boolean positive = true;
045: private boolean isLarge = false;
046:
047: boolean[] block0; //1-byte bit set
048: private static final boolean[] emptyBlock0 = new boolean[BLOCK_SIZE];
049:
050: Block[] blocks; //2-byte bit set
051:
052: private int weight;
053:
054: final void reset() {
055: positive = true;
056: block0 = null;
057: blocks = null;
058: isLarge = false;
059: weight = 0;
060: }
061:
062: final static void unify(Bitset bs, Term term) {
063: if (bs.isLarge) {
064: term.type = Term.BITSET2;
065: term.bitset2 = Block.toBitset2(bs.blocks);
066: } else {
067: term.type = Term.BITSET;
068: term.bitset = bs.block0 == null ? emptyBlock0 : bs.block0;
069: }
070: term.inverse = !bs.positive;
071: term.weight = bs.positive ? bs.weight : MAX_WEIGHT - bs.weight;
072: }
073:
074: final void setPositive(boolean b) {
075: positive = b;
076: }
077:
078: final boolean isPositive() {
079: return positive;
080: }
081:
082: final boolean isLarge() {
083: return isLarge;
084: }
085:
086: private final void enableLargeMode() {
087: if (isLarge)
088: return;
089: Block[] blocks = new Block[BLOCK_COUNT];
090: this .blocks = blocks;
091: if (block0 != null) {
092: blocks[0] = new Block(block0);
093: }
094: isLarge = true;
095: }
096:
097: final int getWeight() {
098: return positive ? weight : MAX_WEIGHT - weight;
099: }
100:
101: final void setWordChar(boolean unicode) {
102: if (unicode) {
103: setCategory(Lu);
104: setCategory(Ll);
105: setCategory(Lt);
106: setCategory(Lo);
107: setCategory(Nd);
108: setChar('_');
109: } else {
110: setRange('a', 'z');
111: setRange('A', 'Z');
112: setRange('0', '9');
113: setChar('_');
114: }
115: }
116:
117: final void setDigit(boolean unicode) {
118: if (unicode) {
119: setCategory(Nd);
120: } else {
121: setRange('0', '9');
122: }
123: }
124:
125: final void setSpace(boolean unicode) {
126: if (unicode) {
127: setCategory(Zs);
128: setCategory(Zp);
129: setCategory(Zl);
130: } else {
131: setChar(' ');
132: setChar('\r');
133: setChar('\n');
134: setChar('\t');
135: setChar('\f');
136: }
137: }
138:
139: final void setCategory(int c) {
140: if (!isLarge)
141: enableLargeMode();
142: Block[] catBits = categoryBits[c];
143: weight += Block.add(this .blocks, catBits, 0, BLOCK_COUNT - 1,
144: false);
145: //System.out.println("["+this+"].setCategory("+c+"): weight="+weight);
146: }
147:
148: final void setChars(String chars) {
149: for (int i = chars.length() - 1; i >= 0; i--)
150: setChar(chars.charAt(i));
151: }
152:
153: final void setChar(char c) {
154: setRange(c, c);
155: }
156:
157: final void setRange(char c1, char c2) {
158: //System.out.println("["+this+"].setRange("+c1+","+c2+"):");
159: //if(c1>31 && c1<=126 && c2>31 && c2<=126) System.out.println("setRange('"+c1+"','"+c2+"'):");
160: //else System.out.println("setRange(["+Integer.toHexString(c1)+"],["+Integer.toHexString(c2)+"]):");
161: if (c2 >= 256 || isLarge) {
162: int s = 0;
163: if (!isLarge) {
164: enableLargeMode();
165: }
166: Block[] blocks = this .blocks;
167: for (int c = c1; c <= c2; c++) {
168: int i2 = (c >> 8) & 0xff;
169: int i = c & 0xff;
170: Block block = blocks[i2];
171: if (block == null) {
172: blocks[i2] = block = new Block();
173: }
174: if (block.set(i))
175: s++;
176: }
177: weight += s;
178: } else {
179: boolean[] block0 = this .block0;
180: if (block0 == null) {
181: this .block0 = block0 = new boolean[BLOCK_SIZE];
182: }
183: weight += set(block0, true, c1, c2);
184: }
185: }
186:
187: final void add(Bitset bs) {
188: add(bs, false);
189: }
190:
191: final void add(Bitset bs, boolean inverse) {
192: weight += addImpl(this , bs, !bs.positive ^ inverse);
193: }
194:
195: private final static int addImpl(Bitset bs1, Bitset bs2, boolean inv) {
196: int s = 0;
197: if (!bs1.isLarge && !bs2.isLarge && !inv) {
198: if (bs2.block0 != null) {
199: boolean[] bits = bs1.block0;
200: if (bits == null)
201: bs1.block0 = bits = new boolean[BLOCK_SIZE];
202: s += add(bits, bs2.block0, 0, BLOCK_SIZE - 1, false);
203: }
204: } else {
205: if (!bs1.isLarge)
206: bs1.enableLargeMode();
207: if (!bs2.isLarge)
208: bs2.enableLargeMode();
209: s += Block.add(bs1.blocks, bs2.blocks, 0, BLOCK_COUNT - 1,
210: inv);
211: }
212: return s;
213: }
214:
215: final void subtract(Bitset bs) {
216: subtract(bs, false);
217: }
218:
219: final void subtract(Bitset bs, boolean inverse) {
220: //System.out.println("["+this+"].subtract(["+bs+"],"+inverse+"):");
221: weight += subtractImpl(this , bs, !bs.positive ^ inverse);
222: }
223:
224: private final static int subtractImpl(Bitset bs1, Bitset bs2,
225: boolean inv) {
226: int s = 0;
227: if (!bs1.isLarge && !bs2.isLarge && !inv) {
228: boolean[] bits1, bits2;
229: if ((bits2 = bs2.block0) != null) {
230: bits1 = bs1.block0;
231: if (bits1 == null)
232: return 0;
233: s += subtract(bits1, bits2, 0, BLOCK_SIZE - 1, false);
234: }
235: } else {
236: if (!bs1.isLarge)
237: bs1.enableLargeMode();
238: if (!bs2.isLarge)
239: bs2.enableLargeMode();
240: s += Block.subtract(bs1.blocks, bs2.blocks, 0,
241: BLOCK_COUNT - 1, inv);
242: }
243: return s;
244: }
245:
246: final void intersect(Bitset bs) {
247: intersect(bs, false);
248: }
249:
250: final void intersect(Bitset bs, boolean inverse) {
251: //System.out.println("["+this+"].intersect(["+bs+"],"+inverse+"):");
252: subtract(bs, !inverse);
253: }
254:
255: static final int add(boolean[] bs1, boolean[] bs2, int from,
256: int to, boolean inv) {
257: //System.out.println("Bitset.add(boolean[],boolean[],"+inv+"):");
258: int s = 0;
259: for (int i = from; i <= to; i++) {
260: if (bs1[i])
261: continue;
262: if (!(bs2[i] ^ inv))
263: continue;
264: //System.out.println(" "+i+": value0="+value0+", value="+value);
265: s++;
266: bs1[i] = true;
267: //System.out.println(" s="+s+", bs1[i]->"+bs1[i]);
268: }
269: return s;
270: }
271:
272: static final int subtract(boolean[] bs1, boolean[] bs2, int from,
273: int to, boolean inv) {
274: //System.out.println("Bitset.subtract(boolean[],boolean[],"+inv+"):");
275: int s = 0;
276: for (int i = from; i <= to; i++) {
277: if (!bs1[i])
278: continue;
279: if (!(bs2[i] ^ inv))
280: continue;
281: s--;
282: bs1[i] = false;
283: //if(i>32 && i<127) System.out.println(" s="+s+", bs1['"+(char)i+"']->"+bs1[i]);
284: //else System.out.println(" s="+s+", bs1["+i+"]->"+bs1[i]);
285: }
286: return s;
287: }
288:
289: static final int set(boolean[] arr, boolean value, int from, int to) {
290: int s = 0;
291: for (int i = from; i <= to; i++) {
292: if (arr[i] == value)
293: continue;
294: if (value)
295: s++;
296: else
297: s--;
298: arr[i] = value;
299: }
300: return s;
301: }
302:
303: public String toString() {
304: StringBuffer sb = new StringBuffer();
305: if (!positive)
306: sb.append('^');
307:
308: if (isLarge)
309: sb.append(CharacterClass.stringValue2(Block
310: .toBitset2(blocks)));
311: else if (block0 != null)
312: sb.append(CharacterClass.stringValue0(block0));
313:
314: sb.append('(');
315: sb.append(getWeight());
316: sb.append(')');
317: return sb.toString();
318: }
319: }
320:
321: class Block implements UnicodeConstants {
322: private boolean isFull;
323: //private boolean[] bits;
324: boolean[] bits;
325: private boolean shared = false;
326:
327: Block() {
328: }
329:
330: Block(boolean[] bits) {
331: this .bits = bits;
332: shared = true;
333: }
334:
335: final boolean set(int c) {
336: //System.out.println("Block.add("+CharacterClass.stringValue2(toBitset2(targets))+","+CharacterClass.stringValue2(toBitset2(addends))+","+from*BLOCK_SIZE+","+to*BLOCK_SIZE+","+inv+"):");
337: if (isFull)
338: return false;
339: boolean[] bits = this .bits;
340: if (bits == null) {
341: this .bits = bits = new boolean[BLOCK_SIZE];
342: shared = false;
343: bits[c] = true;
344: return true;
345: }
346:
347: if (bits[c])
348: return false;
349:
350: if (shared)
351: bits = copyBits(this );
352:
353: bits[c] = true;
354: return true;
355: }
356:
357: final boolean get(int c) {
358: if (isFull)
359: return true;
360: boolean[] bits = this .bits;
361: if (bits == null) {
362: return false;
363: }
364: return bits[c];
365: }
366:
367: final static int add(Block[] targets, Block[] addends, int from,
368: int to, boolean inv) {
369: //System.out.println("Block.add("+CharacterClass.stringValue2(toBitset2(targets))+","+CharacterClass.stringValue2(toBitset2(addends))+","+from*BLOCK_SIZE+","+to*BLOCK_SIZE+","+inv+"):");
370: //System.out.println("Block.add():");
371: int s = 0;
372: for (int i = from; i <= to; i++) {
373: Block addend = addends[i];
374: //System.out.println(" "+i+": ");
375: //System.out.println(" target="+(target==null? "null": i==0? CharacterClass.stringValue0(target.bits): "{"+count(target.bits,0,BLOCK_SIZE-1)+"}"));
376: //System.out.println(" addend="+(addend==null? "null": i==0? CharacterClass.stringValue0(addend.bits): "{"+count(addend.bits,0,BLOCK_SIZE-1)+"}"));
377: if (addend == null) {
378: if (!inv)
379: continue;
380: } else if (addend.isFull && inv)
381: continue;
382:
383: Block target = targets[i];
384: if (target == null)
385: targets[i] = target = new Block();
386: else if (target.isFull)
387: continue;
388:
389: s += add(target, addend, inv);
390: //System.out.println(" result="+(target==null? "null": i==0? CharacterClass.stringValue0(target.bits): "{"+count(target.bits,0,BLOCK_SIZE-1)+"}"));
391: //System.out.println(" s="+s);
392: }
393: //System.out.println(" s="+s);
394: return s;
395: }
396:
397: private final static int add(Block target, Block addend, boolean inv) {
398: //System.out.println("Block.add(Block,Block):");
399: //there is provided that !target.isFull
400: boolean[] targetbits, addbits;
401: if (addend == null) {
402: if (!inv)
403: return 0;
404: int s = BLOCK_SIZE;
405: if ((targetbits = target.bits) != null) {
406: s -= count(targetbits, 0, BLOCK_SIZE - 1);
407: }
408: target.isFull = true;
409: target.bits = null;
410: target.shared = false;
411: return s;
412: } else if (addend.isFull) {
413: if (inv)
414: return 0;
415: int s = BLOCK_SIZE;
416: if ((targetbits = target.bits) != null) {
417: s -= count(targetbits, 0, BLOCK_SIZE - 1);
418: }
419: target.isFull = true;
420: target.bits = null;
421: target.shared = false;
422: return s;
423: } else if ((addbits = addend.bits) == null) {
424: if (!inv)
425: return 0;
426: int s = BLOCK_SIZE;
427: if ((targetbits = target.bits) != null) {
428: s -= count(targetbits, 0, BLOCK_SIZE - 1);
429: }
430: target.isFull = true;
431: target.bits = null;
432: target.shared = false;
433: return s;
434: } else {
435: if ((targetbits = target.bits) == null) {
436: if (!inv) {
437: target.bits = addbits;
438: target.shared = true;
439: return count(addbits, 0, BLOCK_SIZE - 1);
440: } else {
441: target.bits = targetbits = emptyBits(null);
442: target.shared = false;
443: return Bitset.add(targetbits, addbits, 0,
444: BLOCK_SIZE - 1, inv);
445: }
446: } else {
447: if (target.shared)
448: targetbits = copyBits(target);
449: return Bitset.add(targetbits, addbits, 0,
450: BLOCK_SIZE - 1, inv);
451: }
452: }
453: }
454:
455: final static int subtract(Block[] targets, Block[] subtrahends,
456: int from, int to, boolean inv) {
457: //System.out.println("Block.subtract(Block[],Block[],"+inv+"):");
458: int s = 0;
459: for (int i = from; i <= to; i++) {
460: //System.out.println(" "+i+": ");
461:
462: Block target = targets[i];
463: if (target == null
464: || (!target.isFull && target.bits == null))
465: continue;
466: //System.out.println(" target="+(target==null? "null": i==0? CharacterClass.stringValue0(target.bits): "{"+ (target.isFull? BLOCK_SIZE: count(target.bits,0,BLOCK_SIZE-1))+"}"));
467:
468: Block subtrahend = subtrahends[i];
469: //System.out.println(" subtrahend="+(subtrahend==null? "null": i==0? CharacterClass.stringValue0(subtrahend.bits): "{"+(subtrahend.isFull? BLOCK_SIZE: count(subtrahend.bits,0,BLOCK_SIZE-1))+"}"));
470:
471: if (subtrahend == null) {
472: if (!inv)
473: continue;
474: else {
475: if (target.isFull) {
476: s -= BLOCK_SIZE;
477: } else {
478: s -= count(target.bits, 0, BLOCK_SIZE - 1);
479: }
480: target.isFull = false;
481: target.bits = null;
482: target.shared = false;
483: }
484: } else {
485: s += subtract(target, subtrahend, inv);
486: }
487: //System.out.println(" result="+(target==null? "null": i==0? CharacterClass.stringValue0(target.bits): "{"+ (target.isFull? BLOCK_SIZE: target.bits==null? 0: count(target.bits,0,BLOCK_SIZE-1))+"}"));
488: //System.out.println(" s="+s);
489: }
490: //System.out.println(" s="+s);
491: return s;
492: }
493:
494: private final static int subtract(Block target, Block subtrahend,
495: boolean inv) {
496: boolean[] targetbits, subbits;
497: //System.out.println("subtract(Block,Block,"+inv+")");
498: //there is provided that target.isFull or target.bits!=null
499: if (subtrahend.isFull) {
500: if (inv)
501: return 0;
502: int s = 0;
503: if (target.isFull) {
504: s = BLOCK_SIZE;
505: } else {
506: s = count(target.bits, 0, BLOCK_SIZE - 1);
507: }
508: target.isFull = false;
509: target.bits = null;
510: target.shared = false;
511: return s;
512: } else if ((subbits = subtrahend.bits) == null) {
513: if (!inv)
514: return 0;
515: int s = 0;
516: if (target.isFull) {
517: s = BLOCK_SIZE;
518: } else {
519: s = count(target.bits, 0, BLOCK_SIZE - 1);
520: }
521: target.isFull = false;
522: target.bits = null;
523: target.shared = false;
524: return s;
525: } else {
526: if (target.isFull) {
527: boolean[] bits = fullBits(target.bits);
528: int s = Bitset.subtract(bits, subbits, 0,
529: BLOCK_SIZE - 1, inv);
530: target.isFull = false;
531: target.shared = false;
532: target.bits = bits;
533: return s;
534: } else {
535: if (target.shared)
536: targetbits = copyBits(target);
537: else
538: targetbits = target.bits;
539: return Bitset.subtract(targetbits, subbits, 0,
540: BLOCK_SIZE - 1, inv);
541: }
542: }
543: }
544:
545: private static boolean[] copyBits(Block block) {
546: boolean[] bits = new boolean[BLOCK_SIZE];
547: System.arraycopy(block.bits, 0, bits, 0, BLOCK_SIZE);
548: block.bits = bits;
549: block.shared = false;
550: return bits;
551: }
552:
553: private static boolean[] fullBits(boolean[] bits) {
554: if (bits == null)
555: bits = new boolean[BLOCK_SIZE];
556: System.arraycopy(FULL_BITS, 0, bits, 0, BLOCK_SIZE);
557: return bits;
558: }
559:
560: private static boolean[] emptyBits(boolean[] bits) {
561: if (bits == null)
562: bits = new boolean[BLOCK_SIZE];
563: else
564: System.arraycopy(EMPTY_BITS, 0, bits, 0, BLOCK_SIZE);
565: return bits;
566: }
567:
568: final static int count(boolean[] arr, int from, int to) {
569: int s = 0;
570: for (int i = from; i <= to; i++) {
571: if (arr[i])
572: s++;
573: }
574: return s;
575: }
576:
577: final static boolean[][] toBitset2(Block[] blocks) {
578: int len = blocks.length;
579: boolean[][] result = new boolean[len][];
580: for (int i = 0; i < len; i++) {
581: Block block = blocks[i];
582: if (block == null)
583: continue;
584: if (block.isFull) {
585: result[i] = FULL_BITS;
586: } else
587: result[i] = block.bits;
588: }
589: return result;
590: }
591:
592: private final static boolean[] EMPTY_BITS = new boolean[BLOCK_SIZE];
593: private final static boolean[] FULL_BITS = new boolean[BLOCK_SIZE];
594: static {
595: for (int i = 0; i < BLOCK_SIZE; i++)
596: FULL_BITS[i] = true;
597: }
598: }
|