001: /*
002: *******************************************************************************
003: * Copyright (C) 2002-2004, International Business Machines
004: * Corporation and others. All Rights Reserved.
005: *******************************************************************************
006: */
007:
008: package com.ibm.icu.impl;
009:
010: /**
011: * @version 1.1
012: * @author Markus W. Scherer
013: * Ram: Add documentation, remove unwanted methods, improve coverage.
014: */
015:
/**
 * Simple class for handling serialized USet/UnicodeSet structures
 * without object creation. See ICU4C icu/source/common/uset.c.
 *
 * <p>The set is held as a list of 16-bit units: the first
 * <code>bmpLength</code> units are single-unit (BMP) values, the remaining
 * <code>length - bmpLength</code> units are (high, low) pairs that each
 * encode one supplementary value. Values alternate between the start of a
 * range and its exclusive limit; a range without a stored limit extends
 * to U+10FFFF.
 *
 * @internal
 */
public final class USerializedSet {
    /** Initial capacity of the unit array; setToOne() needs at most 4 units. */
    private static final int DEFAULT_CAPACITY = 8;

    /** Largest valid code point. */
    private static final int MAX_CODE_POINT = 0x10ffff;

    /**
     * Fill in the given serialized set object.
     *
     * <p>The first unit at <code>srcStart</code> is the total length; if its
     * top bit (0x8000) is set, supplementary values are present and the
     * following unit holds the BMP length. The data units are copied, so
     * later changes to <code>src</code> do not affect this object.
     *
     * @param src pointer to start of array
     * @param srcStart pointer to start of serialized data (length value)
     * @return true if the given array is valid; an array that is too short
     *         is reported by exception rather than by returning false
     * @throws IndexOutOfBoundsException if <code>src</code> does not hold as
     *         many units as the length value announces; the set is left
     *         empty in that case
     * @draft ICU 2.4
     */
    public final boolean getSet(char[] src, int srcStart) {
        // leave most argument checking up to Java exceptions
        // Reset first so that any exception below leaves an empty set behind.
        array = null;
        bmpLength = length = 0;

        int total = src[srcStart++];
        int bmp;
        if ((total & 0x8000) != 0) {
            /* there are supplementary values */
            total &= 0x7fff;
            // one extra unit is needed for the BMP length that follows
            if (src.length < (srcStart + 1 + total)) {
                throw new IndexOutOfBoundsException();
            }
            bmp = src[srcStart++];
        } else {
            /* only BMP values */
            if (src.length < (srcStart + total)) {
                throw new IndexOutOfBoundsException();
            }
            bmp = total;
        }

        char[] units = new char[total];
        System.arraycopy(src, srcStart, units, 0, total);
        array = units;
        length = total;
        bmpLength = bmp;
        return true;
    }

    /**
     * Set the USerializedSet to contain the given character (and nothing
     * else). Values outside 0..0x10ffff are ignored and leave the set
     * unchanged.
     *
     * @param c the code point that becomes the only member of the set
     * @draft ICU 2.4
     */
    public final void setToOne(int c) {
        if (c < 0 || MAX_CODE_POINT < c) {
            return;
        }

        // getSet() sizes the array to exactly the loaded data (or leaves it
        // null on failure), so make sure the up-to-4 units written below fit.
        if (array == null || array.length < 4) {
            array = new char[DEFAULT_CAPACITY];
        }

        if (c < 0xffff) {
            /* start and limit are both BMP values */
            bmpLength = length = 2;
            array[0] = (char) c;
            array[1] = (char) (c + 1);
        } else if (c == 0xffff) {
            /* BMP start, supplementary limit 0x10000 */
            bmpLength = 1;
            length = 3;
            array[0] = 0xffff;
            array[1] = 1;
            array[2] = 0;
        } else if (c < MAX_CODE_POINT) {
            /* start and limit are both supplementary values */
            bmpLength = 0;
            length = 4;
            array[0] = (char) (c >> 16);
            array[1] = (char) c;
            int limit = c + 1;
            array[2] = (char) (limit >> 16);
            array[3] = (char) limit;
        } else /* c==0x10ffff */ {
            /* the limit 0x110000 is implied, so only the start is stored */
            bmpLength = 0;
            length = 2;
            array[0] = 0x10;
            array[1] = 0xffff;
        }
    }

    /**
     * Returns a range of characters contained in the given serialized
     * set. This method does not modify the set.
     *
     * @param rangeIndex a non-negative integer in the range <code>0..
     * countRanges()-1</code>
     * @param range variable to receive the data in the range:
     *        <code>range[0]</code> is the first and <code>range[1]</code>
     *        the last (inclusive) code point
     * @return true if rangeIndex is valid, otherwise false
     * @throws IllegalArgumentException if <code>range</code> is null or has
     *         fewer than two elements (checked only for non-negative
     *         rangeIndex)
     * @draft ICU 2.4
     */
    public final boolean getRange(int rangeIndex, int[] range) {
        if (rangeIndex < 0) {
            return false;
        }
        if (range == null || range.length < 2) {
            throw new IllegalArgumentException();
        }
        // A set stored in 'length' units cannot hold more than 'length'
        // ranges; rejecting larger indices here also keeps the index
        // arithmetic below from overflowing.
        if (rangeIndex > length) {
            return false;
        }

        int index = rangeIndex * 2; /* address start/limit pairs */
        if (index < bmpLength) {
            /* the start is a BMP value; the limit may be BMP, supplementary or implied */
            range[0] = array[index++];
            int limit;
            if (index < bmpLength) {
                limit = array[index];
            } else if (index < length) {
                limit = (((int) array[index]) << 16) | array[index + 1];
            } else {
                limit = 0x110000;
            }
            range[1] = limit - 1;
            return true;
        }

        /* start and limit are both supplementary (or the limit is implied) */
        index -= bmpLength;
        index *= 2; /* address pairs of pairs of units */
        // Local copy: the stored length must stay untouched by a lookup.
        int suppLength = length - bmpLength;
        if (index >= suppLength) {
            return false;
        }
        int offset = bmpLength;
        range[0] = (((int) array[offset + index]) << 16)
                | array[offset + index + 1];
        index += 2;
        int limit;
        if (index < suppLength) {
            limit = (((int) array[offset + index]) << 16)
                    | array[offset + index + 1];
        } else {
            limit = 0x110000;
        }
        range[1] = limit - 1;
        return true;
    }

    /**
     * Returns true if the given USerializedSet contains the given
     * character. Values outside 0..0x10ffff are never contained.
     *
     * @param c the character to test for
     * @return true if set contains c
     * @draft ICU 2.4
     */
    ///CLOVER:OFF
    public final boolean contains(int c) {
        if (c < 0 || c > MAX_CODE_POINT) {
            return false;
        }

        if (c <= 0xffff) {
            int i;
            /* find c in the BMP part */
            for (i = 0; i < bmpLength && c >= array[i]; ++i) {
            }
            /* an odd number of boundaries passed means c lies inside a range */
            return (i & 1) != 0;
        } else {
            int i;
            /* find c in the supplementary part */
            char high = (char) (c >> 16), low = (char) c;
            for (i = bmpLength; i < length
                    && (high > array[i] || (high == array[i] && low >= array[i + 1])); i += 2) {
            }

            /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
            return ((i + bmpLength) & 2) != 0;
        }
    }

    ///CLOVER:ON
    /**
     * Returns the number of disjoint ranges of characters contained in
     * the given serialized set. Ignores any strings contained in the
     * set.
     *
     * @return a non-negative integer counting the character ranges
     * contained in set
     * @draft ICU 2.4
     */
    public final int countRanges() {
        // Each BMP value takes one unit and each supplementary value two;
        // two values (start, limit) make one range, rounding up because the
        // last limit may be implied.
        return (bmpLength + (length - bmpLength) / 2 + 1) / 2;
    }

    /** Range boundary values; null only while the set is empty after a failed getSet(). */
    private char[] array = new char[DEFAULT_CAPACITY];

    /** Number of leading single-unit (BMP) values in array. */
    private int bmpLength;

    /** Total number of units in use in array. */
    private int length;
}
|