001: /*
002: ******************************************************************************
003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: ******************************************************************************
006: */
007:
008: package com.ibm.icu.impl;
009:
010: import com.ibm.icu.lang.UCharacter;
011: import com.ibm.icu.text.UTF16;
012: import java.util.Arrays;
013: import java.io.OutputStream;
014: import java.io.DataOutputStream;
015: import java.io.IOException;
016:
017: /**
018: * Builder class to manipulate and generate a trie.
019: * This is useful for ICU data in primitive types.
020: * Provides a compact way to store information that is indexed by Unicode
021: * values, such as character properties, types, keyboard values, etc. This is
022: * very useful when you have a block of Unicode data that contains significant
023: * values while the rest of the Unicode data is unused in the application or
024: * when you have a lot of redundance, such as where all 21,000 Han ideographs
025: * have the same value. However, lookup is much faster than a hash table.
026: * A trie of any primitive data type serves two purposes:
027: * <UL type = round>
028: * <LI>Fast access of the indexed values.
029: * <LI>Smaller memory footprint.
030: * </UL>
031: * This is a direct port from the ICU4C version
032: * @author Syn Wee Quek
033: */
034: public class IntTrieBuilder extends TrieBuilder {
035: // public constructor ----------------------------------------------
036:
037: /**
038: * Copy constructor
039: */
040: public IntTrieBuilder(IntTrieBuilder table) {
041: super (table);
042: m_data_ = new int[m_dataCapacity_];
043: System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_);
044: m_initialValue_ = table.m_initialValue_;
045: m_leadUnitValue_ = table.m_leadUnitValue_;
046: }
047:
048: /**
049: * Constructs a build table
050: * @param aliasdata data to be filled into table
051: * @param maxdatalength maximum data length allowed in table
052: * @param initialvalue inital data value
053: * @param latin1linear is latin 1 to be linear
054: */
055: public IntTrieBuilder(int aliasdata[], int maxdatalength,
056: int initialvalue, int leadunitvalue, boolean latin1linear) {
057: super ();
058: if (maxdatalength < DATA_BLOCK_LENGTH
059: || (latin1linear && maxdatalength < 1024)) {
060: throw new IllegalArgumentException(
061: "Argument maxdatalength is too small");
062: }
063:
064: if (aliasdata != null) {
065: m_data_ = aliasdata;
066: } else {
067: m_data_ = new int[maxdatalength];
068: }
069:
070: // preallocate and reset the first data block (block index 0)
071: int j = DATA_BLOCK_LENGTH;
072:
073: if (latin1linear) {
074: // preallocate and reset the first block (number 0) and Latin-1
075: // (U+0000..U+00ff) after that made sure above that
076: // maxDataLength >= 1024
077: // set indexes to point to consecutive data blocks
078: int i = 0;
079: do {
080: // do this at least for trie->index[0] even if that block is
081: // only partly used for Latin-1
082: m_index_[i++] = j;
083: j += DATA_BLOCK_LENGTH;
084: } while (i < (256 >> SHIFT_));
085: }
086:
087: m_dataLength_ = j;
088: // reset the initially allocated blocks to the initial value
089: Arrays.fill(m_data_, 0, m_dataLength_, initialvalue);
090: m_initialValue_ = initialvalue;
091: m_leadUnitValue_ = leadunitvalue;
092: m_dataCapacity_ = maxdatalength;
093: m_isLatin1Linear_ = latin1linear;
094: m_isCompacted_ = false;
095: }
096:
097: // public methods -------------------------------------------------------
098:
099: /*public final void print()
100: {
101: int i = 0;
102: int oldvalue = m_index_[i];
103: int count = 0;
104: System.out.println("index length " + m_indexLength_
105: + " --------------------------");
106: while (i < m_indexLength_) {
107: if (m_index_[i] != oldvalue) {
108: System.out.println("index has " + count + " counts of "
109: + Integer.toHexString(oldvalue));
110: count = 0;
111: oldvalue = m_index_[i];
112: }
113: count ++;
114: i ++;
115: }
116: System.out.println("index has " + count + " counts of "
117: + Integer.toHexString(oldvalue));
118: i = 0;
119: oldvalue = m_data_[i];
120: count = 0;
121: System.out.println("data length " + m_dataLength_
122: + " --------------------------");
123: while (i < m_dataLength_) {
124: if (m_data_[i] != oldvalue) {
125: if ((oldvalue & 0xf1000000) == 0xf1000000) {
126: int temp = oldvalue & 0xffffff;
127: temp += 0x320;
128: oldvalue = 0xf1000000 | temp;
129: }
130: if ((oldvalue & 0xf2000000) == 0xf2000000) {
131: int temp = oldvalue & 0xffffff;
132: temp += 0x14a;
133: oldvalue = 0xf2000000 | temp;
134: }
135: System.out.println("data has " + count + " counts of "
136: + Integer.toHexString(oldvalue));
137: count = 0;
138: oldvalue = m_data_[i];
139: }
140: count ++;
141: i ++;
142: }
143: if ((oldvalue & 0xf1000000) == 0xf1000000) {
144: int temp = oldvalue & 0xffffff;
145: temp += 0x320;
146: oldvalue = 0xf1000000 | temp;
147: }
148: if ((oldvalue & 0xf2000000) == 0xf2000000) {
149: int temp = oldvalue & 0xffffff;
150: temp += 0x14a;
151: oldvalue = 0xf2000000 | temp;
152: }
153: System.out.println("data has " + count + " counts of "
154: + Integer.toHexString(oldvalue));
155: }
156: */
157: /**
158: * Gets a 32 bit data from the table data
159: * @param ch codepoint which data is to be retrieved
160: * @return the 32 bit data
161: */
162: public int getValue(int ch) {
163: // valid, uncompacted trie and valid c?
164: if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
165: return 0;
166: }
167:
168: int block = m_index_[ch >> SHIFT_];
169: return m_data_[Math.abs(block) + (ch & MASK_)];
170: }
171:
172: /**
173: * Get a 32 bit data from the table data
174: * @param ch code point for which data is to be retrieved.
175: * @param inBlockZero Output parameter, inBlockZero[0] returns true if the
176: * char maps into block zero, otherwise false.
177: * @return the 32 bit data value.
178: */
179: public int getValue(int ch, boolean[] inBlockZero) {
180: // valid, uncompacted trie and valid c?
181: if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
182: if (inBlockZero != null) {
183: inBlockZero[0] = true;
184: }
185: return 0;
186: }
187:
188: int block = m_index_[ch >> SHIFT_];
189: if (inBlockZero != null) {
190: inBlockZero[0] = (block == 0);
191: }
192: return m_data_[Math.abs(block) + (ch & MASK_)];
193: }
194:
195: /**
196: * Sets a 32 bit data in the table data
197: * @param ch codepoint which data is to be set
198: * @param value to set
199: * @return true if the set is successful, otherwise
200: * if the table has been compacted return false
201: */
202: public boolean setValue(int ch, int value) {
203: // valid, uncompacted trie and valid c?
204: if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
205: return false;
206: }
207:
208: int block = getDataBlock(ch);
209: if (block < 0) {
210: return false;
211: }
212:
213: m_data_[block + (ch & MASK_)] = value;
214: return true;
215: }
216:
217: /**
218: * Serializes the build table with 32 bit data
219: * @param datamanipulate builder raw fold method implementation
220: * @param triedatamanipulate result trie fold method
221: * @return a new trie
222: */
223: public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate,
224: Trie.DataManipulate triedatamanipulate) {
225: if (datamanipulate == null) {
226: throw new IllegalArgumentException(
227: "Parameters can not be null");
228: }
229: // fold and compact if necessary, also checks that indexLength is
230: // within limits
231: if (!m_isCompacted_) {
232: // compact once without overlap to improve folding
233: compact(false);
234: // fold the supplementary part of the index array
235: fold(datamanipulate);
236: // compact again with overlap for minimum data array length
237: compact(true);
238: m_isCompacted_ = true;
239: }
240: // is dataLength within limits?
241: if (m_dataLength_ >= MAX_DATA_LENGTH_) {
242: throw new ArrayIndexOutOfBoundsException(
243: "Data length too small");
244: }
245:
246: char index[] = new char[m_indexLength_];
247: int data[] = new int[m_dataLength_];
248: // write the index (stage 1) array and the 32-bit data (stage 2) array
249: // write 16-bit index values shifted right by INDEX_SHIFT_
250: for (int i = 0; i < m_indexLength_; i++) {
251: index[i] = (char) (m_index_[i] >>> INDEX_SHIFT_);
252: }
253: // write 32-bit data values
254: System.arraycopy(m_data_, 0, data, 0, m_dataLength_);
255:
256: int options = SHIFT_ | (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_);
257: options |= OPTIONS_DATA_IS_32_BIT_;
258: if (m_isLatin1Linear_) {
259: options |= OPTIONS_LATIN1_IS_LINEAR_;
260: }
261: return new IntTrie(index, data, m_initialValue_, options,
262: triedatamanipulate);
263: }
264:
265: /**
266: * Serializes the build table to an output stream.
267: *
268: * Compacts the build-time trie after all values are set, and then
269: * writes the serialized form onto an output stream.
270: *
271: * After this, this build-time Trie can only be serialized again and/or closed;
272: * no further values can be added.
273: *
274: * This function is the rough equivalent of utrie_seriaize() in ICU4C.
275: *
276: * @param os the output stream to which the seriaized trie will be written.
277: * If nul, the function still returns the size of the serialized Trie.
278: * @param reduceTo16Bits If true, reduce the data size to 16 bits. The resulting
279: * serialized form can then be used to create a CharTrie.
280: * @param datamanipulate builder raw fold method implementation
281: * @return the number of bytes written to the output stream.
282: */
283: public int serialize(OutputStream os, boolean reduceTo16Bits,
284: TrieBuilder.DataManipulate datamanipulate)
285: throws IOException {
286: if (datamanipulate == null) {
287: throw new IllegalArgumentException(
288: "Parameters can not be null");
289: }
290:
291: // fold and compact if necessary, also checks that indexLength is
292: // within limits
293: if (!m_isCompacted_) {
294: // compact once without overlap to improve folding
295: compact(false);
296: // fold the supplementary part of the index array
297: fold(datamanipulate);
298: // compact again with overlap for minimum data array length
299: compact(true);
300: m_isCompacted_ = true;
301: }
302:
303: // is dataLength within limits?
304: int length;
305: if (reduceTo16Bits) {
306: length = m_dataLength_ + m_indexLength_;
307: } else {
308: length = m_dataLength_;
309: }
310: if (length >= MAX_DATA_LENGTH_) {
311: throw new ArrayIndexOutOfBoundsException(
312: "Data length too small");
313: }
314:
315: // struct UTrieHeader {
316: // int32_t signature;
317: // int32_t options (a bit field)
318: // int32_t indexLength
319: // int32_t dataLength
320: length = Trie.HEADER_LENGTH_ + 2 * m_indexLength_;
321: if (reduceTo16Bits) {
322: length += 2 * m_dataLength_;
323: } else {
324: length += 4 * m_dataLength_;
325: }
326:
327: if (os == null) {
328: // No output stream. Just return the length of the serialized Trie, in bytes.
329: return length;
330: }
331:
332: DataOutputStream dos = new DataOutputStream(os);
333: dos.writeInt(Trie.HEADER_SIGNATURE_);
334:
335: int options = Trie.INDEX_STAGE_1_SHIFT_
336: | (Trie.INDEX_STAGE_2_SHIFT_ << Trie.HEADER_OPTIONS_INDEX_SHIFT_);
337: if (!reduceTo16Bits) {
338: options |= Trie.HEADER_OPTIONS_DATA_IS_32_BIT_;
339: }
340: if (m_isLatin1Linear_) {
341: options |= Trie.HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
342: }
343: dos.writeInt(options);
344:
345: dos.writeInt(m_indexLength_);
346: dos.writeInt(m_dataLength_);
347:
348: /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */
349: if (reduceTo16Bits) {
350: /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */
351: for (int i = 0; i < m_indexLength_; i++) {
352: int v = (m_index_[i] + m_indexLength_) >>> Trie.INDEX_STAGE_2_SHIFT_;
353: dos.writeChar(v);
354: }
355:
356: /* write 16-bit data values */
357: for (int i = 0; i < m_dataLength_; i++) {
358: int v = m_data_[i] & 0x0000ffff;
359: dos.writeChar(v);
360: }
361: } else {
362: /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */
363: for (int i = 0; i < m_indexLength_; i++) {
364: int v = (m_index_[i]) >>> Trie.INDEX_STAGE_2_SHIFT_;
365: dos.writeChar(v);
366: }
367:
368: /* write 32-bit data values */
369: for (int i = 0; i < m_dataLength_; i++) {
370: dos.writeInt(m_data_[i]);
371: }
372: }
373:
374: return length;
375:
376: }
377:
378: /**
379: * Set a value in a range of code points [start..limit].
380: * All code points c with start <= c < limit will get the value if
381: * overwrite is true or if the old value is 0.
382: * @param start the first code point to get the value
383: * @param limit one past the last code point to get the value
384: * @param value the value
385: * @param overwrite flag for whether old non-initial values are to be
386: * overwritten
387: * @return false if a failure occurred (illegal argument or data array
388: * overrun)
389: */
390: public boolean setRange(int start, int limit, int value,
391: boolean overwrite) {
392: // repeat value in [start..limit[
393: // mark index values for repeat-data blocks by setting bit 31 of the
394: // index values fill around existing values if any, if(overwrite)
395:
396: // valid, uncompacted trie and valid indexes?
397: if (m_isCompacted_ || start < UCharacter.MIN_VALUE
398: || start > UCharacter.MAX_VALUE
399: || limit < UCharacter.MIN_VALUE
400: || limit > (UCharacter.MAX_VALUE + 1) || start > limit) {
401: return false;
402: }
403:
404: if (start == limit) {
405: return true; // nothing to do
406: }
407:
408: if ((start & MASK_) != 0) {
409: // set partial block at [start..following block boundary[
410: int block = getDataBlock(start);
411: if (block < 0) {
412: return false;
413: }
414:
415: int nextStart = (start + DATA_BLOCK_LENGTH) & ~MASK_;
416: if (nextStart <= limit) {
417: fillBlock(block, start & MASK_, DATA_BLOCK_LENGTH,
418: value, overwrite);
419: start = nextStart;
420: } else {
421: fillBlock(block, start & MASK_, limit & MASK_, value,
422: overwrite);
423: return true;
424: }
425: }
426:
427: // number of positions in the last, partial block
428: int rest = limit & MASK_;
429:
430: // round down limit to a block boundary
431: limit &= ~MASK_;
432:
433: // iterate over all-value blocks
434: int repeatBlock = 0;
435: if (value == m_initialValue_) {
436: // repeatBlock = 0; assigned above
437: } else {
438: repeatBlock = -1;
439: }
440: while (start < limit) {
441: // get index value
442: int block = m_index_[start >> SHIFT_];
443: if (block > 0) {
444: // already allocated, fill in value
445: fillBlock(block, 0, DATA_BLOCK_LENGTH, value, overwrite);
446: } else if (m_data_[-block] != value
447: && (block == 0 || overwrite)) {
448: // set the repeatBlock instead of the current block 0 or range
449: // block
450: if (repeatBlock >= 0) {
451: m_index_[start >> SHIFT_] = -repeatBlock;
452: } else {
453: // create and set and fill the repeatBlock
454: repeatBlock = getDataBlock(start);
455: if (repeatBlock < 0) {
456: return false;
457: }
458:
459: // set the negative block number to indicate that it is a
460: // repeat block
461: m_index_[start >> SHIFT_] = -repeatBlock;
462: fillBlock(repeatBlock, 0, DATA_BLOCK_LENGTH, value,
463: true);
464: }
465: }
466:
467: start += DATA_BLOCK_LENGTH;
468: }
469:
470: if (rest > 0) {
471: // set partial block at [last block boundary..limit[
472: int block = getDataBlock(start);
473: if (block < 0) {
474: return false;
475: }
476: fillBlock(block, 0, rest, value, overwrite);
477: }
478:
479: return true;
480: }
481:
482: // protected data member ------------------------------------------------
483:
484: protected int m_data_[];
485: protected int m_initialValue_;
486:
487: // private data member ------------------------------------------------
488:
489: private int m_leadUnitValue_;
490:
491: // private methods ------------------------------------------------------
492:
493: private int allocDataBlock() {
494: int newBlock = m_dataLength_;
495: int newTop = newBlock + DATA_BLOCK_LENGTH;
496: if (newTop > m_dataCapacity_) {
497: // out of memory in the data array
498: return -1;
499: }
500: m_dataLength_ = newTop;
501: return newBlock;
502: }
503:
504: /**
505: * No error checking for illegal arguments.
506: * @param ch codepoint to look for
507: * @return -1 if no new data block available (out of memory in data array)
508: */
509: private int getDataBlock(int ch) {
510: ch >>= SHIFT_;
511: int indexValue = m_index_[ch];
512: if (indexValue > 0) {
513: return indexValue;
514: }
515:
516: // allocate a new data block
517: int newBlock = allocDataBlock();
518: if (newBlock < 0) {
519: // out of memory in the data array
520: return -1;
521: }
522: m_index_[ch] = newBlock;
523:
524: // copy-on-write for a block from a setRange()
525: System.arraycopy(m_data_, Math.abs(indexValue), m_data_,
526: newBlock, DATA_BLOCK_LENGTH << 2);
527: return newBlock;
528: }
529:
530: /**
531: * Compact a folded build-time trie.
532: * The compaction
533: * - removes blocks that are identical with earlier ones
534: * - overlaps adjacent blocks as much as possible (if overlap == true)
535: * - moves blocks in steps of the data granularity
536: * - moves and overlaps blocks that overlap with multiple values in the overlap region
537: *
538: * It does not
539: * - try to move and overlap blocks that are not already adjacent
540: * @param overlap flag
541: */
542: private void compact(boolean overlap) {
543: if (m_isCompacted_) {
544: return; // nothing left to do
545: }
546:
547: // compaction
548: // initialize the index map with "block is used/unused" flags
549: findUnusedBlocks();
550:
551: // if Latin-1 is preallocated and linear, then do not compact Latin-1
552: // data
553: int overlapStart = DATA_BLOCK_LENGTH;
554: if (m_isLatin1Linear_ && SHIFT_ <= 8) {
555: overlapStart += 256;
556: }
557:
558: int newStart = DATA_BLOCK_LENGTH;
559: int i;
560: for (int start = newStart; start < m_dataLength_;) {
561: // start: index of first entry of current block
562: // newStart: index where the current block is to be moved
563: // (right after current end of already-compacted data)
564: // skip blocks that are not used
565: if (m_map_[start >>> SHIFT_] < 0) {
566: // advance start to the next block
567: start += DATA_BLOCK_LENGTH;
568: // leave newStart with the previous block!
569: continue;
570: }
571: // search for an identical block
572: if (start >= overlapStart) {
573: i = findSameDataBlock(m_data_, newStart, start,
574: overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH);
575: if (i >= 0) {
576: // found an identical block, set the other block's index
577: // value for the current block
578: m_map_[start >>> SHIFT_] = i;
579: // advance start to the next block
580: start += DATA_BLOCK_LENGTH;
581: // leave newStart with the previous block!
582: continue;
583: }
584: }
585: // see if the beginning of this block can be overlapped with the
586: // end of the previous block
587: if (overlap && start >= overlapStart) {
588: /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
589: for (i = DATA_BLOCK_LENGTH - DATA_GRANULARITY_; i > 0
590: && !equal_int(m_data_, newStart - i, start, i); i -= DATA_GRANULARITY_) {
591: }
592: } else {
593: i = 0;
594: }
595: if (i > 0) {
596: // some overlap
597: m_map_[start >>> SHIFT_] = newStart - i;
598: // move the non-overlapping indexes to their new positions
599: start += i;
600: for (i = DATA_BLOCK_LENGTH - i; i > 0; --i) {
601: m_data_[newStart++] = m_data_[start++];
602: }
603: } else if (newStart < start) {
604: // no overlap, just move the indexes to their new positions
605: m_map_[start >>> SHIFT_] = newStart;
606: for (i = DATA_BLOCK_LENGTH; i > 0; --i) {
607: m_data_[newStart++] = m_data_[start++];
608: }
609: } else { // no overlap && newStart==start
610: m_map_[start >>> SHIFT_] = start;
611: newStart += DATA_BLOCK_LENGTH;
612: start = newStart;
613: }
614: }
615: // now adjust the index (stage 1) table
616: for (i = 0; i < m_indexLength_; ++i) {
617: m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_];
618: }
619: m_dataLength_ = newStart;
620: }
621:
622: /**
623: * Find the same data block
624: * @param data array
625: * @param dataLength
626: * @param otherBlock
627: * @param step
628: */
629: private static final int findSameDataBlock(int data[],
630: int dataLength, int otherBlock, int step) {
631: // ensure that we do not even partially get past dataLength
632: dataLength -= DATA_BLOCK_LENGTH;
633:
634: for (int block = 0; block <= dataLength; block += step) {
635: if (equal_int(data, block, otherBlock, DATA_BLOCK_LENGTH)) {
636: return block;
637: }
638: }
639: return -1;
640: }
641:
642: /**
643: * Fold the normalization data for supplementary code points into
644: * a compact area on top of the BMP-part of the trie index,
645: * with the lead surrogates indexing this compact area.
646: *
647: * Duplicate the index values for lead surrogates:
648: * From inside the BMP area, where some may be overridden with folded values,
649: * to just after the BMP area, where they can be retrieved for
650: * code point lookups.
651: * @param manipulate fold implementation
652: */
653: private final void fold(DataManipulate manipulate) {
654: int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_];
655: int index[] = m_index_;
656: // copy the lead surrogate indexes into a temporary array
657: System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0,
658: SURROGATE_BLOCK_COUNT_);
659:
660: // set all values for lead surrogate code *units* to leadUnitValue
661: // so that by default runtime lookups will find no data for associated
662: // supplementary code points, unless there is data for such code points
663: // which will result in a non-zero folding value below that is set for
664: // the respective lead units
665: // the above saved the indexes for surrogate code *points*
666: // fill the indexes with simplified code from utrie_setRange32()
667: int block = 0;
668: if (m_leadUnitValue_ == m_initialValue_) {
669: // leadUnitValue == initialValue, use all-initial-value block
670: // block = 0; if block here left empty
671: } else {
672: // create and fill the repeatBlock
673: block = allocDataBlock();
674: if (block < 0) {
675: // data table overflow
676: throw new IllegalStateException(
677: "Internal error: Out of memory space");
678: }
679: fillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_,
680: true);
681: // negative block number to indicate that it is a repeat block
682: block = -block;
683: }
684: for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++c) {
685: m_index_[c] = block;
686: }
687:
688: // Fold significant index values into the area just after the BMP
689: // indexes.
690: // In case the first lead surrogate has significant data,
691: // its index block must be used first (in which case the folding is a
692: // no-op).
693: // Later all folded index blocks are moved up one to insert the copied
694: // lead surrogate indexes.
695: int indexLength = BMP_INDEX_LENGTH_;
696: // search for any index (stage 1) entries for supplementary code points
697: for (int c = 0x10000; c < 0x110000;) {
698: if (index[c >> SHIFT_] != 0) {
699: // there is data, treat the full block for a lead surrogate
700: c &= ~0x3ff;
701: // is there an identical index block?
702: block = findSameIndexBlock(index, indexLength,
703: c >> SHIFT_);
704:
705: // get a folded value for [c..c+0x400[ and,
706: // if different from the value for the lead surrogate code
707: // point, set it for the lead surrogate code unit
708:
709: int value = manipulate.getFoldedValue(c, block
710: + SURROGATE_BLOCK_COUNT_);
711: if (value != getValue(UTF16.getLeadSurrogate(c))) {
712: if (!setValue(UTF16.getLeadSurrogate(c), value)) {
713: // data table overflow
714: throw new ArrayIndexOutOfBoundsException(
715: "Data table overflow");
716: }
717: // if we did not find an identical index block...
718: if (block == indexLength) {
719: // move the actual index (stage 1) entries from the
720: // supplementary position to the new one
721: System.arraycopy(index, c >> SHIFT_, index,
722: indexLength, SURROGATE_BLOCK_COUNT_);
723: indexLength += SURROGATE_BLOCK_COUNT_;
724: }
725: }
726: c += 0x400;
727: } else {
728: c += DATA_BLOCK_LENGTH;
729: }
730: }
731:
732: // index array overflow?
733: // This is to guarantee that a folding offset is of the form
734: // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
735: // If the index is too large, then n>=1024 and more than 10 bits are
736: // necessary.
737: // In fact, it can only ever become n==1024 with completely unfoldable
738: // data and the additional block of duplicated values for lead
739: // surrogates.
740: if (indexLength >= MAX_INDEX_LENGTH_) {
741: throw new ArrayIndexOutOfBoundsException(
742: "Index table overflow");
743: }
744: // make space for the lead surrogate index block and insert it between
745: // the BMP indexes and the folded ones
746: System.arraycopy(index, BMP_INDEX_LENGTH_, index,
747: BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_, indexLength
748: - BMP_INDEX_LENGTH_);
749: System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_,
750: SURROGATE_BLOCK_COUNT_);
751: indexLength += SURROGATE_BLOCK_COUNT_;
752: m_indexLength_ = indexLength;
753: }
754:
755: /**
756: * @internal
757: */
758: private void fillBlock(int block, int start, int limit, int value,
759: boolean overwrite) {
760: limit += block;
761: block += start;
762: if (overwrite) {
763: while (block < limit) {
764: m_data_[block++] = value;
765: }
766: } else {
767: while (block < limit) {
768: if (m_data_[block] == m_initialValue_) {
769: m_data_[block] = value;
770: }
771: ++block;
772: }
773: }
774: }
775: }
|