Source Code Cross Referenced for IntTrieBuilder.java in » Internationalization-Localization » icu4j » com » ibm » icu » impl » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Sevlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.impl
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        /*
002:         ******************************************************************************
003:         * Copyright (C) 1996-2006, International Business Machines Corporation and   *
004:         * others. All Rights Reserved.                                               *
005:         ******************************************************************************
006:         */
007:
008:        package com.ibm.icu.impl;
009:
010:        import com.ibm.icu.lang.UCharacter;
011:        import com.ibm.icu.text.UTF16;
012:        import java.util.Arrays;
013:        import java.io.OutputStream;
014:        import java.io.DataOutputStream;
015:        import java.io.IOException;
016:
017:        /**
018:         * Builder class to manipulate and generate a trie.
019:         * This is useful for ICU data in primitive types.
020:         * Provides a compact way to store information that is indexed by Unicode 
021:         * values, such as character properties, types, keyboard values, etc. This is 
022:         * very useful when you have a block of Unicode data that contains significant 
023:         * values while the rest of the Unicode data is unused in the application or 
024:         * when you have a lot of redundance, such as where all 21,000 Han ideographs 
025:         * have the same value.  However, lookup is much faster than a hash table.
026:         * A trie of any primitive data type serves two purposes:
027:         * <UL type = round>
028:         *     <LI>Fast access of the indexed values.
029:         *     <LI>Smaller memory footprint.
030:         * </UL>
031:         * This is a direct port from the ICU4C version
032:         * @author             Syn Wee Quek
033:         */
034:        public class IntTrieBuilder extends TrieBuilder {
035:            // public constructor ----------------------------------------------
036:
037:            /**
038:             * Copy constructor
039:             */
040:            public IntTrieBuilder(IntTrieBuilder table) {
041:                super (table);
042:                m_data_ = new int[m_dataCapacity_];
043:                System.arraycopy(table.m_data_, 0, m_data_, 0, m_dataLength_);
044:                m_initialValue_ = table.m_initialValue_;
045:                m_leadUnitValue_ = table.m_leadUnitValue_;
046:            }
047:
048:            /**
049:             * Constructs a build table
050:             * @param aliasdata data to be filled into table
051:             * @param maxdatalength maximum data length allowed in table
052:             * @param initialvalue inital data value
053:             * @param latin1linear is latin 1 to be linear
054:             */
055:            public IntTrieBuilder(int aliasdata[], int maxdatalength,
056:                    int initialvalue, int leadunitvalue, boolean latin1linear) {
057:                super ();
058:                if (maxdatalength < DATA_BLOCK_LENGTH
059:                        || (latin1linear && maxdatalength < 1024)) {
060:                    throw new IllegalArgumentException(
061:                            "Argument maxdatalength is too small");
062:                }
063:
064:                if (aliasdata != null) {
065:                    m_data_ = aliasdata;
066:                } else {
067:                    m_data_ = new int[maxdatalength];
068:                }
069:
070:                // preallocate and reset the first data block (block index 0)
071:                int j = DATA_BLOCK_LENGTH;
072:
073:                if (latin1linear) {
074:                    // preallocate and reset the first block (number 0) and Latin-1 
075:                    // (U+0000..U+00ff) after that made sure above that 
076:                    // maxDataLength >= 1024
077:                    // set indexes to point to consecutive data blocks
078:                    int i = 0;
079:                    do {
080:                        // do this at least for trie->index[0] even if that block is 
081:                        // only partly used for Latin-1
082:                        m_index_[i++] = j;
083:                        j += DATA_BLOCK_LENGTH;
084:                    } while (i < (256 >> SHIFT_));
085:                }
086:
087:                m_dataLength_ = j;
088:                // reset the initially allocated blocks to the initial value
089:                Arrays.fill(m_data_, 0, m_dataLength_, initialvalue);
090:                m_initialValue_ = initialvalue;
091:                m_leadUnitValue_ = leadunitvalue;
092:                m_dataCapacity_ = maxdatalength;
093:                m_isLatin1Linear_ = latin1linear;
094:                m_isCompacted_ = false;
095:            }
096:
097:            // public methods -------------------------------------------------------
098:
099:            /*public final void print()
100:              {
101:              int i = 0;
102:              int oldvalue = m_index_[i];
103:              int count = 0;
104:              System.out.println("index length " + m_indexLength_ 
105:              + " --------------------------");
106:              while (i < m_indexLength_) {
107:              if (m_index_[i] != oldvalue) {
108:              System.out.println("index has " + count + " counts of " 
109:              + Integer.toHexString(oldvalue));
110:              count = 0;
111:              oldvalue = m_index_[i];
112:              }
113:              count ++;
114:              i ++;
115:              }
116:              System.out.println("index has " + count + " counts of " 
117:              + Integer.toHexString(oldvalue));
118:              i = 0;
119:              oldvalue = m_data_[i];
120:              count = 0;
121:              System.out.println("data length " + m_dataLength_ 
122:              + " --------------------------");
123:              while (i < m_dataLength_) {
124:              if (m_data_[i] != oldvalue) {
125:              if ((oldvalue & 0xf1000000) == 0xf1000000) {
126:              int temp = oldvalue & 0xffffff; 
127:              temp += 0x320;
128:              oldvalue = 0xf1000000 | temp;
129:              }
130:              if ((oldvalue & 0xf2000000) == 0xf2000000) {
131:              int temp = oldvalue & 0xffffff; 
132:              temp += 0x14a;
133:              oldvalue = 0xf2000000 | temp;
134:              }
135:              System.out.println("data has " + count + " counts of " 
136:              + Integer.toHexString(oldvalue));
137:              count = 0;
138:              oldvalue = m_data_[i];
139:              }
140:              count ++;
141:              i ++;
142:              }
143:              if ((oldvalue & 0xf1000000) == 0xf1000000) {
144:              int temp = oldvalue & 0xffffff; 
145:              temp += 0x320;
146:              oldvalue = 0xf1000000 | temp;
147:              }
148:              if ((oldvalue & 0xf2000000) == 0xf2000000) {
149:              int temp = oldvalue & 0xffffff; 
150:              temp += 0x14a;
151:              oldvalue = 0xf2000000 | temp;
152:              }
153:              System.out.println("data has " + count + " counts of " 
154:              + Integer.toHexString(oldvalue));
155:              }
156:             */
157:            /**
158:             * Gets a 32 bit data from the table data
159:             * @param ch codepoint which data is to be retrieved
160:             * @return the 32 bit data
161:             */
162:            public int getValue(int ch) {
163:                // valid, uncompacted trie and valid c?
164:                if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
165:                    return 0;
166:                }
167:
168:                int block = m_index_[ch >> SHIFT_];
169:                return m_data_[Math.abs(block) + (ch & MASK_)];
170:            }
171:
172:            /**
173:             * Get a 32 bit data from the table data
174:             * @param ch  code point for which data is to be retrieved.
175:             * @param inBlockZero  Output parameter, inBlockZero[0] returns true if the
176:             *                      char maps into block zero, otherwise false.
177:             * @return the 32 bit data value.
178:             */
179:            public int getValue(int ch, boolean[] inBlockZero) {
180:                // valid, uncompacted trie and valid c?
181:                if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
182:                    if (inBlockZero != null) {
183:                        inBlockZero[0] = true;
184:                    }
185:                    return 0;
186:                }
187:
188:                int block = m_index_[ch >> SHIFT_];
189:                if (inBlockZero != null) {
190:                    inBlockZero[0] = (block == 0);
191:                }
192:                return m_data_[Math.abs(block) + (ch & MASK_)];
193:            }
194:
195:            /**
196:             * Sets a 32 bit data in the table data
197:             * @param ch codepoint which data is to be set
198:             * @param value to set
199:             * @return true if the set is successful, otherwise 
200:             *              if the table has been compacted return false
201:             */
202:            public boolean setValue(int ch, int value) {
203:                // valid, uncompacted trie and valid c? 
204:                if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
205:                    return false;
206:                }
207:
208:                int block = getDataBlock(ch);
209:                if (block < 0) {
210:                    return false;
211:                }
212:
213:                m_data_[block + (ch & MASK_)] = value;
214:                return true;
215:            }
216:
217:            /**
218:             * Serializes the build table with 32 bit data
219:             * @param datamanipulate builder raw fold method implementation
220:             * @param triedatamanipulate result trie fold method
221:             * @return a new trie
222:             */
223:            public IntTrie serialize(TrieBuilder.DataManipulate datamanipulate,
224:                    Trie.DataManipulate triedatamanipulate) {
225:                if (datamanipulate == null) {
226:                    throw new IllegalArgumentException(
227:                            "Parameters can not be null");
228:                }
229:                // fold and compact if necessary, also checks that indexLength is 
230:                // within limits 
231:                if (!m_isCompacted_) {
232:                    // compact once without overlap to improve folding
233:                    compact(false);
234:                    // fold the supplementary part of the index array
235:                    fold(datamanipulate);
236:                    // compact again with overlap for minimum data array length
237:                    compact(true);
238:                    m_isCompacted_ = true;
239:                }
240:                // is dataLength within limits? 
241:                if (m_dataLength_ >= MAX_DATA_LENGTH_) {
242:                    throw new ArrayIndexOutOfBoundsException(
243:                            "Data length too small");
244:                }
245:
246:                char index[] = new char[m_indexLength_];
247:                int data[] = new int[m_dataLength_];
248:                // write the index (stage 1) array and the 32-bit data (stage 2) array
249:                // write 16-bit index values shifted right by INDEX_SHIFT_ 
250:                for (int i = 0; i < m_indexLength_; i++) {
251:                    index[i] = (char) (m_index_[i] >>> INDEX_SHIFT_);
252:                }
253:                // write 32-bit data values
254:                System.arraycopy(m_data_, 0, data, 0, m_dataLength_);
255:
256:                int options = SHIFT_ | (INDEX_SHIFT_ << OPTIONS_INDEX_SHIFT_);
257:                options |= OPTIONS_DATA_IS_32_BIT_;
258:                if (m_isLatin1Linear_) {
259:                    options |= OPTIONS_LATIN1_IS_LINEAR_;
260:                }
261:                return new IntTrie(index, data, m_initialValue_, options,
262:                        triedatamanipulate);
263:            }
264:
265:            /**
266:             * Serializes the build table to an output stream.
267:             * 
268:             * Compacts the build-time trie after all values are set, and then
269:             * writes the serialized form onto an output stream.
270:             * 
271:             * After this, this build-time Trie can only be serialized again and/or closed;
272:             * no further values can be added.
273:             * 
274:             * This function is the rough equivalent of utrie_seriaize() in ICU4C.
275:             * 
276:             * @param os the output stream to which the seriaized trie will be written.
277:             *         If nul, the function still returns the size of the serialized Trie.
278:             * @param reduceTo16Bits If true, reduce the data size to 16 bits.  The resulting
279:             *         serialized form can then be used to create a CharTrie.
280:             * @param datamanipulate builder raw fold method implementation
281:             * @return the number of bytes written to the output stream.
282:             */
283:            public int serialize(OutputStream os, boolean reduceTo16Bits,
284:                    TrieBuilder.DataManipulate datamanipulate)
285:                    throws IOException {
286:                if (datamanipulate == null) {
287:                    throw new IllegalArgumentException(
288:                            "Parameters can not be null");
289:                }
290:
291:                // fold and compact if necessary, also checks that indexLength is 
292:                // within limits 
293:                if (!m_isCompacted_) {
294:                    // compact once without overlap to improve folding
295:                    compact(false);
296:                    // fold the supplementary part of the index array
297:                    fold(datamanipulate);
298:                    // compact again with overlap for minimum data array length
299:                    compact(true);
300:                    m_isCompacted_ = true;
301:                }
302:
303:                // is dataLength within limits? 
304:                int length;
305:                if (reduceTo16Bits) {
306:                    length = m_dataLength_ + m_indexLength_;
307:                } else {
308:                    length = m_dataLength_;
309:                }
310:                if (length >= MAX_DATA_LENGTH_) {
311:                    throw new ArrayIndexOutOfBoundsException(
312:                            "Data length too small");
313:                }
314:
315:                //  struct UTrieHeader {
316:                //      int32_t   signature;
317:                //      int32_t   options  (a bit field)
318:                //      int32_t   indexLength
319:                //      int32_t   dataLength
320:                length = Trie.HEADER_LENGTH_ + 2 * m_indexLength_;
321:                if (reduceTo16Bits) {
322:                    length += 2 * m_dataLength_;
323:                } else {
324:                    length += 4 * m_dataLength_;
325:                }
326:
327:                if (os == null) {
328:                    // No output stream.  Just return the length of the serialized Trie, in bytes.
329:                    return length;
330:                }
331:
332:                DataOutputStream dos = new DataOutputStream(os);
333:                dos.writeInt(Trie.HEADER_SIGNATURE_);
334:
335:                int options = Trie.INDEX_STAGE_1_SHIFT_
336:                        | (Trie.INDEX_STAGE_2_SHIFT_ << Trie.HEADER_OPTIONS_INDEX_SHIFT_);
337:                if (!reduceTo16Bits) {
338:                    options |= Trie.HEADER_OPTIONS_DATA_IS_32_BIT_;
339:                }
340:                if (m_isLatin1Linear_) {
341:                    options |= Trie.HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_;
342:                }
343:                dos.writeInt(options);
344:
345:                dos.writeInt(m_indexLength_);
346:                dos.writeInt(m_dataLength_);
347:
348:                /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */
349:                if (reduceTo16Bits) {
350:                    /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */
351:                    for (int i = 0; i < m_indexLength_; i++) {
352:                        int v = (m_index_[i] + m_indexLength_) >>> Trie.INDEX_STAGE_2_SHIFT_;
353:                        dos.writeChar(v);
354:                    }
355:
356:                    /* write 16-bit data values */
357:                    for (int i = 0; i < m_dataLength_; i++) {
358:                        int v = m_data_[i] & 0x0000ffff;
359:                        dos.writeChar(v);
360:                    }
361:                } else {
362:                    /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */
363:                    for (int i = 0; i < m_indexLength_; i++) {
364:                        int v = (m_index_[i]) >>> Trie.INDEX_STAGE_2_SHIFT_;
365:                        dos.writeChar(v);
366:                    }
367:
368:                    /* write 32-bit data values */
369:                    for (int i = 0; i < m_dataLength_; i++) {
370:                        dos.writeInt(m_data_[i]);
371:                    }
372:                }
373:
374:                return length;
375:
376:            }
377:
378:            /**
379:             * Set a value in a range of code points [start..limit].
380:             * All code points c with start &lt;= c &lt; limit will get the value if
381:             * overwrite is true or if the old value is 0.
382:             * @param start the first code point to get the value
383:             * @param limit one past the last code point to get the value
384:             * @param value the value
385:             * @param overwrite flag for whether old non-initial values are to be 
386:             *        overwritten
387:             * @return false if a failure occurred (illegal argument or data array 
388:             *               overrun)
389:             */
390:            public boolean setRange(int start, int limit, int value,
391:                    boolean overwrite) {
392:                // repeat value in [start..limit[
393:                // mark index values for repeat-data blocks by setting bit 31 of the 
394:                // index values fill around existing values if any, if(overwrite)
395:
396:                // valid, uncompacted trie and valid indexes?
397:                if (m_isCompacted_ || start < UCharacter.MIN_VALUE
398:                        || start > UCharacter.MAX_VALUE
399:                        || limit < UCharacter.MIN_VALUE
400:                        || limit > (UCharacter.MAX_VALUE + 1) || start > limit) {
401:                    return false;
402:                }
403:
404:                if (start == limit) {
405:                    return true; // nothing to do
406:                }
407:
408:                if ((start & MASK_) != 0) {
409:                    // set partial block at [start..following block boundary[
410:                    int block = getDataBlock(start);
411:                    if (block < 0) {
412:                        return false;
413:                    }
414:
415:                    int nextStart = (start + DATA_BLOCK_LENGTH) & ~MASK_;
416:                    if (nextStart <= limit) {
417:                        fillBlock(block, start & MASK_, DATA_BLOCK_LENGTH,
418:                                value, overwrite);
419:                        start = nextStart;
420:                    } else {
421:                        fillBlock(block, start & MASK_, limit & MASK_, value,
422:                                overwrite);
423:                        return true;
424:                    }
425:                }
426:
427:                // number of positions in the last, partial block
428:                int rest = limit & MASK_;
429:
430:                // round down limit to a block boundary 
431:                limit &= ~MASK_;
432:
433:                // iterate over all-value blocks 
434:                int repeatBlock = 0;
435:                if (value == m_initialValue_) {
436:                    // repeatBlock = 0; assigned above
437:                } else {
438:                    repeatBlock = -1;
439:                }
440:                while (start < limit) {
441:                    // get index value 
442:                    int block = m_index_[start >> SHIFT_];
443:                    if (block > 0) {
444:                        // already allocated, fill in value
445:                        fillBlock(block, 0, DATA_BLOCK_LENGTH, value, overwrite);
446:                    } else if (m_data_[-block] != value
447:                            && (block == 0 || overwrite)) {
448:                        // set the repeatBlock instead of the current block 0 or range 
449:                        // block 
450:                        if (repeatBlock >= 0) {
451:                            m_index_[start >> SHIFT_] = -repeatBlock;
452:                        } else {
453:                            // create and set and fill the repeatBlock
454:                            repeatBlock = getDataBlock(start);
455:                            if (repeatBlock < 0) {
456:                                return false;
457:                            }
458:
459:                            // set the negative block number to indicate that it is a 
460:                            // repeat block
461:                            m_index_[start >> SHIFT_] = -repeatBlock;
462:                            fillBlock(repeatBlock, 0, DATA_BLOCK_LENGTH, value,
463:                                    true);
464:                        }
465:                    }
466:
467:                    start += DATA_BLOCK_LENGTH;
468:                }
469:
470:                if (rest > 0) {
471:                    // set partial block at [last block boundary..limit[
472:                    int block = getDataBlock(start);
473:                    if (block < 0) {
474:                        return false;
475:                    }
476:                    fillBlock(block, 0, rest, value, overwrite);
477:                }
478:
479:                return true;
480:            }
481:
482:            // protected data member ------------------------------------------------
483:
484:            protected int m_data_[];
485:            protected int m_initialValue_;
486:
487:            //  private data member ------------------------------------------------
488:
489:            private int m_leadUnitValue_;
490:
491:            // private methods ------------------------------------------------------
492:
493:            private int allocDataBlock() {
494:                int newBlock = m_dataLength_;
495:                int newTop = newBlock + DATA_BLOCK_LENGTH;
496:                if (newTop > m_dataCapacity_) {
497:                    // out of memory in the data array
498:                    return -1;
499:                }
500:                m_dataLength_ = newTop;
501:                return newBlock;
502:            }
503:
504:            /**
505:             * No error checking for illegal arguments.
506:             * @param ch codepoint to look for
507:             * @return -1 if no new data block available (out of memory in data array)
508:             */
509:            private int getDataBlock(int ch) {
510:                ch >>= SHIFT_;
511:                int indexValue = m_index_[ch];
512:                if (indexValue > 0) {
513:                    return indexValue;
514:                }
515:
516:                // allocate a new data block
517:                int newBlock = allocDataBlock();
518:                if (newBlock < 0) {
519:                    // out of memory in the data array 
520:                    return -1;
521:                }
522:                m_index_[ch] = newBlock;
523:
524:                // copy-on-write for a block from a setRange()
525:                System.arraycopy(m_data_, Math.abs(indexValue), m_data_,
526:                        newBlock, DATA_BLOCK_LENGTH << 2);
527:                return newBlock;
528:            }
529:
530:            /**
531:             * Compact a folded build-time trie.
532:             * The compaction
533:             * - removes blocks that are identical with earlier ones
534:             * - overlaps adjacent blocks as much as possible (if overlap == true)
535:             * - moves blocks in steps of the data granularity
536:             * - moves and overlaps blocks that overlap with multiple values in the overlap region
537:             *
538:             * It does not
539:             * - try to move and overlap blocks that are not already adjacent
540:             * @param overlap flag
541:             */
542:            private void compact(boolean overlap) {
543:                if (m_isCompacted_) {
544:                    return; // nothing left to do
545:                }
546:
547:                // compaction
548:                // initialize the index map with "block is used/unused" flags
549:                findUnusedBlocks();
550:
551:                // if Latin-1 is preallocated and linear, then do not compact Latin-1 
552:                // data
553:                int overlapStart = DATA_BLOCK_LENGTH;
554:                if (m_isLatin1Linear_ && SHIFT_ <= 8) {
555:                    overlapStart += 256;
556:                }
557:
558:                int newStart = DATA_BLOCK_LENGTH;
559:                int i;
560:                for (int start = newStart; start < m_dataLength_;) {
561:                    // start: index of first entry of current block
562:                    // newStart: index where the current block is to be moved
563:                    //           (right after current end of already-compacted data)
564:                    // skip blocks that are not used 
565:                    if (m_map_[start >>> SHIFT_] < 0) {
566:                        // advance start to the next block 
567:                        start += DATA_BLOCK_LENGTH;
568:                        // leave newStart with the previous block!
569:                        continue;
570:                    }
571:                    // search for an identical block
572:                    if (start >= overlapStart) {
573:                        i = findSameDataBlock(m_data_, newStart, start,
574:                                overlap ? DATA_GRANULARITY_ : DATA_BLOCK_LENGTH);
575:                        if (i >= 0) {
576:                            // found an identical block, set the other block's index 
577:                            // value for the current block
578:                            m_map_[start >>> SHIFT_] = i;
579:                            // advance start to the next block
580:                            start += DATA_BLOCK_LENGTH;
581:                            // leave newStart with the previous block!
582:                            continue;
583:                        }
584:                    }
585:                    // see if the beginning of this block can be overlapped with the 
586:                    // end of the previous block
587:                    if (overlap && start >= overlapStart) {
588:                        /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
589:                        for (i = DATA_BLOCK_LENGTH - DATA_GRANULARITY_; i > 0
590:                                && !equal_int(m_data_, newStart - i, start, i); i -= DATA_GRANULARITY_) {
591:                        }
592:                    } else {
593:                        i = 0;
594:                    }
595:                    if (i > 0) {
596:                        // some overlap
597:                        m_map_[start >>> SHIFT_] = newStart - i;
598:                        // move the non-overlapping indexes to their new positions
599:                        start += i;
600:                        for (i = DATA_BLOCK_LENGTH - i; i > 0; --i) {
601:                            m_data_[newStart++] = m_data_[start++];
602:                        }
603:                    } else if (newStart < start) {
604:                        // no overlap, just move the indexes to their new positions
605:                        m_map_[start >>> SHIFT_] = newStart;
606:                        for (i = DATA_BLOCK_LENGTH; i > 0; --i) {
607:                            m_data_[newStart++] = m_data_[start++];
608:                        }
609:                    } else { // no overlap && newStart==start
610:                        m_map_[start >>> SHIFT_] = start;
611:                        newStart += DATA_BLOCK_LENGTH;
612:                        start = newStart;
613:                    }
614:                }
615:                // now adjust the index (stage 1) table
616:                for (i = 0; i < m_indexLength_; ++i) {
617:                    m_index_[i] = m_map_[Math.abs(m_index_[i]) >>> SHIFT_];
618:                }
619:                m_dataLength_ = newStart;
620:            }
621:
622:            /**
623:             * Find the same data block
624:             * @param data array
625:             * @param dataLength
626:             * @param otherBlock
627:             * @param step
628:             */
629:            private static final int findSameDataBlock(int data[],
630:                    int dataLength, int otherBlock, int step) {
631:                // ensure that we do not even partially get past dataLength
632:                dataLength -= DATA_BLOCK_LENGTH;
633:
634:                for (int block = 0; block <= dataLength; block += step) {
635:                    if (equal_int(data, block, otherBlock, DATA_BLOCK_LENGTH)) {
636:                        return block;
637:                    }
638:                }
639:                return -1;
640:            }
641:
642:            /**
643:             * Fold the normalization data for supplementary code points into
644:             * a compact area on top of the BMP-part of the trie index,
645:             * with the lead surrogates indexing this compact area.
646:             *
647:             * Duplicate the index values for lead surrogates:
648:             * From inside the BMP area, where some may be overridden with folded values,
649:             * to just after the BMP area, where they can be retrieved for
650:             * code point lookups.
651:             * @param manipulate fold implementation
652:             */
653:            private final void fold(DataManipulate manipulate) {
654:                int leadIndexes[] = new int[SURROGATE_BLOCK_COUNT_];
655:                int index[] = m_index_;
656:                // copy the lead surrogate indexes into a temporary array
657:                System.arraycopy(index, 0xd800 >> SHIFT_, leadIndexes, 0,
658:                        SURROGATE_BLOCK_COUNT_);
659:
660:                // set all values for lead surrogate code *units* to leadUnitValue
661:                // so that by default runtime lookups will find no data for associated
662:                // supplementary code points, unless there is data for such code points
663:                // which will result in a non-zero folding value below that is set for
664:                // the respective lead units
665:                // the above saved the indexes for surrogate code *points*
666:                // fill the indexes with simplified code from utrie_setRange32()
667:                int block = 0;
668:                if (m_leadUnitValue_ == m_initialValue_) {
669:                    // leadUnitValue == initialValue, use all-initial-value block
670:                    // block = 0; if block here left empty
671:                } else {
672:                    // create and fill the repeatBlock
673:                    block = allocDataBlock();
674:                    if (block < 0) {
675:                        // data table overflow
676:                        throw new IllegalStateException(
677:                                "Internal error: Out of memory space");
678:                    }
679:                    fillBlock(block, 0, DATA_BLOCK_LENGTH, m_leadUnitValue_,
680:                            true);
681:                    // negative block number to indicate that it is a repeat block
682:                    block = -block;
683:                }
684:                for (int c = (0xd800 >> SHIFT_); c < (0xdc00 >> SHIFT_); ++c) {
685:                    m_index_[c] = block;
686:                }
687:
688:                // Fold significant index values into the area just after the BMP 
689:                // indexes.
690:                // In case the first lead surrogate has significant data,
691:                // its index block must be used first (in which case the folding is a 
692:                // no-op).
693:                // Later all folded index blocks are moved up one to insert the copied
694:                // lead surrogate indexes.
695:                int indexLength = BMP_INDEX_LENGTH_;
696:                // search for any index (stage 1) entries for supplementary code points 
697:                for (int c = 0x10000; c < 0x110000;) {
698:                    if (index[c >> SHIFT_] != 0) {
699:                        // there is data, treat the full block for a lead surrogate
700:                        c &= ~0x3ff;
701:                        // is there an identical index block?
702:                        block = findSameIndexBlock(index, indexLength,
703:                                c >> SHIFT_);
704:
705:                        // get a folded value for [c..c+0x400[ and,
706:                        // if different from the value for the lead surrogate code 
707:                        // point, set it for the lead surrogate code unit
708:
709:                        int value = manipulate.getFoldedValue(c, block
710:                                + SURROGATE_BLOCK_COUNT_);
711:                        if (value != getValue(UTF16.getLeadSurrogate(c))) {
712:                            if (!setValue(UTF16.getLeadSurrogate(c), value)) {
713:                                // data table overflow 
714:                                throw new ArrayIndexOutOfBoundsException(
715:                                        "Data table overflow");
716:                            }
717:                            // if we did not find an identical index block...
718:                            if (block == indexLength) {
719:                                // move the actual index (stage 1) entries from the 
720:                                // supplementary position to the new one
721:                                System.arraycopy(index, c >> SHIFT_, index,
722:                                        indexLength, SURROGATE_BLOCK_COUNT_);
723:                                indexLength += SURROGATE_BLOCK_COUNT_;
724:                            }
725:                        }
726:                        c += 0x400;
727:                    } else {
728:                        c += DATA_BLOCK_LENGTH;
729:                    }
730:                }
731:
732:                // index array overflow?
733:                // This is to guarantee that a folding offset is of the form
734:                // UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
735:                // If the index is too large, then n>=1024 and more than 10 bits are 
736:                // necessary.
737:                // In fact, it can only ever become n==1024 with completely unfoldable 
738:                // data and the additional block of duplicated values for lead 
739:                // surrogates.
740:                if (indexLength >= MAX_INDEX_LENGTH_) {
741:                    throw new ArrayIndexOutOfBoundsException(
742:                            "Index table overflow");
743:                }
744:                // make space for the lead surrogate index block and insert it between 
745:                // the BMP indexes and the folded ones
746:                System.arraycopy(index, BMP_INDEX_LENGTH_, index,
747:                        BMP_INDEX_LENGTH_ + SURROGATE_BLOCK_COUNT_, indexLength
748:                                - BMP_INDEX_LENGTH_);
749:                System.arraycopy(leadIndexes, 0, index, BMP_INDEX_LENGTH_,
750:                        SURROGATE_BLOCK_COUNT_);
751:                indexLength += SURROGATE_BLOCK_COUNT_;
752:                m_indexLength_ = indexLength;
753:            }
754:
755:            /**
756:             * @internal
757:             */
758:            private void fillBlock(int block, int start, int limit, int value,
759:                    boolean overwrite) {
760:                limit += block;
761:                block += start;
762:                if (overwrite) {
763:                    while (block < limit) {
764:                        m_data_[block++] = value;
765:                    }
766:                } else {
767:                    while (block < limit) {
768:                        if (m_data_[block] == m_initialValue_) {
769:                            m_data_[block] = value;
770:                        }
771:                        ++block;
772:                    }
773:                }
774:            }
775:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.