Source Code Cross Referenced for Punycode.java in » Internationalization-Localization » icu4j » com » ibm » icu » text » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Sevlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.text
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        /*
002:         *******************************************************************************
003:         * Copyright (C) 2003-2006, International Business Machines Corporation and    *
004:         * others. All Rights Reserved.                                                *
005:         *******************************************************************************
006:         */
007:        package com.ibm.icu.text;
008:
009:        import com.ibm.icu.lang.UCharacter;
010:
011:        /**
012:         * Ported code from ICU punycode.c 
013:         * @author ram
014:         */
015:
016:        /* Package Private class */
017:        final class Punycode {
018:
019:            /* Punycode parameters for Bootstring */
020:            private static final int BASE = 36;
021:            private static final int TMIN = 1;
022:            private static final int TMAX = 26;
023:            private static final int SKEW = 38;
024:            private static final int DAMP = 700;
025:            private static final int INITIAL_BIAS = 72;
026:            private static final int INITIAL_N = 0x80;
027:
028:            /* "Basic" Unicode/ASCII code points */
029:            private static final int HYPHEN = 0x2d;
030:            private static final int DELIMITER = HYPHEN;
031:
032:            private static final int ZERO = 0x30;
033:            private static final int NINE = 0x39;
034:
035:            private static final int SMALL_A = 0x61;
036:            private static final int SMALL_Z = 0x7a;
037:
038:            private static final int CAPITAL_A = 0x41;
039:            private static final int CAPITAL_Z = 0x5a;
040:            private static final int MAX_CP_COUNT = 200;
041:            private static final int UINT_MAGIC = 0x80000000;
042:            private static final long ULONG_MAGIC = 0x8000000000000000L;
043:
044:            private static int adaptBias(int delta, int length,
045:                    boolean firstTime) {
046:                if (firstTime) {
047:                    delta /= DAMP;
048:                } else {
049:                    delta /= 2;
050:                }
051:                delta += delta / length;
052:
053:                int count = 0;
054:                for (; delta > ((BASE - TMIN) * TMAX) / 2; count += BASE) {
055:                    delta /= (BASE - TMIN);
056:                }
057:
058:                return count + (((BASE - TMIN + 1) * delta) / (delta + SKEW));
059:            }
060:
061:            /**
062:             * basicToDigit[] contains the numeric value of a basic code
063:             * point (for use in representing integers) in the range 0 to
064:             * BASE-1, or -1 if b is does not represent a value.
065:             */
066:            static final int[] basicToDigit = new int[] { -1, -1, -1, -1, -1,
067:                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
068:                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
069:
070:                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
071:                    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1,
072:                    -1, -1,
073:
074:                    -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
075:                    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
076:
077:                    -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
078:                    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
079:
080:                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
081:                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
082:                    -1, -1,
083:
084:                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
085:                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
086:                    -1, -1,
087:
088:                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
089:                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
090:                    -1, -1,
091:
092:                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
093:                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
094:                    -1, -1 };
095:
096:            ///CLOVER:OFF
097:            private static char asciiCaseMap(char b, boolean uppercase) {
098:                if (uppercase) {
099:                    if (SMALL_A <= b && b <= SMALL_Z) {
100:                        b -= (SMALL_A - CAPITAL_A);
101:                    }
102:                } else {
103:                    if (CAPITAL_A <= b && b <= CAPITAL_Z) {
104:                        b += (SMALL_A - CAPITAL_A);
105:                    }
106:                }
107:                return b;
108:            }
109:
110:            ///CLOVER:ON
111:            /**
112:             * digitToBasic() returns the basic code point whose value
113:             * (when used for representing integers) is d, which must be in the
114:             * range 0 to BASE-1. The lowercase form is used unless the uppercase flag is
115:             * nonzero, in which case the uppercase form is used.
116:             */
117:            private static char digitToBasic(int digit, boolean uppercase) {
118:                /*  0..25 map to ASCII a..z or A..Z */
119:                /* 26..35 map to ASCII 0..9         */
120:                if (digit < 26) {
121:                    if (uppercase) {
122:                        return (char) (CAPITAL_A + digit);
123:                    } else {
124:                        return (char) (SMALL_A + digit);
125:                    }
126:                } else {
127:                    return (char) ((ZERO - 26) + digit);
128:                }
129:            }
130:
131:            /**
132:             * Converts Unicode to Punycode.
133:             * The input string must not contain single, unpaired surrogates.
134:             * The output will be represented as an array of ASCII code points.
135:             * 
136:             * @param src
137:             * @param caseFlags
138:             * @return
139:             * @throws ParseException
140:             */
141:            public static StringBuffer encode(StringBuffer src,
142:                    boolean[] caseFlags) throws StringPrepParseException {
143:
144:                int[] cpBuffer = new int[MAX_CP_COUNT];
145:                int n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
146:                char c, c2;
147:                int srcLength = src.length();
148:                int destCapacity = MAX_CP_COUNT;
149:                char[] dest = new char[destCapacity];
150:                StringBuffer result = new StringBuffer();
151:                /*
152:                 * Handle the basic code points and
153:                 * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
154:                 */
155:                srcCPCount = destLength = 0;
156:
157:                for (j = 0; j < srcLength; ++j) {
158:                    if (srcCPCount == MAX_CP_COUNT) {
159:                        /* too many input code points */
160:                        throw new IndexOutOfBoundsException();
161:                    }
162:                    c = src.charAt(j);
163:                    if (isBasic(c)) {
164:                        if (destLength < destCapacity) {
165:                            cpBuffer[srcCPCount++] = 0;
166:                            dest[destLength] = caseFlags != null ? asciiCaseMap(
167:                                    (char) c, caseFlags[j])
168:                                    : (char) c;
169:                        }
170:                        ++destLength;
171:                    } else {
172:                        n = ((caseFlags != null && caseFlags[j]) ? 1 : 0) << 31L;
173:                        if (!UTF16.isSurrogate(c)) {
174:                            n |= c;
175:                        } else if (UTF16.isLeadSurrogate(c)
176:                                && (j + 1) < srcLength
177:                                && UTF16.isTrailSurrogate(c2 = src
178:                                        .charAt(j + 1))) {
179:                            ++j;
180:
181:                            n |= UCharacter.getCodePoint(c, c2);
182:                        } else {
183:                            /* error: unmatched surrogate */
184:                            throw new StringPrepParseException(
185:                                    "Illegal char found",
186:                                    StringPrepParseException.ILLEGAL_CHAR_FOUND);
187:                        }
188:                        cpBuffer[srcCPCount++] = n;
189:                    }
190:                }
191:
192:                /* Finish the basic string - if it is not empty - with a delimiter. */
193:                basicLength = destLength;
194:                if (basicLength > 0) {
195:                    if (destLength < destCapacity) {
196:                        dest[destLength] = DELIMITER;
197:                    }
198:                    ++destLength;
199:                }
200:
201:                /*
202:                 * handledCPCount is the number of code points that have been handled
203:                 * basicLength is the number of basic code points
204:                 * destLength is the number of chars that have been output
205:                 */
206:
207:                /* Initialize the state: */
208:                n = INITIAL_N;
209:                delta = 0;
210:                bias = INITIAL_BIAS;
211:
212:                /* Main encoding loop: */
213:                for (handledCPCount = basicLength; handledCPCount < srcCPCount; /* no op */) {
214:                    /*
215:                     * All non-basic code points < n have been handled already.
216:                     * Find the next larger one:
217:                     */
218:                    for (m = 0x7fffffff, j = 0; j < srcCPCount; ++j) {
219:                        q = cpBuffer[j] & 0x7fffffff; /* remove case flag from the sign bit */
220:                        if (n <= q && q < m) {
221:                            m = q;
222:                        }
223:                    }
224:
225:                    /*
226:                     * Increase delta enough to advance the decoder's
227:                     * <n,i> state to <m,0>, but guard against overflow:
228:                     */
229:                    if (m - n > (0x7fffffff - MAX_CP_COUNT - delta)
230:                            / (handledCPCount + 1)) {
231:                        throw new IllegalStateException(
232:                                "Internal program error");
233:                    }
234:                    delta += (m - n) * (handledCPCount + 1);
235:                    n = m;
236:
237:                    /* Encode a sequence of same code points n */
238:                    for (j = 0; j < srcCPCount; ++j) {
239:                        q = cpBuffer[j] & 0x7fffffff; /* remove case flag from the sign bit */
240:                        if (q < n) {
241:                            ++delta;
242:                        } else if (q == n) {
243:                            /* Represent delta as a generalized variable-length integer: */
244:                            for (q = delta, k = BASE; /* no condition */; k += BASE) {
245:
246:                                /** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt   
247:
248:                                t=k-bias;
249:                                if(t<TMIN) {
250:                                    t=TMIN;
251:                                } else if(t>TMAX) {
252:                                    t=TMAX;
253:                                }
254:                                 */
255:
256:                                t = k - bias;
257:                                if (t < TMIN) {
258:                                    t = TMIN;
259:                                } else if (k >= (bias + TMAX)) {
260:                                    t = TMAX;
261:                                }
262:
263:                                if (q < t) {
264:                                    break;
265:                                }
266:
267:                                if (destLength < destCapacity) {
268:                                    dest[destLength++] = digitToBasic(t
269:                                            + (q - t) % (BASE - t), false);
270:                                }
271:                                q = (q - t) / (BASE - t);
272:                            }
273:
274:                            if (destLength < destCapacity) {
275:                                dest[destLength++] = digitToBasic(q,
276:                                        (cpBuffer[j] < 0));
277:                            }
278:                            bias = adaptBias(delta, handledCPCount + 1,
279:                                    (handledCPCount == basicLength));
280:                            delta = 0;
281:                            ++handledCPCount;
282:                        }
283:                    }
284:
285:                    ++delta;
286:                    ++n;
287:                }
288:
289:                return result.append(dest, 0, destLength);
290:            }
291:
292:            private static boolean isBasic(int ch) {
293:                return (ch < INITIAL_N);
294:            }
295:
296:            ///CLOVER:OFF
297:            private static boolean isBasicUpperCase(int ch) {
298:                return (CAPITAL_A <= ch && ch >= CAPITAL_Z);
299:            }
300:
301:            ///CLOVER:ON
302:            private static boolean isSurrogate(int ch) {
303:                return (((ch) & 0xfffff800) == 0xd800);
304:            }
305:
306:            /**
307:             * Converts Punycode to Unicode.
308:             * The Unicode string will be at most as long as the Punycode string.
309:             * 
310:             * @param src
311:             * @param caseFlags
312:             * @return
313:             * @throws ParseException
314:             */
315:            public static StringBuffer decode(StringBuffer src,
316:                    boolean[] caseFlags) throws StringPrepParseException {
317:                int srcLength = src.length();
318:                StringBuffer result = new StringBuffer();
319:                int n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t, destCPCount, firstSupplementaryIndex, cpLength;
320:                char b;
321:                int destCapacity = MAX_CP_COUNT;
322:                char[] dest = new char[destCapacity];
323:
324:                /*
325:                 * Handle the basic code points:
326:                 * Let basicLength be the number of input code points
327:                 * before the last delimiter, or 0 if there is none,
328:                 * then copy the first basicLength code points to the output.
329:                 *
330:                 * The two following loops iterate backward.
331:                 */
332:                for (j = srcLength; j > 0;) {
333:                    if (src.charAt(--j) == DELIMITER) {
334:                        break;
335:                    }
336:                }
337:                destLength = basicLength = destCPCount = j;
338:
339:                while (j > 0) {
340:                    b = src.charAt(--j);
341:                    if (!isBasic(b)) {
342:                        throw new StringPrepParseException(
343:                                "Illegal char found",
344:                                StringPrepParseException.INVALID_CHAR_FOUND);
345:                    }
346:
347:                    if (j < destCapacity) {
348:                        dest[j] = b;
349:
350:                        if (caseFlags != null) {
351:                            caseFlags[j] = isBasicUpperCase(b);
352:                        }
353:                    }
354:                }
355:
356:                /* Initialize the state: */
357:                n = INITIAL_N;
358:                i = 0;
359:                bias = INITIAL_BIAS;
360:                firstSupplementaryIndex = 1000000000;
361:
362:                /*
363:                 * Main decoding loop:
364:                 * Start just after the last delimiter if any
365:                 * basic code points were copied; start at the beginning otherwise.
366:                 */
367:                for (in = basicLength > 0 ? basicLength + 1 : 0; in < srcLength; /* no op */) {
368:                    /*
369:                     * in is the index of the next character to be consumed, and
370:                     * destCPCount is the number of code points in the output array.
371:                     *
372:                     * Decode a generalized variable-length integer into delta,
373:                     * which gets added to i.  The overflow checking is easier
374:                     * if we increase i as we go, then subtract off its starting
375:                     * value at the end to obtain delta.
376:                     */
377:                    for (oldi = i, w = 1, k = BASE; /* no condition */; k += BASE) {
378:                        if (in >= srcLength) {
379:                            throw new StringPrepParseException(
380:                                    "Illegal char found",
381:                                    StringPrepParseException.ILLEGAL_CHAR_FOUND);
382:                        }
383:
384:                        digit = basicToDigit[src.charAt(in++) & 0xFF];
385:                        if (digit < 0) {
386:                            throw new StringPrepParseException(
387:                                    "Invalid char found",
388:                                    StringPrepParseException.INVALID_CHAR_FOUND);
389:                        }
390:                        if (digit > (0x7fffffff - i) / w) {
391:                            /* integer overflow */
392:                            throw new StringPrepParseException(
393:                                    "Illegal char found",
394:                                    StringPrepParseException.ILLEGAL_CHAR_FOUND);
395:                        }
396:
397:                        i += digit * w;
398:                        t = k - bias;
399:                        if (t < TMIN) {
400:                            t = TMIN;
401:                        } else if (k >= (bias + TMAX)) {
402:                            t = TMAX;
403:                        }
404:                        if (digit < t) {
405:                            break;
406:                        }
407:
408:                        if (w > 0x7fffffff / (BASE - t)) {
409:                            /* integer overflow */
410:                            throw new StringPrepParseException(
411:                                    "Illegal char found",
412:                                    StringPrepParseException.ILLEGAL_CHAR_FOUND);
413:                        }
414:                        w *= BASE - t;
415:                    }
416:
417:                    /*
418:                     * Modification from sample code:
419:                     * Increments destCPCount here,
420:                     * where needed instead of in for() loop tail.
421:                     */
422:                    ++destCPCount;
423:                    bias = adaptBias(i - oldi, destCPCount, (oldi == 0));
424:
425:                    /*
426:                     * i was supposed to wrap around from (incremented) destCPCount to 0,
427:                     * incrementing n each time, so we'll fix that now:
428:                     */
429:                    if (i / destCPCount > (0x7fffffff - n)) {
430:                        /* integer overflow */
431:                        throw new StringPrepParseException(
432:                                "Illegal char found",
433:                                StringPrepParseException.ILLEGAL_CHAR_FOUND);
434:                    }
435:
436:                    n += i / destCPCount;
437:                    i %= destCPCount;
438:                    /* not needed for Punycode: */
439:                    /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */
440:
441:                    if (n > 0x10ffff || isSurrogate(n)) {
442:                        /* Unicode code point overflow */
443:                        throw new StringPrepParseException(
444:                                "Illegal char found",
445:                                StringPrepParseException.ILLEGAL_CHAR_FOUND);
446:                    }
447:
448:                    /* Insert n at position i of the output: */
449:                    cpLength = UTF16.getCharCount(n);
450:                    if ((destLength + cpLength) < destCapacity) {
451:                        int codeUnitIndex;
452:
453:                        /*
454:                         * Handle indexes when supplementary code points are present.
455:                         *
456:                         * In almost all cases, there will be only BMP code points before i
457:                         * and even in the entire string.
458:                         * This is handled with the same efficiency as with UTF-32.
459:                         *
460:                         * Only the rare cases with supplementary code points are handled
461:                         * more slowly - but not too bad since this is an insertion anyway.
462:                         */
463:                        if (i <= firstSupplementaryIndex) {
464:                            codeUnitIndex = i;
465:                            if (cpLength > 1) {
466:                                firstSupplementaryIndex = codeUnitIndex;
467:                            } else {
468:                                ++firstSupplementaryIndex;
469:                            }
470:                        } else {
471:                            codeUnitIndex = firstSupplementaryIndex;
472:                            codeUnitIndex = UTF16.moveCodePointOffset(dest, 0,
473:                                    destLength, codeUnitIndex, i
474:                                            - codeUnitIndex);
475:                        }
476:
477:                        /* use the UChar index codeUnitIndex instead of the code point index i */
478:                        if (codeUnitIndex < destLength) {
479:                            System.arraycopy(dest, codeUnitIndex, dest,
480:                                    codeUnitIndex + cpLength,
481:                                    (destLength - codeUnitIndex));
482:                            if (caseFlags != null) {
483:                                System.arraycopy(caseFlags, codeUnitIndex,
484:                                        caseFlags, codeUnitIndex + cpLength,
485:                                        destLength - codeUnitIndex);
486:                            }
487:                        }
488:                        if (cpLength == 1) {
489:                            /* BMP, insert one code unit */
490:                            dest[codeUnitIndex] = (char) n;
491:                        } else {
492:                            /* supplementary character, insert two code units */
493:                            dest[codeUnitIndex] = UTF16.getLeadSurrogate(n);
494:                            dest[codeUnitIndex + 1] = UTF16
495:                                    .getTrailSurrogate(n);
496:                        }
497:                        if (caseFlags != null) {
498:                            /* Case of last character determines uppercase flag: */
499:                            caseFlags[codeUnitIndex] = isBasicUpperCase(src
500:                                    .charAt(in - 1));
501:                            if (cpLength == 2) {
502:                                caseFlags[codeUnitIndex + 1] = false;
503:                            }
504:                        }
505:                    }
506:                    destLength += cpLength;
507:                    ++i;
508:                }
509:                result.append(dest, 0, destLength);
510:                return result;
511:            }
512:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.