Source Code Cross Referenced for codecs.java in » Testing » Marathon » org » python » core » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Testing » Marathon » org.python.core
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        /*
002:         * Copyright 2000 Finn Bock
003:         *
004:         * This program contains material copyrighted by:
005:         * Copyright (c) Corporation for National Research Initiatives.
006:         * Originally written by Marc-Andre Lemburg (mal@lemburg.com).
007:         */
008:
009:        package org.python.core;
010:
011:        /**
012:         * Contains the implementation of the builtin codecs.
013:         * @since Jython 2.0
014:         */
015:
016:        public class codecs {
017:            private static char Py_UNICODE_REPLACEMENT_CHARACTER = 0xFFFD;
018:
019:            private static PyList searchPath = new PyList();
020:            private static PyStringMap searchCache = new PyStringMap();
021:
022:            private static String default_encoding = "ascii";
023:
024:            public static String getDefaultEncoding() {
025:                return default_encoding;
026:            }
027:
028:            public static void setDefaultEncoding(String encoding) {
029:                lookup(encoding);
030:                default_encoding = encoding;
031:            }
032:
033:            public static void register(PyObject search_function) {
034:                if (!search_function.isCallable()) {
035:                    throw Py.TypeError("argument must be callable");
036:                }
037:                searchPath.append(search_function);
038:            }
039:
040:            public static PyTuple lookup(String encoding) {
041:                import_encodings();
042:                PyString v = new PyString(normalizestring(encoding));
043:                PyObject result = searchCache.__finditem__(v);
044:                if (result != null) {
045:                    return (PyTuple) result;
046:                }
047:
048:                if (searchPath.__len__() == 0) {
049:                    throw new PyException(Py.LookupError,
050:                            "no codec search functions registered: "
051:                                    + "can't find encoding");
052:                }
053:
054:                PyObject iter = searchPath.__iter__();
055:                PyObject func = null;
056:                while ((func = iter.__iternext__()) != null) {
057:                    result = func.__call__(v);
058:                    if (result == Py.None) {
059:                        continue;
060:                    }
061:                    if (!(result instanceof  PyTuple) || result.__len__() != 4) {
062:                        throw Py.TypeError("codec search functions must "
063:                                + "return 4-tuples");
064:                    }
065:                    break;
066:                }
067:                if (func == null) {
068:                    throw new PyException(Py.LookupError, "unknown encoding "
069:                            + encoding);
070:                }
071:                searchCache.__setitem__(v, result);
072:                return (PyTuple) result;
073:            }
074:
075:            private static String normalizestring(String string) {
076:                return string.toLowerCase().replace(' ', '-');
077:            }
078:
079:            private static boolean import_encodings_called = false;
080:
081:            private static void import_encodings() {
082:                if (!import_encodings_called) {
083:                    import_encodings_called = true;
084:                    try {
085:                        __builtin__.__import__("encodings");
086:                    } catch (PyException exc) {
087:                        if (exc.type != Py.ImportError) {
088:                            throw exc;
089:                        }
090:                    }
091:                }
092:            }
093:
094:            public static String decode(PyString v, String encoding,
095:                    String errors) {
096:                if (encoding == null) {
097:                    encoding = getDefaultEncoding();
098:                } else {
099:                    encoding = normalizestring(encoding);
100:                }
101:
102:                if (errors != null) {
103:                    errors = errors.intern();
104:                }
105:
106:                /* Shortcuts for common default encodings */
107:                /*
108:                 if (encoding.equals("utf-8"))
109:                 return utf_8_decode(v, errors).__getitem__(0).__str__();
110:                 else if (encoding.equals("latin-1"))
111:                 ; //return PyUnicode_DecodeLatin1(s, size, errors);
112:                 else if (encoding.equals("ascii"))
113:                 ; //return PyUnicode_DecodeASCII(s, size, errors);
114:                 */
115:                if (encoding.equals("ascii")) {
116:                    return PyUnicode_DecodeASCII(v.toString(), v.__len__(),
117:                            errors);
118:                }
119:
120:                /* Decode via the codec registry */
121:                PyObject decoder = getDecoder(encoding);
122:                PyObject result = null;
123:                if (errors != null) {
124:                    result = decoder.__call__(v, new PyString(errors));
125:                } else {
126:                    result = decoder.__call__(v);
127:                }
128:
129:                if (!(result instanceof  PyTuple) || result.__len__() != 2) {
130:                    throw Py.TypeError("decoder must return a tuple "
131:                            + "(object,integer)");
132:                }
133:                return result.__getitem__(0).toString();
134:            }
135:
136:            private static PyObject getDecoder(String encoding) {
137:                PyObject codecs = lookup(encoding);
138:                return codecs.__getitem__(1);
139:            }
140:
141:            public static String encode(PyString v, String encoding,
142:                    String errors) {
143:                if (encoding == null) {
144:                    encoding = getDefaultEncoding();
145:                } else {
146:                    encoding = normalizestring(encoding);
147:                }
148:
149:                if (errors != null) {
150:                    errors = errors.intern();
151:                }
152:
153:                /* Shortcuts for common default encodings */
154:                /*
155:                 if (encoding.equals("utf-8"))
156:                 return PyUnicode_DecodeUTF8(v.toString(), v.__len__(), errors);
157:                 else if (encoding.equals("latin-1"))
158:                 return PyUnicode_DecodeLatin1(v.toString(), v.__len__(), errors);
159:                 else
160:                 */
161:
162:                if (encoding.equals("ascii")) {
163:                    return PyUnicode_EncodeASCII(v.toString(), v.__len__(),
164:                            errors);
165:                }
166:
167:                /* Decode via the codec registry */
168:                PyObject encoder = getEncoder(encoding);
169:                PyObject result = null;
170:                if (errors != null) {
171:                    result = encoder.__call__(v, new PyString(errors));
172:                } else {
173:                    result = encoder.__call__(v);
174:                }
175:
176:                if (!(result instanceof  PyTuple) || result.__len__() != 2) {
177:                    throw Py.TypeError("encoder must return a tuple "
178:                            + "(object,integer)");
179:                }
180:                return result.__getitem__(0).toString();
181:            }
182:
183:            private static PyObject getEncoder(String encoding) {
184:                PyObject codecs = lookup(encoding);
185:                return codecs.__getitem__(0);
186:            }
187:
188:            /* --- UTF-8 Codec ---------------------------------------------------- */
189:            private static byte utf8_code_length[] = {
190:            /* Map UTF-8 encoded prefix byte to sequence length.  zero means
191:                illegal prefix.  see RFC 2279 for details */
192:            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
193:                    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
194:                    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
195:                    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
196:                    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
197:                    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
198:                    1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
199:                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
200:                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
201:                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
202:                    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
203:                    2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4,
204:                    4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 };
205:
206:            public static String PyUnicode_DecodeUTF8(String str, String errors) {
207:                int size = str.length();
208:                StringBuffer unicode = new StringBuffer(size);
209:
210:                /* Unpack UTF-8 encoded data */
211:                for (int i = 0; i < size;) {
212:                    int ch = str.charAt(i);
213:                    if (ch > 0xFF) {
214:                        codecs.decoding_error("utf-8", unicode, errors,
215:                                "ordinal not in range(255)");
216:                        i++;
217:                        continue;
218:                    }
219:
220:                    if (ch < 0x80) {
221:                        unicode.append((char) ch);
222:                        i++;
223:                        continue;
224:                    }
225:
226:                    int n = utf8_code_length[ch];
227:
228:                    if (i + n > size) {
229:                        codecs.decoding_error("utf-8", unicode, errors,
230:                                "unexpected end of data");
231:                        i++;
232:                        continue;
233:                    }
234:
235:                    switch (n) {
236:                    case 0:
237:                        codecs.decoding_error("utf-8", unicode, errors,
238:                                "unexpected code byte");
239:                        i++;
240:                        continue;
241:                    case 1:
242:                        codecs.decoding_error("utf-8", unicode, errors,
243:                                "internal error");
244:                        i++;
245:                        continue;
246:                    case 2:
247:                        char ch1 = str.charAt(i + 1);
248:                        if ((ch1 & 0xc0) != 0x80) {
249:                            codecs.decoding_error("utf-8", unicode, errors,
250:                                    "invalid data");
251:                            i++;
252:                            continue;
253:                        }
254:                        ch = ((ch & 0x1f) << 6) + (ch1 & 0x3f);
255:                        if (ch < 0x80) {
256:                            codecs.decoding_error("utf-8", unicode, errors,
257:                                    "illegal encoding");
258:                            i++;
259:                            continue;
260:                        } else
261:                            unicode.append((char) ch);
262:                        break;
263:
264:                    case 3:
265:                        ch1 = str.charAt(i + 1);
266:                        char ch2 = str.charAt(i + 2);
267:                        if ((ch1 & 0xc0) != 0x80 || (ch2 & 0xc0) != 0x80) {
268:                            codecs.decoding_error("utf-8", unicode, errors,
269:                                    "invalid data");
270:                            i++;
271:                            continue;
272:                        }
273:                        ch = ((ch & 0x0f) << 12) + ((ch1 & 0x3f) << 6)
274:                                + (ch2 & 0x3f);
275:                        if (ch < 0x800 || (ch >= 0xd800 && ch < 0xe000)) {
276:                            codecs.decoding_error("utf-8", unicode, errors,
277:                                    "illegal encoding");
278:                            i++;
279:                            continue;
280:                        } else
281:                            unicode.append((char) ch);
282:                        break;
283:
284:                    case 4:
285:                        ch1 = str.charAt(i + 1);
286:                        ch2 = str.charAt(i + 2);
287:                        char ch3 = str.charAt(i + 3);
288:                        if ((ch1 & 0xc0) != 0x80 || (ch2 & 0xc0) != 0x80
289:                                || (ch3 & 0xc0) != 0x80) {
290:                            codecs.decoding_error("utf-8", unicode, errors,
291:                                    "invalid data");
292:                            i++;
293:                            continue;
294:                        }
295:                        ch = ((ch & 0x7) << 18) + ((ch1 & 0x3f) << 12)
296:                                + ((ch2 & 0x3f) << 6) + (ch3 & 0x3f);
297:                        /* validate and convert to UTF-16 */
298:                        if ((ch < 0x10000) || /* minimum value allowed for 4
299:                                                                byte encoding */
300:                        (ch > 0x10ffff)) { /* maximum value allowed for
301:                                                                 UTF-16 */
302:                            codecs.decoding_error("utf-8", unicode, errors,
303:                                    "illegal encoding");
304:                            i++;
305:                            continue;
306:                        }
307:                        /*  compute and append the two surrogates: */
308:
309:                        /*  translate from 10000..10FFFF to 0..FFFF */
310:                        ch -= 0x10000;
311:
312:                        /*  high surrogate = top 10 bits added to D800 */
313:                        unicode.append((char) (0xD800 + (ch >> 10)));
314:
315:                        /*  low surrogate = bottom 10 bits added to DC00 */
316:                        unicode.append((char) (0xDC00 + (ch & ~0xFC00)));
317:                        break;
318:
319:                    default:
320:                        /* Other sizes are only needed for UCS-4 */
321:                        codecs.decoding_error("utf-8", unicode, errors,
322:                                "unsupported Unicode code range");
323:                        i++;
324:                    }
325:                    i += n;
326:                }
327:
328:                return unicode.toString();
329:            }
330:
331:            public static String PyUnicode_EncodeUTF8(String str, String errors) {
332:                int size = str.length();
333:                StringBuffer v = new StringBuffer(size * 3);
334:
335:                for (int i = 0; i < size;) {
336:                    int ch = str.charAt(i++);
337:                    if (ch < 0x80) {
338:                        v.append((char) ch);
339:                    } else if (ch < 0x0800) {
340:                        v.append((char) (0xc0 | (ch >> 6)));
341:                        v.append((char) (0x80 | (ch & 0x3f)));
342:                    } else {
343:                        if (0xD800 <= ch && ch <= 0xDFFF) {
344:                            if (i != size) {
345:                                int ch2 = str.charAt(i);
346:                                if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
347:                                    /* combine the two values */
348:                                    ch = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000;
349:
350:                                    v.append((char) ((ch >> 18) | 0xf0));
351:                                    v
352:                                            .append((char) (0x80 | ((ch >> 12) & 0x3f)));
353:                                    i++;
354:                                }
355:                            }
356:                        } else {
357:                            v.append((char) (0xe0 | (ch >> 12)));
358:                        }
359:                        v.append((char) (0x80 | ((ch >> 6) & 0x3f)));
360:                        v.append((char) (0x80 | (ch & 0x3f)));
361:                    }
362:                }
363:                return v.toString();
364:            }
365:
366:            /* --- 7-bit ASCII Codec -------------------------------------------- */
367:
368:            public static String PyUnicode_DecodeASCII(String str, int size,
369:                    String errors) {
370:                StringBuffer v = new StringBuffer(size);
371:
372:                for (int i = 0; i < size; i++) {
373:                    char ch = str.charAt(i);
374:                    if (ch < 128) {
375:                        v.append(ch);
376:                    } else {
377:                        decoding_error("ascii", v, errors,
378:                                "ordinal not in range(128)");
379:                        continue;
380:                    }
381:                }
382:
383:                return v.toString();
384:            }
385:
386:            public static String PyUnicode_EncodeASCII(String str, int size,
387:                    String errors) {
388:                StringBuffer v = new StringBuffer(size);
389:
390:                for (int i = 0; i < size; i++) {
391:                    char ch = str.charAt(i);
392:                    if (ch >= 128) {
393:                        encoding_error("ascii", v, errors,
394:                                "ordinal not in range(128)");
395:                    } else {
396:                        v.append(ch);
397:                    }
398:                }
399:                return v.toString();
400:            }
401:
402:            /* --- RawUnicodeEscape Codec ---------------------------------------- */
403:
404:            private static char[] hexdigit = "0123456789ABCDEF".toCharArray();
405:
406:            // The modified flag is used by cPickle.
407:            public static String PyUnicode_EncodeRawUnicodeEscape(String str,
408:                    String errors, boolean modifed) {
409:
410:                int size = str.length();
411:                StringBuffer v = new StringBuffer(str.length());
412:
413:                for (int i = 0; i < size; i++) {
414:                    char ch = str.charAt(i);
415:                    if (ch >= 256 || (modifed && (ch == '\n' || ch == '\\'))) {
416:                        v.append("\\u");
417:                        v.append(hexdigit[(ch >>> 12) & 0xF]);
418:                        v.append(hexdigit[(ch >>> 8) & 0xF]);
419:                        v.append(hexdigit[(ch >>> 4) & 0xF]);
420:                        v.append(hexdigit[ch & 0xF]);
421:                    } else {
422:                        v.append(ch);
423:                    }
424:                }
425:
426:                return v.toString();
427:            }
428:
429:            public static String PyUnicode_DecodeRawUnicodeEscape(String str,
430:                    String errors) {
431:                int size = str.length();
432:                StringBuffer v = new StringBuffer(size);
433:
434:                for (int i = 0; i < size;) {
435:                    char ch = str.charAt(i);
436:
437:                    /* Non-escape characters are interpreted as Unicode ordinals */
438:                    if (ch != '\\') {
439:                        v.append(ch);
440:                        i++;
441:                        continue;
442:                    }
443:
444:                    /* \\u-escapes are only interpreted iff the number of leading
445:               backslashes is odd */
446:                    int bs = i;
447:                    while (i < size) {
448:                        ch = str.charAt(i);
449:                        if (ch != '\\')
450:                            break;
451:                        v.append(ch);
452:                        i++;
453:                    }
454:                    if (((i - bs) & 1) == 0 || i >= size || ch != 'u') {
455:                        continue;
456:                    }
457:                    v.setLength(v.length() - 1);
458:                    i++;
459:
460:                    /* \\uXXXX with 4 hex digits */
461:                    int x = 0;
462:                    for (int j = 0; j < 4; j++) {
463:                        ch = str.charAt(i + j);
464:                        int d = Character.digit(ch, 16);
465:                        if (d == -1) {
466:                            codecs.decoding_error("unicode escape", v, errors,
467:                                    "truncated \\uXXXX");
468:                            break;
469:                        }
470:                        x = ((x << 4) & ~0xF) + d;
471:                    }
472:                    i += 4;
473:                    v.append((char) x);
474:                }
475:                return v.toString();
476:            }
477:
478:            /* --- Utility methods -------------------------------------------- */
479:
480:            public static void encoding_error(String type, StringBuffer dest,
481:                    String errors, String details) {
482:                if (errors == null || errors == "strict") {
483:                    throw Py.UnicodeError(type + " encoding error: " + details);
484:                } else if (errors == "ignore") {
485:                    //ignore
486:                } else if (errors == "replace") {
487:                    dest.append('?');
488:                } else {
489:                    throw Py.ValueError(type + " encoding error; "
490:                            + "unknown error handling code: " + errors);
491:                }
492:            }
493:
494:            public static void decoding_error(String type, StringBuffer dest,
495:                    String errors, String details) {
496:                if (errors == null || errors == "strict") {
497:                    throw Py.UnicodeError(type + " decoding error: " + details);
498:                } else if (errors == "ignore") {
499:                    //ignore
500:                } else if (errors == "replace") {
501:                    if (dest != null) {
502:                        dest.append(Py_UNICODE_REPLACEMENT_CHARACTER);
503:                    }
504:                } else {
505:                    throw Py.ValueError(type + " decoding error; "
506:                            + "unknown error handling code: " + errors);
507:                }
508:            }
509:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.