001: /*
002: * Copyright 2002-2006 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: /*
027: * @(#)ISCII91.java 1.18 07/05/05
028: */
029:
030: package sun.nio.cs.ext;
031:
032: import java.nio.charset.Charset;
033: import java.nio.CharBuffer;
034: import java.nio.ByteBuffer;
035: import java.nio.charset.CharsetDecoder;
036: import java.nio.charset.CharsetEncoder;
037: import java.nio.charset.CoderResult;
038: import sun.nio.cs.Surrogate;
039: import sun.nio.cs.HistoricallyNamedCharset;
040:
041: public class ISCII91 extends Charset implements
042: HistoricallyNamedCharset {
/** DEVANAGARI SIGN NUKTA (U+093C); appears as ISCII byte e9 in the tables of this class. */
043: private static final char NUKTA_CHAR = '\u093c';
/** DEVANAGARI SIGN VIRAMA, the halant (U+094D); appears as ISCII byte e8 in the tables of this class. */
044: private static final char HALANT_CHAR = '\u094d';
/** Marker byte (255) stored in the encoder mapping table where no ISCII byte applies. */
045: private static final byte NO_CHAR = (byte) 255;
046:
/**
 * Creates the charset under the canonical name {@code x-ISCII91}, using
 * the aliases that {@code ExtendedCharsets.aliasesFor} returns for that
 * name.
 */
public ISCII91() {
    super("x-ISCII91", ExtendedCharsets.aliasesFor("x-ISCII91"));
}
050:
051: public String historicalName() {
052: return "ISCII91";
053: }
054:
055: public boolean contains(Charset cs) {
056: return ((cs.name().equals("US-ASCII")) || (cs instanceof ISCII91));
057: }
058:
059: public CharsetDecoder newDecoder() {
060: return new Decoder(this );
061: }
062:
063: public CharsetEncoder newEncoder() {
064: return new Encoder(this );
065: }
066:
/*
 * Byte-to-char table used by the decoder.
 *
 * Layout: 255 entries. Entries 0-127 hold the ASCII characters at their
 * own index and entries 128-159 hold U+FFFF. The ISCII entries follow,
 * each stored one slot BELOW its byte value: the entry commented "a1" sits
 * at index 160 and the entry commented "ff" at index 254. This matches the
 * decoder's lookup, which adds 255 (not 256) to a negative byte before
 * indexing.
 * NOTE(review): with that adjustment byte 0x80 indexes slot 127 (U+007F)
 * rather than an "unknown" slot -- confirm this is intended before touching
 * either the table or the lookup.
 *
 * Entry values: U+FFFF marks a byte with no mapping; U+FFFD marks the ATR
 * and EXT codes.
 */
067: private static final char[] directMapTable = { '\u0000', // ascii character
068: '\u0001', // ascii character
069: '\u0002', // ascii character
070: '\u0003', // ascii character
071: '\u0004', // ascii character
072: '\u0005', // ascii character
073: '\u0006', // ascii character
074: '\u0007', // ascii character
075: '\u0008', // ascii character
076: '\u0009', // ascii character
077: '\012', // ascii character
078: '\u000b', // ascii character
079: '\u000c', // ascii character
080: '\015', // ascii character
081: '\u000e', // ascii character
082: '\u000f', // ascii character
083: '\u0010', // ascii character
084: '\u0011', // ascii character
085: '\u0012', // ascii character
086: '\u0013', // ascii character
087: '\u0014', // ascii character
088: '\u0015', // ascii character
089: '\u0016', // ascii character
090: '\u0017', // ascii character
091: '\u0018', // ascii character
092: '\u0019', // ascii character
093: '\u001a', // ascii character
094: '\u001b', // ascii character
095: '\u001c', // ascii character
096: '\u001d', // ascii character
097: '\u001e', // ascii character
098: '\u001f', // ascii character
099: '\u0020', // ascii character
100: '\u0021', // ascii character
101: '\u0022', // ascii character
102: '\u0023', // ascii character
103: '\u0024', // ascii character
104: '\u0025', // ascii character
105: '\u0026', // ascii character
106: (char) 0x0027, // '\u0027' control -- ascii character
107: '\u0028', // ascii character
108: '\u0029', // ascii character
109: '\u002a', // ascii character
110: '\u002b', // ascii character
111: '\u002c', // ascii character
112: '\u002d', // ascii character
113: '\u002e', // ascii character
114: '\u002f', // ascii character
115: '\u0030', // ascii character
116: '\u0031', // ascii character
117: '\u0032', // ascii character
118: '\u0033', // ascii character
119: '\u0034', // ascii character
120: '\u0035', // ascii character
121: '\u0036', // ascii character
122: '\u0037', // ascii character
123: '\u0038', // ascii character
124: '\u0039', // ascii character
125: '\u003a', // ascii character
126: '\u003b', // ascii character
127: '\u003c', // ascii character
128: '\u003d', // ascii character
129: '\u003e', // ascii character
130: '\u003f', // ascii character
131: '\u0040', // ascii character
132: '\u0041', // ascii character
133: '\u0042', // ascii character
134: '\u0043', // ascii character
135: '\u0044', // ascii character
136: '\u0045', // ascii character
137: '\u0046', // ascii character
138: '\u0047', // ascii character
139: '\u0048', // ascii character
140: '\u0049', // ascii character
141: '\u004a', // ascii character
142: '\u004b', // ascii character
143: '\u004c', // ascii character
144: '\u004d', // ascii character
145: '\u004e', // ascii character
146: '\u004f', // ascii character
147: '\u0050', // ascii character
148: '\u0051', // ascii character
149: '\u0052', // ascii character
150: '\u0053', // ascii character
151: '\u0054', // ascii character
152: '\u0055', // ascii character
153: '\u0056', // ascii character
154: '\u0057', // ascii character
155: '\u0058', // ascii character
156: '\u0059', // ascii character
157: '\u005a', // ascii character
158: '\u005b', // ascii character
159: '\\',// '\u005c' -- ascii character
160: '\u005d', // ascii character
161: '\u005e', // ascii character
162: '\u005f', // ascii character
163: '\u0060', // ascii character
164: '\u0061', // ascii character
165: '\u0062', // ascii character
166: '\u0063', // ascii character
167: '\u0064', // ascii character
168: '\u0065', // ascii character
169: '\u0066', // ascii character
170: '\u0067', // ascii character
171: '\u0068', // ascii character
172: '\u0069', // ascii character
173: '\u006a', // ascii character
174: '\u006b', // ascii character
175: '\u006c', // ascii character
176: '\u006d', // ascii character
177: '\u006e', // ascii character
178: '\u006f', // ascii character
179: '\u0070', // ascii character
180: '\u0071', // ascii character
181: '\u0072', // ascii character
182: '\u0073', // ascii character
183: '\u0074', // ascii character
184: '\u0075', // ascii character
185: '\u0076', // ascii character
186: '\u0077', // ascii character
187: '\u0078', // ascii character
188: '\u0079', // ascii character
189: '\u007a', // ascii character
190: '\u007b', // ascii character
191: '\u007c', // ascii character
192: '\u007d', // ascii character
193: '\u007e', // ascii character
194: '\u007f', // ascii character
195: '\uffff', // unknown character
196: '\uffff', // unknown character
197: '\uffff', // unknown character
198: '\uffff', // unknown character
199: '\uffff', // unknown character
200: '\uffff', // unknown character
201: '\uffff', // unknown character
202: '\uffff', // unknown character
203: '\uffff', // unknown character
204: '\uffff', // unknown character
205: '\uffff', // unknown character
206: '\uffff', // unknown character
207: '\uffff', // unknown character
208: '\uffff', // unknown character
209: '\uffff', // unknown character
210: '\uffff', // unknown character
211: '\uffff', // unknown character
212: '\uffff', // unknown character
213: '\uffff', // unknown character
214: '\uffff', // unknown character
215: '\uffff', // unknown character
216: '\uffff', // unknown character
217: '\uffff', // unknown character
218: '\uffff', // unknown character
219: '\uffff', // unknown character
220: '\uffff', // unknown character
221: '\uffff', // unknown character
222: '\uffff', // unknown character
223: '\uffff', // unknown character
224: '\uffff', // unknown character
225: '\uffff', // unknown character
226: '\uffff', // unknown character
227: '\u0901', // a1 -- Vowel-modifier CHANDRABINDU
228: '\u0902', // a2 -- Vowel-modifier ANUSWAR
229: '\u0903', // a3 -- Vowel-modifier VISARG
230:
231: '\u0905', // a4 -- Vowel A
232: '\u0906', // a5 -- Vowel AA
233: '\u0907', // a6 -- Vowel I
234: '\u0908', // a7 -- Vowel II
235: '\u0909', // a8 -- Vowel U
236: '\u090a', // a9 -- Vowel UU
237: '\u090b', // aa -- Vowel RI
238: '\u090e', // ab -- Vowel E ( Southern Scripts )
239: '\u090f', // ac -- Vowel EY
240: '\u0910', // ad -- Vowel AI
241: '\u090d', // ae -- Vowel AYE ( Devanagari Script )
242: '\u0912', // af -- Vowel O ( Southern Scripts )
243: '\u0913', // b0 -- Vowel OW
244: '\u0914', // b1 -- Vowel AU
245: '\u0911', // b2 -- Vowel AWE ( Devanagari Script )
246: '\u0915', // b3 -- Consonant KA
247: '\u0916', // b4 -- Consonant KHA
248: '\u0917', // b5 -- Consonant GA
249: '\u0918', // b6 -- Consonant GHA
250: '\u0919', // b7 -- Consonant NGA
251: '\u091a', // b8 -- Consonant CHA
252: '\u091b', // b9 -- Consonant CHHA
253: '\u091c', // ba -- Consonant JA
254: '\u091d', // bb -- Consonant JHA
255: '\u091e', // bc -- Consonant JNA
256: '\u091f', // bd -- Consonant Hard TA
257: '\u0920', // be -- Consonant Hard THA
258: '\u0921', // bf -- Consonant Hard DA
259: '\u0922', // c0 -- Consonant Hard DHA
260: '\u0923', // c1 -- Consonant Hard NA
261: '\u0924', // c2 -- Consonant Soft TA
262: '\u0925', // c3 -- Consonant Soft THA
263: '\u0926', // c4 -- Consonant Soft DA
264: '\u0927', // c5 -- Consonant Soft DHA
265: '\u0928', // c6 -- Consonant Soft NA
266: '\u0929', // c7 -- Consonant NA ( Tamil )
267: '\u092a', // c8 -- Consonant PA
268: '\u092b', // c9 -- Consonant PHA
269: '\u092c', // ca -- Consonant BA
270: '\u092d', // cb -- Consonant BHA
271: '\u092e', // cc -- Consonant MA
272: '\u092f', // cd -- Consonant YA
273: '\u095f', // ce -- Consonant JYA ( Bengali, Assamese & Oriya )
274: '\u0930', // cf -- Consonant RA
275: '\u0931', // d0 -- Consonant Hard RA ( Southern Scripts )
276: '\u0932', // d1 -- Consonant LA
277: '\u0933', // d2 -- Consonant Hard LA
278: '\u0934', // d3 -- Consonant ZHA ( Tamil & Malayalam )
279: '\u0935', // d4 -- Consonant VA
280: '\u0936', // d5 -- Consonant SHA
281: '\u0937', // d6 -- Consonant Hard SHA
282: '\u0938', // d7 -- Consonant SA
283: '\u0939', // d8 -- Consonant HA
284:
285: '\u200d', // d9 -- Consonant INVISIBLE
286: '\u093e', // da -- Vowel Sign AA
287:
288: '\u093f', // db -- Vowel Sign I
289: '\u0940', // dc -- Vowel Sign II
290: '\u0941', // dd -- Vowel Sign U
291: '\u0942', // de -- Vowel Sign UU
292: '\u0943', // df -- Vowel Sign RI
293: '\u0946', // e0 -- Vowel Sign E ( Southern Scripts )
294: '\u0947', // e1 -- Vowel Sign EY
295: '\u0948', // e2 -- Vowel Sign AI
296: '\u0945', // e3 -- Vowel Sign AYE ( Devanagari Script )
297: '\u094a', // e4 -- Vowel Sign O ( Southern Scripts )
298: '\u094b', // e5 -- Vowel Sign OW
299: '\u094c', // e6 -- Vowel Sign AU
300: '\u0949', // e7 -- Vowel Sign AWE ( Devanagari Script )
301:
302: '\u094d', // e8 -- Vowel Omission Sign ( Halant )
303: '\u093c', // e9 -- Diacritic Sign ( Nukta )
304: '\u0964', // ea -- Full Stop ( Viram, Northern Scripts )
305:
306: '\uffff', // eb -- This position shall not be used
307: '\uffff', // ec -- This position shall not be used
308: '\uffff', // ed -- This position shall not be used
309: '\uffff', // ee -- This position shall not be used
310:
311: '\ufffd', // ef -- Attribute Code ( ATR )
312: '\ufffd', // f0 -- Extension Code ( EXT )
313:
314: '\u0966', // f1 -- Digit 0
315: '\u0967', // f2 -- Digit 1
316: '\u0968', // f3 -- Digit 2
317: '\u0969', // f4 -- Digit 3
318: '\u096a', // f5 -- Digit 4
319: '\u096b', // f6 -- Digit 5
320: '\u096c', // f7 -- Digit 6
321: '\u096d', // f8 -- Digit 7
322: '\u096e', // f9 -- Digit 8
323: '\u096f', // fa -- Digit 9
324:
325: '\uffff', // fb -- This position shall not be used
326: '\uffff', // fc -- This position shall not be used
327: '\uffff', // fd -- This position shall not be used
328: '\uffff', // fe -- This position shall not be used
329: '\uffff' // ff -- This position shall not be used
330: }; //end of table definition
331:
332: /**
333: * This accessor is temporarily supplied while sun.io
334: * converters co-exist with the sun.nio.cs.{ext} charset coders
335: * These facilitate sharing of conversion tables between the
336: * two co-existing implementations. When sun.io converters
337: * are made extinct these will be unnecessary and should be removed
338: */
339:
340: public static char[] getDirectMapTable() {
341: return directMapTable;
342: }
343:
/*
 * Char-to-byte table used by the encoder, covering U+0900 through U+097F.
 *
 * Each character owns two consecutive slots starting at index
 * 2 * (ch - 0x0900): the first ISCII byte, then either a second ISCII byte
 * or NO_CHAR when the character encodes to a single byte. NO_CHAR in the
 * first slot means the character has no ISCII mapping.
 */
344: private static final byte[] encoderMappingTable = { NO_CHAR,
345: NO_CHAR, //0900 <reserved>
346: (byte) 161, NO_CHAR, //0901 -- DEVANAGARI SIGN CANDRABINDU = anunasika
347: (byte) 162, NO_CHAR, //0902 -- DEVANAGARI SIGN ANUSVARA = bindu
348: (byte) 163, NO_CHAR, //0903 -- DEVANAGARI SIGN VISARGA
349: NO_CHAR, NO_CHAR, //0904 <reserved>
350: (byte) 164, NO_CHAR, //0905 -- DEVANAGARI LETTER A
351: (byte) 165, NO_CHAR, //0906 -- DEVANAGARI LETTER AA
352: (byte) 166, NO_CHAR, //0907 -- DEVANAGARI LETTER I
353: (byte) 167, NO_CHAR, //0908 -- DEVANAGARI LETTER II
354: (byte) 168, NO_CHAR, //0909 -- DEVANAGARI LETTER U
355: (byte) 169, NO_CHAR, //090a -- DEVANAGARI LETTER UU
356: (byte) 170, NO_CHAR, //090b -- DEVANAGARI LETTER VOCALIC R
357: (byte) 166, (byte) 233, //090c -- DEVANAGARI LETTER VOCALIC L
358: (byte) 174, NO_CHAR, //090d -- DEVANAGARI LETTER CANDRA E
359: (byte) 171, NO_CHAR, //090e -- DEVANAGARI LETTER SHORT E
360: (byte) 172, NO_CHAR, //090f -- DEVANAGARI LETTER E
361: (byte) 173, NO_CHAR, //0910 -- DEVANAGARI LETTER AI
362: (byte) 178, NO_CHAR, //0911 -- DEVANAGARI LETTER CANDRA O
363: (byte) 175, NO_CHAR, //0912 -- DEVANAGARI LETTER SHORT O
364: (byte) 176, NO_CHAR, //0913 -- DEVANAGARI LETTER O
365: (byte) 177, NO_CHAR, //0914 -- DEVANAGARI LETTER AU
366: (byte) 179, NO_CHAR, //0915 -- DEVANAGARI LETTER KA
367: (byte) 180, NO_CHAR, //0916 -- DEVANAGARI LETTER KHA
368: (byte) 181, NO_CHAR, //0917 -- DEVANAGARI LETTER GA
369: (byte) 182, NO_CHAR, //0918 -- DEVANAGARI LETTER GHA
370: (byte) 183, NO_CHAR, //0919 -- DEVANAGARI LETTER NGA
371: (byte) 184, NO_CHAR, //091a -- DEVANAGARI LETTER CA
372: (byte) 185, NO_CHAR, //091b -- DEVANAGARI LETTER CHA
373: (byte) 186, NO_CHAR, //091c -- DEVANAGARI LETTER JA
374: (byte) 187, NO_CHAR, //091d -- DEVANAGARI LETTER JHA
375: (byte) 188, NO_CHAR, //091e -- DEVANAGARI LETTER NYA
376: (byte) 189, NO_CHAR, //091f -- DEVANAGARI LETTER TTA
377: (byte) 190, NO_CHAR, //0920 -- DEVANAGARI LETTER TTHA
378: (byte) 191, NO_CHAR, //0921 -- DEVANAGARI LETTER DDA
379: (byte) 192, NO_CHAR, //0922 -- DEVANAGARI LETTER DDHA
380: (byte) 193, NO_CHAR, //0923 -- DEVANAGARI LETTER NNA
381: (byte) 194, NO_CHAR, //0924 -- DEVANAGARI LETTER TA
382: (byte) 195, NO_CHAR, //0925 -- DEVANAGARI LETTER THA
383: (byte) 196, NO_CHAR, //0926 -- DEVANAGARI LETTER DA
384: (byte) 197, NO_CHAR, //0927 -- DEVANAGARI LETTER DHA
385: (byte) 198, NO_CHAR, //0928 -- DEVANAGARI LETTER NA
386: (byte) 199, NO_CHAR, //0929 -- DEVANAGARI LETTER NNNA <=> 0928 + 093C
387: (byte) 200, NO_CHAR, //092a -- DEVANAGARI LETTER PA
388: (byte) 201, NO_CHAR, //092b -- DEVANAGARI LETTER PHA
389: (byte) 202, NO_CHAR, //092c -- DEVANAGARI LETTER BA
390: (byte) 203, NO_CHAR, //092d -- DEVANAGARI LETTER BHA
391: (byte) 204, NO_CHAR, //092e -- DEVANAGARI LETTER MA
392: (byte) 205, NO_CHAR, //092f -- DEVANAGARI LETTER YA
393: (byte) 207, NO_CHAR, //0930 -- DEVANAGARI LETTER RA
394: (byte) 208, NO_CHAR, //0931 -- DEVANAGARI LETTER RRA <=> 0930 + 093C
395: (byte) 209, NO_CHAR, //0932 -- DEVANAGARI LETTER LA
396: (byte) 210, NO_CHAR, //0933 -- DEVANAGARI LETTER LLA
397: (byte) 211, NO_CHAR, //0934 -- DEVANAGARI LETTER LLLA <=> 0933 + 093C
398: (byte) 212, NO_CHAR, //0935 -- DEVANAGARI LETTER VA
399: (byte) 213, NO_CHAR, //0936 -- DEVANAGARI LETTER SHA
400: (byte) 214, NO_CHAR, //0937 -- DEVANAGARI LETTER SSA
401: (byte) 215, NO_CHAR, //0938 -- DEVANAGARI LETTER SA
402: (byte) 216, NO_CHAR, //0939 -- DEVANAGARI LETTER HA
403: NO_CHAR, NO_CHAR, //093a <reserved>
404: NO_CHAR, NO_CHAR, //093b <reserved>
405: (byte) 233, NO_CHAR, //093c -- DEVANAGARI SIGN NUKTA
406: (byte) 234, (byte) 233, //093d -- DEVANAGARI SIGN AVAGRAHA
407: (byte) 218, NO_CHAR, //093e -- DEVANAGARI VOWEL SIGN AA
408: (byte) 219, NO_CHAR, //093f -- DEVANAGARI VOWEL SIGN I
409: (byte) 220, NO_CHAR, //0940 -- DEVANAGARI VOWEL SIGN II
410: (byte) 221, NO_CHAR, //0941 -- DEVANAGARI VOWEL SIGN U
411: (byte) 222, NO_CHAR, //0942 -- DEVANAGARI VOWEL SIGN UU
412: (byte) 223, NO_CHAR, //0943 -- DEVANAGARI VOWEL SIGN VOCALIC R
413: (byte) 223, (byte) 233, //0944 -- DEVANAGARI VOWEL SIGN VOCALIC RR
414: (byte) 227, NO_CHAR, //0945 -- DEVANAGARI VOWEL SIGN CANDRA E
415: (byte) 224, NO_CHAR, //0946 -- DEVANAGARI VOWEL SIGN SHORT E
416: (byte) 225, NO_CHAR, //0947 -- DEVANAGARI VOWEL SIGN E
417: (byte) 226, NO_CHAR, //0948 -- DEVANAGARI VOWEL SIGN AI
418: (byte) 231, NO_CHAR, //0949 -- DEVANAGARI VOWEL SIGN CANDRA O
419: (byte) 228, NO_CHAR, //094a -- DEVANAGARI VOWEL SIGN SHORT O
420: (byte) 229, NO_CHAR, //094b -- DEVANAGARI VOWEL SIGN O
421: (byte) 230, NO_CHAR, //094c -- DEVANAGARI VOWEL SIGN AU
422: (byte) 232, NO_CHAR, //094d -- DEVANAGARI SIGN VIRAMA ( halant )
423: NO_CHAR, NO_CHAR, //094e <reserved>
424: NO_CHAR, NO_CHAR, //094f <reserved>
425: (byte) 161, (byte) 233, //0950 -- DEVANAGARI OM
426: (byte) 240, (byte) 181, //0951 -- DEVANAGARI STRESS SIGN UDATTA
427: (byte) 240, (byte) 184, //0952 -- DEVANAGARI STRESS SIGN ANUDATTA
428: (byte) 254, NO_CHAR, //0953 -- DEVANAGARI GRAVE ACCENT || MISSING
429: (byte) 254, NO_CHAR, //0954 -- DEVANAGARI ACUTE ACCENT || MISSING
430: NO_CHAR, NO_CHAR, //0955 <reserved>
431: NO_CHAR, NO_CHAR, //0956 <reserved>
432: NO_CHAR, NO_CHAR, //0957 <reserved>
433: (byte) 179, (byte) 233, //0958 -- DEVANAGARI LETTER QA <=> 0915 + 093C
434: (byte) 180, (byte) 233, //0959 -- DEVANAGARI LETTER KHHA <=> 0916 + 093C
435: (byte) 181, (byte) 233, //095a -- DEVANAGARI LETTER GHHA <=> 0917 + 093C
436: (byte) 186, (byte) 233, //095b -- DEVANAGARI LETTER ZA <=> 091C + 093C
437: (byte) 191, (byte) 233, //095c -- DEVANAGARI LETTER DDDHA <=> 0921 + 093C
438: (byte) 192, (byte) 233, //095d -- DEVANAGARI LETTER RHA <=> 0922 + 093C
439: (byte) 201, (byte) 233, //095e -- DEVANAGARI LETTER FA <=> 092B + 093C
440: (byte) 206, NO_CHAR, //095f -- DEVANAGARI LETTER YYA <=> 092F + 093C
441: (byte) 170, (byte) 233, //0960 -- DEVANAGARI LETTER VOCALIC RR
442: (byte) 167, (byte) 233, //0961 -- DEVANAGARI LETTER VOCALIC LL
443: (byte) 219, (byte) 233, //0962 -- DEVANAGARI VOWEL SIGN VOCALIC L
444: (byte) 220, (byte) 233, //0963 -- DEVANAGARI VOWEL SIGN VOCALIC LL
445: (byte) 234, NO_CHAR, //0964 -- DEVANAGARI DANDA ( phrase separator )
446: (byte) 234, (byte) 234, //0965 -- DEVANAGARI DOUBLE DANDA
447: (byte) 241, NO_CHAR, //0966 -- DEVANAGARI DIGIT ZERO
448: (byte) 242, NO_CHAR, //0967 -- DEVANAGARI DIGIT ONE
449: (byte) 243, NO_CHAR, //0968 -- DEVANAGARI DIGIT TWO
450: (byte) 244, NO_CHAR, //0969 -- DEVANAGARI DIGIT THREE
451: (byte) 245, NO_CHAR, //096a -- DEVANAGARI DIGIT FOUR
452: (byte) 246, NO_CHAR, //096b -- DEVANAGARI DIGIT FIVE
453: (byte) 247, NO_CHAR, //096c -- DEVANAGARI DIGIT SIX
454: (byte) 248, NO_CHAR, //096d -- DEVANAGARI DIGIT SEVEN
455: (byte) 249, NO_CHAR, //096e -- DEVANAGARI DIGIT EIGHT
456: (byte) 250, NO_CHAR, //096f -- DEVANAGARI DIGIT NINE
457: (byte) 240, (byte) 191, //0970 -- DEVANAGARI ABBREVIATION SIGN
458: NO_CHAR, NO_CHAR, //0971 -- reserved
459: NO_CHAR, NO_CHAR, //0972 -- reserved
460: NO_CHAR, NO_CHAR, //0973 -- reserved
461: NO_CHAR, NO_CHAR, //0974 -- reserved
462: NO_CHAR, NO_CHAR, //0975 -- reserved
463: NO_CHAR, NO_CHAR, //0976 -- reserved
464: NO_CHAR, NO_CHAR, //0977 -- reserved
465: NO_CHAR, NO_CHAR, //0978 -- reserved
466: NO_CHAR, NO_CHAR, //0979 -- reserved
467: NO_CHAR, NO_CHAR, //097a -- reserved
468: NO_CHAR, NO_CHAR, //097b -- reserved
469: NO_CHAR, NO_CHAR, //097c -- reserved
470: NO_CHAR, NO_CHAR, //097d -- reserved
471: NO_CHAR, NO_CHAR, //097e -- reserved
472: NO_CHAR, NO_CHAR //097f -- reserved
473: }; //end of table definition
474:
475: /**
476: * This accessor is temporarily supplied while sun.io
477: * converters co-exist with the sun.nio.cs.{ext} charset coders
478: * These facilitate sharing of conversion tables between the
479: * two co-existing implementations. When sun.io converters
480: * are made extinct these will be unnecessary and should be removed
481: */
482:
483: public static byte[] getEncoderMappingTable() {
484: return encoderMappingTable;
485: }
486:
487: private static class Decoder extends CharsetDecoder {
488:
/** Zero-width non-joiner; the decode loops write it for a halant that directly follows a halant. */
489: private static final char ZWNJ_CHAR = '\u200c';
/** Zero-width joiner; the decode loops write it for a nukta that directly follows a halant. */
490: private static final char ZWJ_CHAR = '\u200d';
/** Table value of a byte with no mapping; also used as the "no context" value of contextChar. */
491: private static final char INVALID_CHAR = '\uffff';
492:
/** Character produced by the most recently consumed byte; kept across decodeLoop calls. */
493: private char contextChar = INVALID_CHAR;
/** True while contextChar has been consumed from the input but not yet written to the output. */
494: private boolean needFlushing = false;
495:
/**
 * Creates a decoder for the given charset, passing 1.0 to the superclass
 * for both the average and the maximum chars-per-byte.
 *
 * @param cs the charset this decoder belongs to
 */
private Decoder(Charset cs) {
    super(cs, 1.0f, 1.0f);
}
499:
500: protected CoderResult implFlush(CharBuffer out) {
501: if (needFlushing) {
502: if (out.remaining() < 1) {
503: return CoderResult.OVERFLOW;
504: } else {
505: out.put(contextChar);
506: }
507: }
508: contextChar = INVALID_CHAR;
509: needFlushing = false;
510: return CoderResult.UNDERFLOW;
511: }
512:
513: /*Rules:
514: * 1)ATR,EXT,following character to be replaced with '\ufffd'
515: * 2)Halant + Halant => '\u094d' (Virama) + '\u200c'(ZWNJ)
516: * 3)Halant + Nukta => '\u094d' (Virama) + '\u200d'(ZWJ)
517: */
/*
 * Decodes ISCII bytes into chars, working directly on the backing arrays
 * of both buffers.
 *
 * The loop is context sensitive: contextChar remembers the character of
 * the previous byte, and needFlushing says whether that character is still
 * waiting to be written. Both fields survive between calls.
 *
 * Returns UNDERFLOW once all input is consumed, OVERFLOW when dst has no
 * room for the next char, and an unmappable result of length 1 for a byte
 * with no mapping while nothing is held back. The positions of src and dst
 * are updated on every exit path.
 */
518: private CoderResult decodeArrayLoop(ByteBuffer src,
519: CharBuffer dst) {
520: byte[] sa = src.array();
521: int sp = src.arrayOffset() + src.position();
522: int sl = src.arrayOffset() + src.limit();
523: assert (sp <= sl);
524: sp = (sp <= sl ? sp : sl);
525:
526: char[] da = dst.array();
527: int dp = dst.arrayOffset() + dst.position();
528: int dl = dst.arrayOffset() + dst.limit();
529: assert (dp <= dl);
530: dp = (dp <= dl ? dp : dl);
531:
532: try {
533: while (sp < sl) {
// Table lookup. A negative (high) byte is shifted by 255, which is
// the offset the layout of directMapTable is built around.
534: int index = sa[sp];
535: index = (index < 0) ? (index + 255) : index;
536: char currentChar = directMapTable[index];
537:
538: // if the contextChar is either ATR || EXT
539: // set the output to '\ufffd'
540: if (contextChar == '\ufffd') {
541: if (dl - dp < 1)
542: return CoderResult.OVERFLOW;
543: da[dp++] = '\ufffd';
544: contextChar = INVALID_CHAR;
545: needFlushing = false;
546: sp++;
547: continue;
548: }
549:
550: switch (currentChar) {
// These are exactly the characters for which the nukta case below
// has a combined form, so they are held back instead of written.
551: case '\u0901':
552: case '\u0907':
553: case '\u0908':
554: case '\u090b':
555: case '\u093f':
556: case '\u0940':
557: case '\u0943':
558: case '\u0964':
559: if (needFlushing) {
// Something is already held back: write it and hold currentChar
// in its place (needFlushing stays set).
560: if (dl - dp < 1)
561: return CoderResult.OVERFLOW;
562: da[dp++] = contextChar;
563: contextChar = currentChar;
564: sp++;
565: continue;
566: }
567: contextChar = currentChar;
568: needFlushing = true;
569: sp++;
570: continue;
571: case NUKTA_CHAR:
572: if (dl - dp < 1)
573: return CoderResult.OVERFLOW;
// Fold the nukta into the previous character where a combined
// form exists; only that single combined char is written.
574: switch (contextChar) {
575: case '\u0901':
576: da[dp++] = '\u0950';
577: break;
578: case '\u0907':
579: da[dp++] = '\u090c';
580: break;
581: case '\u0908':
582: da[dp++] = '\u0961';
583: break;
584: case '\u090b':
585: da[dp++] = '\u0960';
586: break;
587: case '\u093f':
588: da[dp++] = '\u0962';
589: break;
590: case '\u0940':
591: da[dp++] = '\u0963';
592: break;
593: case '\u0943':
594: da[dp++] = '\u0944';
595: break;
596: case '\u0964':
597: da[dp++] = '\u093d';
598: break;
599: case HALANT_CHAR:
600: if (needFlushing) {
601: da[dp++] = contextChar;
602: contextChar = currentChar;
603: sp++;
604: continue;
605: }
// Rule 3: a nukta directly after a halant becomes ZWJ.
606: da[dp++] = ZWJ_CHAR;
607: break;
608: default:
609: if (needFlushing) {
610: da[dp++] = contextChar;
611: contextChar = currentChar;
612: sp++;
613: continue;
614: }
// No combined form for the previous character: emit the nukta itself.
615: da[dp++] = NUKTA_CHAR;
616: }
617: break;
618: case HALANT_CHAR:
619: if (dl - dp < 1)
620: return CoderResult.OVERFLOW;
621: if (needFlushing) {
622: da[dp++] = contextChar;
623: contextChar = currentChar;
624: sp++;
625: continue;
626: }
// Rule 2: the second of two consecutive halants becomes ZWNJ.
627: if (contextChar == HALANT_CHAR) {
628: da[dp++] = ZWNJ_CHAR;
629: break;
630: }
631: da[dp++] = HALANT_CHAR;
632: break;
633: case INVALID_CHAR:
// NOTE(review): while a character is held back, the unmappable byte
// is not reported; U+FFFF becomes the held character instead.
// Confirm this is intended.
634: if (needFlushing) {
635: if (dl - dp < 1)
636: return CoderResult.OVERFLOW;
637: da[dp++] = contextChar;
638: contextChar = currentChar;
639: sp++;
640: continue;
641: }
642: return CoderResult.unmappableForLength(1);
643: default:
644: if (dl - dp < 1)
645: return CoderResult.OVERFLOW;
646: if (needFlushing) {
647: da[dp++] = contextChar;
648: contextChar = currentChar;
649: sp++;
650: continue;
651: }
652: da[dp++] = currentChar;
653: break;
654: }//end switch
655:
// Reached only through a break above: a char was written for this
// byte, so nothing is held back any more.
656: contextChar = currentChar;
657: needFlushing = false;
658: sp++;
659: }
660: return CoderResult.UNDERFLOW;
661: } finally {
// Publish progress on every exit path, early returns included.
662: src.position(sp - src.arrayOffset());
663: dst.position(dp - dst.arrayOffset());
664: }
665: }
666:
/*
 * Decodes ISCII bytes into chars through the buffers' get/put methods;
 * decodeLoop uses it when either buffer is not array-backed. The decision
 * logic mirrors decodeArrayLoop: contextChar remembers the character of
 * the previous byte and needFlushing says whether it is still waiting to
 * be written.
 *
 * mark counts the bytes that were fully consumed. Because src.get()
 * advances the position before the byte is accepted, the finally block
 * rewinds src to mark so that a byte read on an OVERFLOW or unmappable
 * exit is seen again by the caller.
 *
 * Returns UNDERFLOW once all input is consumed, OVERFLOW when dst has no
 * room for the next char, and an unmappable result of length 1 for a byte
 * with no mapping while nothing is held back.
 */
667: private CoderResult decodeBufferLoop(ByteBuffer src,
668: CharBuffer dst) {
669: int mark = src.position();
670:
671: try {
672: while (src.hasRemaining()) {
// Table lookup. A negative (high) byte is shifted by 255, which is
// the offset the layout of directMapTable is built around.
673: int index = src.get();
674: index = (index < 0) ? (index + 255) : index;
675: char currentChar = directMapTable[index];
676:
677: // if the contextChar is either ATR || EXT
678: // set the output to '\ufffd'
679: if (contextChar == '\ufffd') {
680: if (dst.remaining() < 1)
681: return CoderResult.OVERFLOW;
682: dst.put('\ufffd');
683: contextChar = INVALID_CHAR;
684: needFlushing = false;
685: mark++;
686: continue;
687: }
688:
689: switch (currentChar) {
// These are exactly the characters for which the nukta case below
// has a combined form, so they are held back instead of written.
690: case '\u0901':
691: case '\u0907':
692: case '\u0908':
693: case '\u090b':
694: case '\u093f':
695: case '\u0940':
696: case '\u0943':
697: case '\u0964':
698: if (needFlushing) {
// Something is already held back: write it and hold currentChar
// in its place (needFlushing stays set).
699: if (dst.remaining() < 1)
700: return CoderResult.OVERFLOW;
701: dst.put(contextChar);
702: contextChar = currentChar;
703: mark++;
704: continue;
705: }
706: contextChar = currentChar;
707: needFlushing = true;
708: mark++;
709: continue;
710: case NUKTA_CHAR:
711: if (dst.remaining() < 1)
712: return CoderResult.OVERFLOW;
// Fold the nukta into the previous character where a combined
// form exists; only that single combined char is written.
713: switch (contextChar) {
714: case '\u0901':
715: dst.put('\u0950');
716: break;
717: case '\u0907':
718: dst.put('\u090c');
719: break;
720: case '\u0908':
721: dst.put('\u0961');
722: break;
723: case '\u090b':
724: dst.put('\u0960');
725: break;
726: case '\u093f':
727: dst.put('\u0962');
728: break;
729: case '\u0940':
730: dst.put('\u0963');
731: break;
732: case '\u0943':
733: dst.put('\u0944');
734: break;
735: case '\u0964':
736: dst.put('\u093d');
737: break;
738: case HALANT_CHAR:
739: if (needFlushing) {
740: dst.put(contextChar);
741: contextChar = currentChar;
742: mark++;
743: continue;
744: }
// A nukta directly after a halant becomes ZWJ.
745: dst.put(ZWJ_CHAR);
746: break;
747: default:
748: if (needFlushing) {
749: dst.put(contextChar);
750: contextChar = currentChar;
751: mark++;
752: continue;
753: }
// No combined form for the previous character: emit the nukta itself.
754: dst.put(NUKTA_CHAR);
755: }
756: break;
757: case HALANT_CHAR:
758: if (dst.remaining() < 1)
759: return CoderResult.OVERFLOW;
760: if (needFlushing) {
761: dst.put(contextChar);
762: contextChar = currentChar;
763: mark++;
764: continue;
765: }
// The second of two consecutive halants becomes ZWNJ.
766: if (contextChar == HALANT_CHAR) {
767: dst.put(ZWNJ_CHAR);
768: break;
769: }
770: dst.put(HALANT_CHAR);
771: break;
772: case INVALID_CHAR:
// NOTE(review): while a character is held back, the unmappable byte
// is not reported; U+FFFF becomes the held character instead.
// Confirm this is intended.
773: if (needFlushing) {
774: if (dst.remaining() < 1)
775: return CoderResult.OVERFLOW;
776: dst.put(contextChar);
777: contextChar = currentChar;
778: mark++;
779: continue;
780: }
781: return CoderResult.unmappableForLength(1);
782: default:
783: if (dst.remaining() < 1)
784: return CoderResult.OVERFLOW;
785: if (needFlushing) {
786: dst.put(contextChar);
787: contextChar = currentChar;
788: mark++;
789: continue;
790: }
791: dst.put(currentChar);
792: break;
793: }//end switch
// Reached only through a break above: a char was written for this
// byte, so nothing is held back any more.
794: contextChar = currentChar;
795: needFlushing = false;
796: mark++;
797: }
798: return CoderResult.UNDERFLOW;
799: } finally {
// Rewind to the first byte that was not fully consumed.
800: src.position(mark);
801: }
802: }
803:
804: protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
805: if (src.hasArray() && dst.hasArray())
806: return decodeArrayLoop(src, dst);
807: else
808: return decodeBufferLoop(src, dst);
809: }
810: }
811:
812: private static class Encoder extends CharsetEncoder {
813:
/** Same value as the enclosing class's NO_CHAR; marks "no byte" in encoderMappingTable. */
814: private static final byte NO_CHAR = (byte) 255;
815:
816: //private static CharToByteISCII91 c2b = new CharToByteISCII91();
817: //private static final byte[] directMapTable = c2b.getISCIIEncoderMap();
818:
/** Surrogate parser the encode loops use to pick the result returned for surrogate input. */
819: private final Surrogate.Parser sgp = new Surrogate.Parser();
820:
/**
 * Creates an encoder for the given charset, passing 2.0 to the superclass
 * for both the average and the maximum bytes-per-char.
 *
 * @param cs the charset this encoder belongs to
 */
private Encoder(Charset cs) {
    super(cs, 2.0f, 2.0f);
}
824:
825: public boolean canEncode(char ch) {
826: //check for Devanagari range,ZWJ,ZWNJ and ASCII range.
827: return ((ch >= '\u0900' && ch <= '\u097f' && encoderMappingTable[2 * (ch - '\u0900')] != NO_CHAR)
828: || (ch == '\u200d') || (ch == '\u200c') || (ch <= '\u007f'));
829: }
830:
831: private CoderResult encodeArrayLoop(CharBuffer src,
832: ByteBuffer dst) {
833: char[] sa = src.array();
834: int sp = src.arrayOffset() + src.position();
835: int sl = src.arrayOffset() + src.limit();
836: assert (sp <= sl);
837: sp = (sp <= sl ? sp : sl);
838: byte[] da = dst.array();
839: int dp = dst.arrayOffset() + dst.position();
840: int dl = dst.arrayOffset() + dst.limit();
841: assert (dp <= dl);
842: dp = (dp <= dl ? dp : dl);
843:
844: int outputSize = 0;
845:
846: try {
847: char inputChar;
848: while (sp < sl) {
849: int index = Integer.MIN_VALUE;
850: inputChar = sa[sp];
851:
852: if (inputChar >= 0x0000 && inputChar <= 0x007f) {
853: if (dl - dp < 1)
854: return CoderResult.OVERFLOW;
855: da[dp++] = (byte) inputChar;
856: sp++;
857: continue;
858: }
859:
860: // if inputChar == ZWJ replace it with halant
861: // if inputChar == ZWNJ replace it with Nukta
862:
863: if (inputChar == 0x200c) {
864: inputChar = HALANT_CHAR;
865: } else if (inputChar == 0x200d) {
866: inputChar = NUKTA_CHAR;
867: }
868:
869: if (inputChar >= 0x0900 && inputChar <= 0x097f) {
870: index = ((int) (inputChar) - 0x0900) * 2;
871: }
872:
873: if (Surrogate.is(inputChar)) {
874: if (sgp.parse(inputChar, sa, sp, sl) < 0)
875: return sgp.error();
876: return sgp.unmappableResult();
877: }
878:
879: if (index == Integer.MIN_VALUE
880: || encoderMappingTable[index] == NO_CHAR) {
881: return CoderResult.unmappableForLength(1);
882: } else {
883: if (encoderMappingTable[index + 1] == NO_CHAR) {
884: if (dl - dp < 1)
885: return CoderResult.OVERFLOW;
886: da[dp++] = encoderMappingTable[index];
887: } else {
888: if (dl - dp < 2)
889: return CoderResult.OVERFLOW;
890: da[dp++] = encoderMappingTable[index];
891: da[dp++] = encoderMappingTable[index + 1];
892: }
893: sp++;
894: }
895: }
896: return CoderResult.UNDERFLOW;
897: } finally {
898: src.position(sp - src.arrayOffset());
899: dst.position(dp - dst.arrayOffset());
900: }
901: }
902:
903: private CoderResult encodeBufferLoop(CharBuffer src,
904: ByteBuffer dst) {
905: int mark = src.position();
906:
907: try {
908: char inputChar;
909: while (src.hasRemaining()) {
910: int index = Integer.MIN_VALUE;
911: inputChar = src.get();
912:
913: if (inputChar >= 0x0000 && inputChar <= 0x007f) {
914: if (dst.remaining() < 1)
915: return CoderResult.OVERFLOW;
916: dst.put((byte) inputChar);
917: mark++;
918: continue;
919: }
920:
921: // if inputChar == ZWJ replace it with halant
922: // if inputChar == ZWNJ replace it with Nukta
923:
924: if (inputChar == 0x200c) {
925: inputChar = HALANT_CHAR;
926: } else if (inputChar == 0x200d) {
927: inputChar = NUKTA_CHAR;
928: }
929:
930: if (inputChar >= 0x0900 && inputChar <= 0x097f) {
931: index = ((int) (inputChar) - 0x0900) * 2;
932: }
933:
934: if (Surrogate.is(inputChar)) {
935: if (sgp.parse(inputChar, src) < 0)
936: return sgp.error();
937: return sgp.unmappableResult();
938: }
939:
940: if (index == Integer.MIN_VALUE
941: || encoderMappingTable[index] == NO_CHAR) {
942: return CoderResult.unmappableForLength(1);
943: } else {
944: if (encoderMappingTable[index + 1] == NO_CHAR) {
945: if (dst.remaining() < 1)
946: return CoderResult.OVERFLOW;
947: dst.put(encoderMappingTable[index]);
948: } else {
949: if (dst.remaining() < 2)
950: return CoderResult.OVERFLOW;
951: dst.put(encoderMappingTable[index]);
952: dst.put(encoderMappingTable[index + 1]);
953: }
954: }
955: mark++;
956: }
957: return CoderResult.UNDERFLOW;
958: } finally {
959: src.position(mark);
960: }
961: }
962:
963: protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
964: if (src.hasArray() && dst.hasArray())
965: return encodeArrayLoop(src, dst);
966: else
967: return encodeBufferLoop(src, dst);
968: }
969: }
970: }
|