0001: /**
0002: *******************************************************************************
0003: * Copyright (C) 2006, International Business Machines Corporation and *
0004: * others. All Rights Reserved. *
0005: *******************************************************************************
0006: *
0007: *******************************************************************************
0008: */package com.ibm.icu.dev.test.charset;
0009:
0010: import java.nio.ByteBuffer;
0011: import java.nio.CharBuffer;
0012: import java.nio.charset.CharacterCodingException;
0013: import java.nio.charset.Charset;
0014: import java.nio.charset.CharsetDecoder;
0015: import java.nio.charset.CharsetEncoder;
0016: import java.nio.charset.CoderResult;
0017: import java.nio.charset.CodingErrorAction;
0018: import java.nio.charset.UnsupportedCharsetException;
0019: import java.nio.charset.spi.CharsetProvider;
0020: import java.util.Iterator;
0021: import java.util.MissingResourceException;
0022: import java.util.Set;
0023: import java.util.SortedMap;
0024:
0025: import com.ibm.icu.charset.*;
0026: import com.ibm.icu.dev.test.TestFmwk;
0027: import com.ibm.icu.text.UTF16;
0028:
0029: public class TestCharset extends TestFmwk {
0030: private String encoding = "UTF-16";
0031: CharsetDecoder decoder = null;
0032: CharsetEncoder encoder = null;
0033: Charset charset = null;
0034: static final String unistr = "abcd\ud800\udc00\u1234\u00a5\u3000\r\n";
0035: static final byte[] byteStr = { (byte) 0x00, (byte) 'a',
0036: (byte) 0x00, (byte) 'b', (byte) 0x00, (byte) 'c',
0037: (byte) 0x00, (byte) 'd', (byte) 0xd8, (byte) 0x00,
0038: (byte) 0xdc, (byte) 0x00, (byte) 0x12, (byte) 0x34,
0039: (byte) 0x00, (byte) 0xa5, (byte) 0x30, (byte) 0x00,
0040: (byte) 0x00, (byte) 0x0d, (byte) 0x00, (byte) 0x0a };
0041: static final byte[] expectedByteStr = { (byte) 0xFE, (byte) 0xFF,
0042: (byte) 0x00, (byte) 'a', (byte) 0x00, (byte) 'b',
0043: (byte) 0x00, (byte) 'c', (byte) 0x00, (byte) 'd',
0044: (byte) 0xd8, (byte) 0x00, (byte) 0xdc, (byte) 0x00,
0045: (byte) 0x12, (byte) 0x34, (byte) 0x00, (byte) 0xa5,
0046: (byte) 0x30, (byte) 0x00, (byte) 0x00, (byte) 0x0d,
0047: (byte) 0x00, (byte) 0x0a };
0048:
0049: protected void init() {
0050: try {
0051: CharsetProviderICU provider = new CharsetProviderICU();
0052: //Charset charset = CharsetICU.forName(encoding);
0053: charset = provider.charsetForName(encoding);
0054: decoder = (CharsetDecoder) charset.newDecoder();
0055: encoder = (CharsetEncoder) charset.newEncoder();
0056: } catch (MissingResourceException ex) {
0057: warnln("Could not load charset data");
0058: }
0059: }
0060:
0061: public static void main(String[] args) throws Exception {
0062: new TestCharset().run(args);
0063: }
0064:
0065: public void TestUTF16Converter() {
0066: CharsetProvider icu = new CharsetProviderICU();
0067: Charset cs1 = icu.charsetForName("UTF-16");
0068: CharsetEncoder e1 = cs1.newEncoder();
0069: CharsetDecoder d1 = cs1.newDecoder();
0070:
0071: Charset cs2 = icu.charsetForName("UTF-16LE");
0072: CharsetEncoder e2 = cs2.newEncoder();
0073: CharsetDecoder d2 = cs2.newDecoder();
0074:
0075: for (int i = 0x0000; i < 0x10FFFF; i += 0xFF) {
0076: CharBuffer us = CharBuffer.allocate(0xFF * 2);
0077: ByteBuffer bs1 = ByteBuffer.allocate(0xFF * 8);
0078: ByteBuffer bs2 = ByteBuffer.allocate(0xFF * 8);
0079: for (int j = 0; j < 0xFF; j++) {
0080: int c = i + j;
0081:
0082: if ((c >= 0xd800 && c <= 0xdFFF) || c > 0x10FFFF) {
0083: continue;
0084: }
0085:
0086: if (c > 0xFFFF) {
0087: char lead = UTF16.getLeadSurrogate(c);
0088: char trail = UTF16.getTrailSurrogate(c);
0089: if (!UTF16.isLeadSurrogate(lead)) {
0090: errln("lead is not lead!" + lead
0091: + " for cp: \\U"
0092: + Integer.toHexString(c));
0093: continue;
0094: }
0095: if (!UTF16.isTrailSurrogate(trail)) {
0096: errln("trail is not trail!" + trail);
0097: continue;
0098: }
0099: us.put(lead);
0100: us.put(trail);
0101: bs1.put((byte) (lead >> 8));
0102: bs1.put((byte) (lead & 0xFF));
0103: bs1.put((byte) (trail >> 8));
0104: bs1.put((byte) (trail & 0xFF));
0105:
0106: bs2.put((byte) (lead & 0xFF));
0107: bs2.put((byte) (lead >> 8));
0108: bs2.put((byte) (trail & 0xFF));
0109: bs2.put((byte) (trail >> 8));
0110: } else {
0111:
0112: if (c < 0xFF) {
0113: bs1.put((byte) 0x00);
0114: bs1.put((byte) (c));
0115: bs2.put((byte) (c));
0116: bs2.put((byte) 0x00);
0117: } else {
0118: bs1.put((byte) (c >> 8));
0119: bs1.put((byte) (c & 0xFF));
0120:
0121: bs2.put((byte) (c & 0xFF));
0122: bs2.put((byte) (c >> 8));
0123: }
0124: us.put((char) c);
0125: }
0126: }
0127:
0128: us.limit(us.position());
0129: us.position(0);
0130: if (us.length() == 0) {
0131: continue;
0132: }
0133:
0134: bs1.limit(bs1.position());
0135: bs1.position(0);
0136: ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity());
0137: newBS.put((byte) 0xFE);
0138: newBS.put((byte) 0xFF);
0139: newBS.put(bs1);
0140: bs1.position(0);
0141: smBufDecode(d1, "UTF-16", bs1, us);
0142: smBufEncode(e1, "UTF-16", us, newBS);
0143:
0144: bs2.limit(bs2.position());
0145: bs2.position(0);
0146: newBS.clear();
0147: newBS.put((byte) 0xFF);
0148: newBS.put((byte) 0xFE);
0149: newBS.put(bs2);
0150: bs2.position(0);
0151: smBufDecode(d2, "UTF16-LE", bs2, us);
0152: smBufEncode(e2, "UTF-16LE", us, newBS);
0153:
0154: }
0155:
0156: }
0157:
0158: public void TestUTF32Converter() {
0159: CharsetProvider icu = new CharsetProviderICU();
0160: Charset cs1 = icu.charsetForName("UTF-32");
0161: CharsetEncoder e1 = cs1.newEncoder();
0162: CharsetDecoder d1 = cs1.newDecoder();
0163:
0164: Charset cs2 = icu.charsetForName("UTF-32LE");
0165: CharsetEncoder e2 = cs2.newEncoder();
0166: CharsetDecoder d2 = cs2.newDecoder();
0167:
0168: for (int i = 0x1d827; i < 0x10FFFF; i += 0xFF) {
0169: CharBuffer us = CharBuffer.allocate(0xFF * 2);
0170: ByteBuffer bs1 = ByteBuffer.allocate(0xFF * 8);
0171: ByteBuffer bs2 = ByteBuffer.allocate(0xFF * 8);
0172: for (int j = 0; j < 0xFF; j++) {
0173: int c = i + j;
0174:
0175: if ((c >= 0xd800 && c <= 0xdFFF) || c > 0x10FFFF) {
0176: continue;
0177: }
0178:
0179: if (c > 0xFFFF) {
0180: char lead = UTF16.getLeadSurrogate(c);
0181: char trail = UTF16.getTrailSurrogate(c);
0182:
0183: us.put(lead);
0184: us.put(trail);
0185: } else {
0186: us.put((char) c);
0187: }
0188: bs1.put((byte) (c >>> 24));
0189: bs1.put((byte) (c >>> 16));
0190: bs1.put((byte) (c >>> 8));
0191: bs1.put((byte) (c & 0xFF));
0192:
0193: bs2.put((byte) (c & 0xFF));
0194: bs2.put((byte) (c >>> 8));
0195: bs2.put((byte) (c >>> 16));
0196: bs2.put((byte) (c >>> 24));
0197: }
0198: bs1.limit(bs1.position());
0199: bs1.position(0);
0200: bs2.limit(bs2.position());
0201: bs2.position(0);
0202: us.limit(us.position());
0203: us.position(0);
0204: if (us.length() == 0) {
0205: continue;
0206: }
0207:
0208: ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity());
0209:
0210: newBS.put((byte) 0x00);
0211: newBS.put((byte) 0x00);
0212: newBS.put((byte) 0xFE);
0213: newBS.put((byte) 0xFF);
0214: newBS.put(bs1);
0215: bs1.position(0);
0216: smBufDecode(d1, "UTF-32", bs1, us);
0217: smBufEncode(e1, "UTF-32", us, newBS);
0218:
0219: newBS.clear();
0220: newBS.put((byte) 0xFF);
0221: newBS.put((byte) 0xFE);
0222: newBS.put((byte) 0x00);
0223: newBS.put((byte) 0x00);
0224: newBS.put(bs2);
0225: bs2.position(0);
0226: smBufDecode(d2, "UTF-32LE", bs2, us);
0227: smBufEncode(e2, "UTF-32LE", us, newBS);
0228: }
0229:
0230: }
0231:
0232: public void TestASCIIConverter() {
0233: CharsetProvider icu = new CharsetProviderICU();
0234: Charset icuChar = icu.charsetForName("ASCII");
0235: CharsetEncoder encoder = icuChar.newEncoder();
0236: CharsetDecoder decoder = icuChar.newDecoder();
0237:
0238: CharBuffer us = CharBuffer.allocate(0x90);
0239: ByteBuffer bs = ByteBuffer.allocate(0x90);
0240: for (int j = 0; j <= 0x7f; j++) {
0241: us.put((char) j);
0242: bs.put((byte) j);
0243: }
0244: bs.limit(bs.position());
0245: bs.position(0);
0246: us.limit(us.position());
0247: us.position(0);
0248: smBufDecode(decoder, "ASCII", bs, us);
0249: smBufEncode(encoder, "ASCII", us, bs);
0250:
0251: }
0252:
0253: public void Test88591Converter() {
0254: CharsetProvider icu = new CharsetProviderICU();
0255: Charset icuChar = icu.charsetForName("iso-8859-1");
0256: CharsetEncoder encoder = icuChar.newEncoder();
0257: CharsetDecoder decoder = icuChar.newDecoder();
0258:
0259: CharBuffer us = CharBuffer.allocate(0x100);
0260: ByteBuffer bs = ByteBuffer.allocate(0x100);
0261: for (int j = 0; j <= 0xFf; j++) {
0262: us.put((char) j);
0263: bs.put((byte) j);
0264: }
0265: bs.limit(bs.position());
0266: bs.position(0);
0267: us.limit(us.position());
0268: us.position(0);
0269: smBufDecode(decoder, "iso-8859-1", bs, us);
0270: smBufEncode(encoder, "iso-8859-1", us, bs);
0271:
0272: }
0273:
0274: public void TestAPISemantics(/*String encoding*/) throws Exception {
0275: int rc;
0276: ByteBuffer byes = ByteBuffer.wrap(byteStr);
0277: CharBuffer uniVal = CharBuffer.wrap(unistr);
0278: ByteBuffer expected = ByteBuffer.wrap(expectedByteStr);
0279:
0280: rc = 0;
0281: if (decoder == null) {
0282: warnln("Could not load decoder.");
0283: return;
0284: }
0285: decoder.reset();
0286: /* Convert the whole buffer to Unicode */
0287: try {
0288: CharBuffer chars = CharBuffer.allocate(unistr.length());
0289: CoderResult result = decoder.decode(byes, chars, false);
0290:
0291: if (result.isError()) {
0292: errln("ToChars encountered Error");
0293: rc = 1;
0294: }
0295: if (result.isOverflow()) {
0296: errln("ToChars encountered overflow exception");
0297: rc = 1;
0298: }
0299: if (!equals(chars, unistr)) {
0300: errln("ToChars does not match");
0301: printchars(chars);
0302: errln("Expected : ");
0303: printchars(unistr);
0304: rc = 2;
0305: }
0306:
0307: } catch (Exception e) {
0308: errln("ToChars - exception in buffer");
0309: rc = 5;
0310: }
0311:
0312: /* Convert single bytes to Unicode */
0313: try {
0314: CharBuffer chars = CharBuffer.allocate(unistr.length());
0315: ByteBuffer b = ByteBuffer.wrap(byteStr);
0316: decoder.reset();
0317: CoderResult result = null;
0318: for (int i = 1; i <= byteStr.length; i++) {
0319: b.limit(i);
0320: result = decoder.decode(b, chars, false);
0321: if (result.isOverflow()) {
0322: errln("ToChars single threw an overflow exception");
0323: }
0324: if (result.isError()) {
0325: errln("ToChars single the result is an error "
0326: + result.toString());
0327: }
0328: }
0329: if (unistr.length() != (chars.limit())) {
0330: errln("ToChars single len does not match");
0331: rc = 3;
0332: }
0333: if (!equals(chars, unistr)) {
0334: errln("ToChars single does not match");
0335: printchars(chars);
0336: rc = 4;
0337: }
0338: } catch (Exception e) {
0339: errln("ToChars - exception in single");
0340: //e.printStackTrace();
0341: rc = 6;
0342: }
0343:
0344: /* Convert the buffer one at a time to Unicode */
0345: try {
0346: CharBuffer chars = CharBuffer.allocate(unistr.length());
0347: decoder.reset();
0348: byes.rewind();
0349: for (int i = 1; i <= byteStr.length; i++) {
0350: byes.limit(i);
0351: CoderResult result = decoder.decode(byes, chars, false);
0352: if (result.isError()) {
0353: errln("Error while decoding: " + result.toString());
0354: }
0355: if (result.isOverflow()) {
0356: errln("ToChars Simple threw an overflow exception");
0357: }
0358: }
0359: if (chars.limit() != unistr.length()) {
0360: errln("ToChars Simple buffer len does not match");
0361: rc = 7;
0362: }
0363: if (!equals(chars, unistr)) {
0364: errln("ToChars Simple buffer does not match");
0365: printchars(chars);
0366: err(" Expected : ");
0367: printchars(unistr);
0368: rc = 8;
0369: }
0370: } catch (Exception e) {
0371: errln("ToChars - exception in single buffer");
0372: //e.printStackTrace(System.err);
0373: rc = 9;
0374: }
0375: if (rc != 0) {
0376: errln("Test Simple ToChars for encoding : FAILED");
0377: }
0378:
0379: rc = 0;
0380: /* Convert the whole buffer from unicode */
0381: try {
0382: ByteBuffer bytes = ByteBuffer
0383: .allocate(expectedByteStr.length);
0384: encoder.reset();
0385: CoderResult result = encoder.encode(uniVal, bytes, false);
0386: if (result.isError()) {
0387: errln("FromChars reported error: " + result.toString());
0388: rc = 1;
0389: }
0390: if (result.isOverflow()) {
0391: errln("FromChars threw an overflow exception");
0392: }
0393: bytes.position(0);
0394: if (!bytes.equals(expected)) {
0395: errln("FromChars does not match");
0396: printbytes(bytes);
0397: rc = 2;
0398: }
0399: } catch (Exception e) {
0400: errln("FromChars - exception in buffer");
0401: //e.printStackTrace(System.err);
0402: rc = 5;
0403: }
0404:
0405: /* Convert the buffer one char at a time to unicode */
0406: try {
0407: ByteBuffer bytes = ByteBuffer
0408: .allocate(expectedByteStr.length);
0409: CharBuffer c = CharBuffer.wrap(unistr);
0410: encoder.reset();
0411: CoderResult result = null;
0412: for (int i = 1; i <= unistr.length(); i++) {
0413: c.limit(i);
0414: result = encoder.encode(c, bytes, false);
0415: if (result.isOverflow()) {
0416: errln("FromChars single threw an overflow exception");
0417: }
0418: if (result.isError()) {
0419: errln("FromChars single threw an error: "
0420: + result.toString());
0421: }
0422: }
0423: if (expectedByteStr.length != bytes.limit()) {
0424: errln("FromChars single len does not match");
0425: rc = 3;
0426: }
0427:
0428: bytes.position(0);
0429: if (!bytes.equals(expected)) {
0430: errln("FromChars single does not match");
0431: printbytes(bytes);
0432: rc = 4;
0433: }
0434:
0435: } catch (Exception e) {
0436: errln("FromChars - exception in single");
0437: //e.printStackTrace(System.err);
0438: rc = 6;
0439: }
0440:
0441: /* Convert one char at a time to unicode */
0442: try {
0443: ByteBuffer bytes = ByteBuffer
0444: .allocate(expectedByteStr.length);
0445: encoder.reset();
0446: char[] temp = unistr.toCharArray();
0447: CoderResult result = null;
0448: for (int i = 0; i <= temp.length; i++) {
0449: uniVal.limit(i);
0450: result = encoder.encode(uniVal, bytes, false);
0451: if (result.isOverflow()) {
0452: errln("FromChars simple threw an overflow exception");
0453: }
0454: if (result.isError()) {
0455: errln("FromChars simple threw an error: "
0456: + result.toString());
0457: }
0458: }
0459: if (bytes.limit() != expectedByteStr.length) {
0460: errln("FromChars Simple len does not match");
0461: rc = 7;
0462: }
0463: if (!bytes.equals(byes)) {
0464: errln("FromChars Simple does not match");
0465: printbytes(bytes);
0466: rc = 8;
0467: }
0468: } catch (Exception e) {
0469: errln("FromChars - exception in single buffer");
0470: //e.printStackTrace(System.err);
0471: rc = 9;
0472: }
0473: if (rc != 0) {
0474: errln("Test Simple FromChars " + encoding + " --FAILED");
0475: }
0476: }
0477:
0478: void printchars(CharBuffer buf) {
0479: int i;
0480: char[] chars = new char[buf.limit()];
0481: //save the current position
0482: int pos = buf.position();
0483: buf.position(0);
0484: buf.get(chars);
0485: //reset to old position
0486: buf.position(pos);
0487: for (i = 0; i < chars.length; i++) {
0488: err(hex(chars[i]) + " ");
0489: }
0490: errln("");
0491: }
0492:
0493: void printchars(String str) {
0494: char[] chars = str.toCharArray();
0495: for (int i = 0; i < chars.length; i++) {
0496: err(hex(chars[i]) + " ");
0497: }
0498: errln("");
0499: }
0500:
0501: void printbytes(ByteBuffer buf) {
0502: int i;
0503: byte[] bytes = new byte[buf.limit()];
0504: //save the current position
0505: int pos = buf.position();
0506: buf.position(0);
0507: buf.get(bytes);
0508: //reset to old position
0509: buf.position(pos);
0510: for (i = 0; i < bytes.length; i++) {
0511: System.out.print(hex(bytes[i]) + " ");
0512: }
0513: errln("");
0514: }
0515:
0516: public boolean equals(CharBuffer buf, String str) {
0517: return equals(buf, str.toCharArray());
0518: }
0519:
0520: public boolean equals(CharBuffer buf, CharBuffer str) {
0521: return equals(buf.array(), str.array());
0522: }
0523:
0524: public boolean equals(CharBuffer buf, char[] compareTo) {
0525: char[] chars = new char[buf.limit()];
0526: //save the current position
0527: int pos = buf.position();
0528: buf.position(0);
0529: buf.get(chars);
0530: //reset to old position
0531: buf.position(pos);
0532: return equals(chars, compareTo);
0533: }
0534:
0535: public boolean equals(char[] chars, char[] compareTo) {
0536: if (chars.length != compareTo.length) {
0537: errln("Length does not match chars: " + chars.length
0538: + " compareTo: " + compareTo.length);
0539: return false;
0540: } else {
0541: boolean result = true;
0542: for (int i = 0; i < chars.length; i++) {
0543: if (chars[i] != compareTo[i]) {
0544: logln("Got: " + hex(chars[i]) + " Expected: "
0545: + hex(compareTo[i]) + " At: " + i);
0546: result = false;
0547: }
0548: }
0549: return result;
0550: }
0551: }
0552:
0553: public boolean equals(ByteBuffer buf, byte[] compareTo) {
0554: byte[] chars = new byte[buf.limit()];
0555: //save the current position
0556: int pos = buf.position();
0557: buf.position(0);
0558: buf.get(chars);
0559: //reset to old position
0560: buf.position(pos);
0561: return equals(chars, compareTo);
0562: }
0563:
0564: public boolean equals(ByteBuffer buf, ByteBuffer compareTo) {
0565: return equals(buf.array(), compareTo.array());
0566: }
0567:
0568: public boolean equals(byte[] chars, byte[] compareTo) {
0569: if (chars.length != compareTo.length) {
0570: errln("Length does not match chars: " + chars.length
0571: + " compareTo: " + compareTo.length);
0572: return false;
0573: } else {
0574: boolean result = true;
0575: for (int i = 0; i < chars.length; i++) {
0576: if (chars[i] != compareTo[i]) {
0577: logln("Got: " + hex(chars[i]) + " Expected: "
0578: + hex(compareTo[i]) + " At: " + i);
0579: result = false;
0580: }
0581: }
0582: return result;
0583: }
0584: }
0585:
0586: // TODO
0587: /*
0588: public void TestCallback(String encoding) throws Exception {
0589:
0590: byte[] gbSource =
0591: {
0592: (byte) 0x81,
0593: (byte) 0x36,
0594: (byte) 0xDE,
0595: (byte) 0x36,
0596: (byte) 0x81,
0597: (byte) 0x36,
0598: (byte) 0xDE,
0599: (byte) 0x37,
0600: (byte) 0x81,
0601: (byte) 0x36,
0602: (byte) 0xDE,
0603: (byte) 0x38,
0604: (byte) 0xe3,
0605: (byte) 0x32,
0606: (byte) 0x9a,
0607: (byte) 0x36 };
0608:
0609: char[] subChars = { 'P', 'I' };
0610:
0611: decoder.reset();
0612:
0613: decoder.replaceWith(new String(subChars));
0614: ByteBuffer mySource = ByteBuffer.wrap(gbSource);
0615: CharBuffer myTarget = CharBuffer.allocate(5);
0616:
0617: decoder.decode(mySource, myTarget, true);
0618: char[] expectedResult =
0619: { '\u22A6', '\u22A7', '\u22A8', '\u0050', '\u0049', };
0620:
0621: if (!equals(myTarget, new String(expectedResult))) {
0622: errln("Test callback GB18030 to Unicode : FAILED");
0623: }
0624:
0625: }
0626: */
0627: public void TestCanConvert(/*String encoding*/) throws Exception {
0628: char[] mySource = { '\ud800', '\udc00',/*surrogate pair */
0629: '\u22A6', '\u22A7', '\u22A8', '\u22A9', '\u22AA', '\u22AB',
0630: '\u22AC', '\u22AD', '\u22AE', '\u22AF', '\u22B0',
0631: '\u22B1', '\u22B2', '\u22B3', '\u22B4', '\ud800',
0632: '\udc00',/*surrogate pair */
0633: '\u22B5', '\u22B6', '\u22B7', '\u22B8', '\u22B9',
0634: '\u22BA', '\u22BB', '\u22BC', '\u22BD', '\u22BE' };
0635: if (encoder == null) {
0636: warnln("Could not load encoder.");
0637: return;
0638: }
0639: encoder.reset();
0640: if (!encoder.canEncode(new String(mySource))) {
0641: errln("Test canConvert() " + encoding + " failed. "
0642: + encoder);
0643: }
0644:
0645: }
0646:
0647: public void TestAvailableCharsets() {
0648: SortedMap map = Charset.availableCharsets();
0649: Set keySet = map.keySet();
0650: Iterator iter = keySet.iterator();
0651: while (iter.hasNext()) {
0652: logln("Charset name: " + iter.next().toString());
0653: }
0654: Object[] charsets = CharsetProviderICU.getAvailableNames();
0655: int mapSize = map.size();
0656: if (mapSize < charsets.length) {
0657: errln("Charset.availableCharsets() returned a number less than the number returned by icu. ICU: "
0658: + charsets.length + " JDK: " + mapSize);
0659: }
0660: logln("Total Number of chasets = " + map.size());
0661: }
0662:
0663: public void TestWindows936() {
0664: CharsetProviderICU icu = new CharsetProviderICU();
0665: Charset cs = icu.charsetForName("windows-936-2000");
0666: String canonicalName = cs.name();
0667: if (!canonicalName.equals("GBK")) {
0668: errln("Did not get the expected canonical name. Got: "
0669: + canonicalName); //get the canonical name
0670: }
0671: }
0672:
0673: public void TestICUAvailableCharsets() {
0674: CharsetProviderICU icu = new CharsetProviderICU();
0675: Object[] charsets = CharsetProviderICU.getAvailableNames();
0676: for (int i = 0; i < charsets.length; i++) {
0677: Charset cs = icu.charsetForName((String) charsets[i]);
0678: try {
0679: CharsetEncoder encoder = cs.newEncoder();
0680: if (encoder != null) {
0681: logln("Creation of encoder succeeded. "
0682: + cs.toString());
0683: }
0684: } catch (Exception ex) {
0685: errln("Could not instantiate encoder for "
0686: + charsets[i] + ". Error: " + ex.toString());
0687: }
0688: try {
0689: CharsetDecoder decoder = cs.newDecoder();
0690: if (decoder != null) {
0691: logln("Creation of decoder succeeded. "
0692: + cs.toString());
0693: }
0694: } catch (Exception ex) {
0695: errln("Could not instantiate decoder for "
0696: + charsets[i] + ". Error: " + ex.toString());
0697: }
0698: }
0699: }
0700:
0701: /* jitterbug 4312 */
0702: public void TestUnsupportedCharset() {
0703: CharsetProvider icu = new CharsetProviderICU();
0704: Charset icuChar = icu.charsetForName("impossible");
0705: if (icuChar != null) {
0706: errln("ICU does not conform to the spec");
0707: }
0708: }
0709:
0710: public void TestEncoderCreation() {
0711: try {
0712: Charset cs = Charset.forName("GB_2312-80");
0713: CharsetEncoder enc = cs.newEncoder();
0714: if (enc != null && (enc instanceof CharsetEncoderICU)) {
0715: logln("Successfully created the encoder: " + enc);
0716: } else {
0717: errln("Error creating charset encoder.");
0718: }
0719: } catch (Exception e) {
0720: warnln("Error creating charset encoder." + e.toString());
0721: // e.printStackTrace();
0722: }
0723: try {
0724: Charset cs = Charset.forName("x-ibm-971_P100-1995");
0725: CharsetEncoder enc = cs.newEncoder();
0726: if (enc != null && (enc instanceof CharsetEncoderICU)) {
0727: logln("Successfully created the encoder: " + enc);
0728: } else {
0729: errln("Error creating charset encoder.");
0730: }
0731: } catch (Exception e) {
0732: warnln("Error creating charset encoder." + e.toString());
0733: }
0734: }
0735:
0736: public void TestSubBytes() {
0737: try {
0738: //create utf-8 decoder
0739: CharsetDecoder decoder = new CharsetProviderICU()
0740: .charsetForName("utf-8").newDecoder();
0741:
0742: //create a valid byte array, which can be decoded to " buffer"
0743: byte[] unibytes = new byte[] { 0x0020, 0x0062, 0x0075,
0744: 0x0066, 0x0066, 0x0065, 0x0072 };
0745:
0746: ByteBuffer buffer = ByteBuffer.allocate(20);
0747:
0748: //add a evil byte to make the byte buffer be malformed input
0749: buffer.put((byte) 0xd8);
0750:
0751: //put the valid byte array
0752: buffer.put(unibytes);
0753:
0754: //reset postion
0755: buffer.flip();
0756:
0757: decoder.onMalformedInput(CodingErrorAction.REPLACE);
0758: CharBuffer out = decoder.decode(buffer);
0759: String expected = "\ufffd buffer";
0760: if (!expected.equals(new String(out.array()))) {
0761: errln("Did not get the expected result for substitution chars. Got: "
0762: + new String(out.array())
0763: + "("
0764: + hex(out.array()) + ")");
0765: }
0766: logln("Output: " + new String(out.array()) + "("
0767: + hex(out.array()) + ")");
0768: } catch (CharacterCodingException ex) {
0769: errln("Unexpected exception: " + ex.toString());
0770: }
0771: }
0772:
0773: /*
0774: public void TestImplFlushFailure(){
0775:
0776: try{
0777: CharBuffer in = CharBuffer.wrap("\u3005\u3006\u3007\u30FC\u2015\u2010\uFF0F");
0778: CharsetEncoder encoder = new CharsetProviderICU().charsetForName("iso-2022-jp").newEncoder();
0779: ByteBuffer out = ByteBuffer.allocate(30);
0780: encoder.encode(in, out, true);
0781: encoder.flush(out);
0782: if(out.position()!= 20){
0783: errln("Did not get the expected position from flush");
0784: }
0785:
0786: }catch (Exception ex){
0787: errln("Could not create encoder for iso-2022-jp exception: "+ex.toString());
0788: }
0789: }
0790: */
0791: public void TestISO88591() {
0792:
0793: Charset cs = new CharsetProviderICU()
0794: .charsetForName("iso-8859-1");
0795: if (cs != null) {
0796: CharsetEncoder encoder = cs.newEncoder();
0797: if (encoder != null) {
0798: encoder.canEncode("\uc2a3");
0799: } else {
0800: errln("Could not create encoder for iso-8859-1");
0801: }
0802: } else {
0803: errln("Could not create Charset for iso-8859-1");
0804: }
0805:
0806: }
0807:
0808: public void TestUTF8Encode() {
0809: CharsetEncoder encoderICU = new CharsetProviderICU()
0810: .charsetForName("utf-8").newEncoder();
0811: ByteBuffer out = ByteBuffer.allocate(30);
0812: CoderResult result = encoderICU.encode(CharBuffer
0813: .wrap("\ud800"), out, true);
0814:
0815: if (result.isMalformed()) {
0816: logln("\\ud800 is malformed for ICU4JNI utf-8 encoder");
0817: } else if (result.isUnderflow()) {
0818: errln("\\ud800 is OK for ICU4JNI utf-8 encoder");
0819: }
0820:
0821: CharsetEncoder encoderJDK = Charset.forName("utf-8")
0822: .newEncoder();
0823: result = encoderJDK.encode(CharBuffer.wrap("\ud800"),
0824: ByteBuffer.allocate(10), true);
0825: if (result.isUnderflow()) {
0826: errln("\\ud800 is OK for JDK utf-8 encoder");
0827: } else if (result.isMalformed()) {
0828: logln("\\ud800 is malformed for JDK utf-8 encoder");
0829: }
0830: }
0831:
0832: private void printCB(CharBuffer buf) {
0833: buf.rewind();
0834: while (buf.hasRemaining()) {
0835: System.out.println(hex(buf.get()));
0836: }
0837: buf.rewind();
0838: }
0839:
0840: /*
0841: public void TestUTF8() throws CharacterCodingException{
0842: try{
0843: CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("utf-8").newEncoder();
0844: encoderICU.encode(CharBuffer.wrap("\ud800"));
0845: errln("\\ud800 is OK for ICU4JNI utf-8 encoder");
0846: }catch (MalformedInputException e) {
0847: logln("\\ud800 is malformed for JDK utf-8 encoder");
0848: //e.printStackTrace();
0849: }
0850:
0851: CharsetEncoder encoderJDK = Charset.forName("utf-8").newEncoder();
0852: try {
0853: encoderJDK.encode(CharBuffer.wrap("\ud800"));
0854: errln("\\ud800 is OK for JDK utf-8 encoder");
0855: } catch (MalformedInputException e) {
0856: logln("\\ud800 is malformed for JDK utf-8 encoder");
0857: //e.printStackTrace();
0858: }
0859: }
0860: */
0861: public void TestUTF16Bom() {
0862:
0863: Charset cs = (new CharsetProviderICU())
0864: .charsetForName("UTF-16");
0865: char[] in = new char[] { 0x1122, 0x2211, 0x3344, 0x4433,
0866: 0x5566, 0x6655, 0x7788, 0x8877, 0x9900 };
0867: CharBuffer inBuf = CharBuffer.allocate(in.length);
0868: inBuf.put(in);
0869: CharsetEncoder encoder = cs.newEncoder();
0870: ByteBuffer outBuf = ByteBuffer.allocate(in.length * 2);
0871: inBuf.rewind();
0872: encoder.encode(inBuf, outBuf, true);
0873: outBuf.rewind();
0874: if (outBuf.remaining() > in.length * 2) {
0875: errln("The UTF16 encoder appended bom. Length returned: "
0876: + outBuf.remaining());
0877: }
0878: while (outBuf.hasRemaining()) {
0879: logln("0x" + hex(outBuf.get()));
0880: }
0881: CharsetDecoder decoder = cs.newDecoder();
0882: outBuf.rewind();
0883: CharBuffer rt = CharBuffer.allocate(in.length);
0884: decoder.decode(outBuf, rt, true);
0885: }
0886:
0887: private void smBufDecode(CharsetDecoder decoder, String encoding,
0888: ByteBuffer source, CharBuffer target) {
0889:
0890: ByteBuffer mySource = source.duplicate();
0891: CharBuffer myTarget = CharBuffer.allocate(target.capacity());
0892: {
0893: decoder.reset();
0894: myTarget.limit(target.limit());
0895: mySource.limit(source.limit());
0896: mySource.position(source.position());
0897: CoderResult result = CoderResult.UNDERFLOW;
0898: result = decoder.decode(mySource, myTarget, true);
0899: if (result.isError()) {
0900: errln("Test complete buffers while decoding failed. "
0901: + result.toString());
0902: return;
0903: }
0904: if (result.isOverflow()) {
0905: errln("Test complete buffers while decoding threw overflow exception");
0906: return;
0907: }
0908: myTarget.limit(myTarget.position());
0909: myTarget.position(0);
0910: target.position(0);
0911: if (result.isUnderflow() && !equals(myTarget, target)) {
0912: errln(" Test complete buffers while decoding "
0913: + encoding + " TO Unicode--failed");
0914: }
0915: }
0916: if (isQuick()) {
0917: return;
0918: }
0919: {
0920: decoder.reset();
0921: myTarget.limit(target.limit());
0922: mySource.limit(source.limit());
0923: mySource.position(source.position());
0924: myTarget.clear();
0925: myTarget.position(0);
0926:
0927: int inputLen = mySource.remaining();
0928:
0929: CoderResult result = CoderResult.UNDERFLOW;
0930: for (int i = 1; i <= inputLen; i++) {
0931: mySource.limit(i);
0932: if (i == inputLen) {
0933: result = decoder.decode(mySource, myTarget, true);
0934: } else {
0935: result = decoder.decode(mySource, myTarget, false);
0936: }
0937: if (result.isError()) {
0938: errln("Test small input buffers while decoding failed. "
0939: + result.toString());
0940: break;
0941: }
0942: if (result.isOverflow()) {
0943: errln("Test small input buffers while decoding threw overflow exception");
0944: break;
0945: }
0946:
0947: }
0948: myTarget.limit(myTarget.position());
0949: myTarget.position(0);
0950: target.position(0);
0951: if (result.isUnderflow() && !equals(myTarget, target)) {
0952: errln("Test small input buffers while decoding "
0953: + encoding + " TO Unicode--failed");
0954: }
0955: }
0956: {
0957: decoder.reset();
0958: myTarget.limit(target.limit());
0959: mySource.limit(source.limit());
0960: mySource.position(source.position());
0961: myTarget.clear();
0962: while (true) {
0963: int pos = myTarget.position();
0964: myTarget.limit(++pos);
0965: CoderResult result = decoder.decode(mySource, myTarget,
0966: false);
0967: if (result.isError()) {
0968: errln("Test small output buffers while decoding "
0969: + result.toString());
0970: }
0971: if (mySource.position() == mySource.limit()) {
0972: result = decoder.decode(mySource, myTarget, true);
0973: if (result.isError()) {
0974: errln("Test small output buffers while decoding "
0975: + result.toString());
0976: }
0977: result = decoder.flush(myTarget);
0978: if (result.isError()) {
0979: errln("Test small output buffers while decoding "
0980: + result.toString());
0981: }
0982: break;
0983: }
0984: }
0985:
0986: if (!equals(myTarget, target)) {
0987: errln("Test small output buffers " + encoding
0988: + " TO Unicode failed");
0989: }
0990: }
0991: }
0992:
0993: private void smBufEncode(CharsetEncoder encoder, String encoding,
0994: CharBuffer source, ByteBuffer target) {
0995: logln("Running smBufEncode for " + encoding + " with class "
0996: + encoder);
0997: CharBuffer mySource = source.duplicate();
0998: ByteBuffer myTarget = ByteBuffer.allocate(target.capacity());
0999: {
1000: logln("Running tests on small input buffers for "
1001: + encoding);
1002: encoder.reset();
1003: myTarget.limit(target.limit());
1004: mySource.limit(source.limit());
1005: mySource.position(source.position());
1006: CoderResult result = null;
1007:
1008: result = encoder.encode(mySource, myTarget, true);
1009:
1010: if (result.isError()) {
1011: errln("Test complete while encoding failed. "
1012: + result.toString());
1013: }
1014: if (result.isOverflow()) {
1015: errln("Test complete while encoding threw overflow exception");
1016: }
1017: if (!equals(myTarget, target)) {
1018:
1019: errln("Test complete buffers while encoding for "
1020: + encoding + " failed");
1021:
1022: } else {
1023: logln("Tests complete buffers for " + encoding
1024: + " passed");
1025: }
1026: }
1027: if (isQuick()) {
1028: return;
1029: }
1030: {
1031: logln("Running tests on small input buffers for "
1032: + encoding);
1033: encoder.reset();
1034: myTarget.clear();
1035: myTarget.limit(target.limit());
1036: mySource.limit(source.limit());
1037: mySource.position(source.position());
1038: int inputLen = mySource.limit();
1039: CoderResult result = null;
1040: for (int i = 1; i <= inputLen; i++) {
1041: mySource.limit(i);
1042: result = encoder.encode(mySource, myTarget, false);
1043: if (result.isError()) {
1044: errln("Test small input buffers while encoding failed. "
1045: + result.toString());
1046: }
1047: if (result.isOverflow()) {
1048: errln("Test small input buffers while encoding threw overflow exception");
1049: }
1050: }
1051: if (!equals(myTarget, target)) {
1052: errln("Test small input buffers " + encoding
1053: + " From Unicode failed");
1054: } else {
1055: logln("Tests on small input buffers for " + encoding
1056: + " passed");
1057: }
1058: }
1059: {
1060: logln("Running tests on small output buffers for "
1061: + encoding);
1062: encoder.reset();
1063: myTarget.clear();
1064: myTarget.limit(target.limit());
1065: mySource.limit(source.limit());
1066: mySource.position(source.position());
1067: mySource.position(0);
1068: myTarget.position(0);
1069: logln("myTarget.limit: " + myTarget.limit()
1070: + " myTarget.capcity: " + myTarget.capacity());
1071:
1072: while (true) {
1073: int pos = myTarget.position();
1074:
1075: CoderResult result = encoder.encode(mySource, myTarget,
1076: false);
1077: logln("myTarget.Position: " + pos + " myTarget.limit: "
1078: + myTarget.limit());
1079: logln("mySource.position: " + mySource.position()
1080: + " mySource.limit: " + mySource.limit());
1081:
1082: if (result.isError()) {
1083: errln("Test small output buffers while encoding "
1084: + result.toString());
1085: }
1086: if (mySource.position() == mySource.limit()) {
1087: result = encoder.encode(mySource, myTarget, true);
1088: if (result.isError()) {
1089: errln("Test small output buffers while encoding "
1090: + result.toString());
1091: }
1092:
1093: myTarget.limit(myTarget.capacity());
1094: result = encoder.flush(myTarget);
1095: if (result.isError()) {
1096: errln("Test small output buffers while encoding "
1097: + result.toString());
1098: }
1099: break;
1100: }
1101: }
1102: if (!equals(target, myTarget)) {
1103: errln("Test small output buffers " + encoding
1104: + " From Unicode failed.");
1105: }
1106: logln("Tests on small output buffers for " + encoding
1107: + " passed");
1108:
1109: }
1110: }
1111:
1112: public void convertAllTest(ByteBuffer bSource, CharBuffer uSource)
1113: throws Exception {
1114: {
1115: try {
1116: decoder.reset();
1117: ByteBuffer mySource = bSource.duplicate();
1118: CharBuffer myTarget = decoder.decode(mySource);
1119: if (!equals(myTarget, uSource)) {
1120: errln("--Test convertAll() " + encoding
1121: + " to Unicode --FAILED");
1122: }
1123: } catch (Exception e) {
1124: //e.printStackTrace();
1125: errln(e.getMessage());
1126: }
1127: }
1128: {
1129: try {
1130: encoder.reset();
1131: CharBuffer mySource = CharBuffer.wrap(uSource);
1132: ByteBuffer myTarget = encoder.encode(mySource);
1133: if (!equals(myTarget, bSource)) {
1134: errln("--Test convertAll() " + encoding
1135: + " to Unicode --FAILED");
1136: }
1137: } catch (Exception e) {
1138: //e.printStackTrace();
1139: errln("encoder.encode() failed " + e.getMessage() + " "
1140: + e.toString());
1141: }
1142: }
1143:
1144: }
1145:
1146: //TODO
1147: /*
1148: public void testString(ByteBuffer bSource, CharBuffer uSource) {
1149: try {
1150: {
1151: String source = new String(uSource);
1152: byte[] target = source.getBytes(encoding);
1153: if (!equals(target, bSource)) {
1154: errln("encode using string API failed");
1155: }
1156: }
1157: {
1158:
1159: String target = new String(getByteArray(gbSource), encoding);
1160: if (!equals(uSource, target.toCharArray())) {
1161: errln("decode using string API failed");
1162: }
1163: }
1164: } catch (Exception e) {
1165: //e.printStackTrace();
1166: errln(e.getMessage());
1167: }
1168: }
1169:
1170: private void fromUnicodeTest() throws Exception {
1171:
1172: logln("Loaded Charset: " + charset.getClass().toString());
1173: logln("Loaded CharsetEncoder: " + encoder.getClass().toString());
1174: logln("Loaded CharsetDecoder: " + decoder.getClass().toString());
1175:
1176: ByteBuffer myTarget = ByteBuffer.allocate(gbSource.length);
1177: logln("Created ByteBuffer of length: " + uSource.length);
1178: CharBuffer mySource = CharBuffer.wrap(uSource);
1179: logln("Wrapped ByteBuffer with CharBuffer ");
1180: encoder.reset();
1181: logln("Test Unicode to " + encoding );
1182: encoder.encode(mySource, myTarget, true);
1183: if (!equals(myTarget, gbSource)) {
1184: errln("--Test Unicode to " + encoding + ": FAILED");
1185: }
1186: logln("Test Unicode to " + encoding +" passed");
1187: }
1188:
1189: public void TestToUnicode( ) throws Exception {
1190:
1191: logln("Loaded Charset: " + charset.getClass().toString());
1192: logln("Loaded CharsetEncoder: " + encoder.getClass().toString());
1193: logln("Loaded CharsetDecoder: " + decoder.getClass().toString());
1194:
1195: CharBuffer myTarget = CharBuffer.allocate(uSource.length);
1196: ByteBuffer mySource = ByteBuffer.wrap(getByteArray(gbSource));
1197: decoder.reset();
1198: CoderResult result = decoder.decode(mySource, myTarget, true);
1199: if (result.isError()) {
1200: errln("Test ToUnicode -- FAILED");
1201: }
1202: if (!equals(myTarget, uSource)) {
1203: errln("--Test " + encoding + " to Unicode :FAILED");
1204: }
1205: }
1206:
1207: public static byte[] getByteArray(char[] source) {
1208: byte[] target = new byte[source.length];
1209: int i = source.length;
1210: for (; --i >= 0;) {
1211: target[i] = (byte) source[i];
1212: }
1213: return target;
1214: }
1215: /*
1216: private void smBufCharset(Charset charset) {
1217: try {
1218: ByteBuffer bTarget = charset.encode(CharBuffer.wrap(uSource));
1219: CharBuffer uTarget =
1220: charset.decode(ByteBuffer.wrap(getByteArray(gbSource)));
1221:
1222: if (!equals(uTarget, uSource)) {
1223: errln("Test " + charset.toString() + " to Unicode :FAILED");
1224: }
1225: if (!equals(bTarget, gbSource)) {
1226: errln("Test " + charset.toString() + " from Unicode :FAILED");
1227: }
1228: } catch (Exception ex) {
1229: errln("Encountered exception in smBufCharset");
1230: }
1231: }
1232:
1233: public void TestMultithreaded() throws Exception {
1234: final Charset cs = Charset.forName(encoding);
1235: if (cs == charset) {
1236: errln("The objects are equal");
1237: }
1238: smBufCharset(cs);
1239: try {
1240: final Thread t1 = new Thread() {
1241: public void run() {
// commented out since the methods on
// Charset API are supposed to be thread
// safe ... to test it we don't sync
1245:
1246: // synchronized(charset){
1247: while (!interrupted()) {
1248: try {
1249: smBufCharset(cs);
1250: } catch (UnsupportedCharsetException ueEx) {
1251: errln(ueEx.toString());
1252: }
1253: }
1254:
1255: // }
1256: }
1257: };
1258: final Thread t2 = new Thread() {
1259: public void run() {
1260: // synchronized(charset){
1261: while (!interrupted()) {
1262: try {
1263: smBufCharset(cs);
1264: } catch (UnsupportedCharsetException ueEx) {
1265: errln(ueEx.toString());
1266: }
1267: }
1268:
1269: //}
1270: }
1271: };
1272: t1.start();
1273: t2.start();
1274: int i = 0;
1275: for (;;) {
1276: if (i > 1000000000) {
1277: try {
1278: t1.interrupt();
1279: } catch (Exception e) {
1280: }
1281: try {
1282: t2.interrupt();
1283: } catch (Exception e) {
1284: }
1285: break;
1286: }
1287: i++;
1288: }
1289: } catch (Exception e) {
1290: throw e;
1291: }
1292: }
1293:
1294: public void TestSynchronizedMultithreaded() throws Exception {
// Methods on CharsetDecoder and CharsetEncoder classes
// are inherently unsafe if accessed by multiple concurrent
// threads so we synchronize them
1298: final Charset charset = Charset.forName(encoding);
1299: final CharsetDecoder decoder = charset.newDecoder();
1300: final CharsetEncoder encoder = charset.newEncoder();
1301: try {
1302: final Thread t1 = new Thread() {
1303: public void run() {
1304: while (!interrupted()) {
1305: try {
1306: synchronized (encoder) {
1307: smBufEncode(encoder, encoding);
1308: }
1309: synchronized (decoder) {
1310: smBufDecode(decoder, encoding);
1311: }
1312: } catch (UnsupportedCharsetException ueEx) {
1313: errln(ueEx.toString());
1314: }
1315: }
1316:
1317: }
1318: };
1319: final Thread t2 = new Thread() {
1320: public void run() {
1321: while (!interrupted()) {
1322: try {
1323: synchronized (encoder) {
1324: smBufEncode(encoder, encoding);
1325: }
1326: synchronized (decoder) {
1327: smBufDecode(decoder, encoding);
1328: }
1329: } catch (UnsupportedCharsetException ueEx) {
1330: errln(ueEx.toString());
1331: }
1332: }
1333: }
1334: };
1335: t1.start();
1336: t2.start();
1337: int i = 0;
1338: for (;;) {
1339: if (i > 1000000000) {
1340: try {
1341: t1.interrupt();
1342: } catch (Exception e) {
1343: }
1344: try {
1345: t2.interrupt();
1346: } catch (Exception e) {
1347: }
1348: break;
1349: }
1350: i++;
1351: }
1352: } catch (Exception e) {
1353: throw e;
1354: }
1355: }
1356: */
1357:
1358: public void TestMBCS() {
1359: {
1360: // Encoder: from Unicode conversion
1361: CharsetEncoder encoderICU = new CharsetProviderICU()
1362: .charsetForName("ibm-971").newEncoder();
1363: ByteBuffer out = ByteBuffer.allocate(6);
1364: encoderICU.onUnmappableCharacter(CodingErrorAction.REPLACE);
1365: CoderResult result = encoderICU.encode(CharBuffer
1366: .wrap("\u0131\u0061\u00a1"), out, true);
1367: if (!result.isError()) {
1368: byte[] expected = { (byte) 0xA9, (byte) 0xA5,
1369: (byte) 0xAF, (byte) 0xFE, (byte) 0xA2,
1370: (byte) 0xAE };
1371: if (!equals(expected, out.array())) {
1372: errln("Did not get the expected result for substitution bytes. Got: "
1373: + hex(out.array()));
1374: }
1375: logln("Output: " + hex(out.array()));
1376: } else {
1377: errln("Encode operation failed for encoder: "
1378: + encoderICU.toString());
1379: }
1380: }
1381: {
1382: // Decoder: to Unicode conversion
1383: CharsetDecoder decoderICU = new CharsetProviderICU()
1384: .charsetForName("ibm-971").newDecoder();
1385: CharBuffer out = CharBuffer.allocate(3);
1386: decoderICU.onMalformedInput(CodingErrorAction.REPLACE);
1387: CoderResult result = decoderICU.decode(ByteBuffer
1388: .wrap(new byte[] { (byte) 0xA2, (byte) 0xAE,
1389: (byte) 0x12, (byte) 0x34, (byte) 0xEF,
1390: (byte) 0xDC }), out, true);
1391: if (!result.isError()) {
1392: char[] expected = { '\u00a1', '\ufffd', '\u6676' };
1393: if (!equals(expected, out.array())) {
1394: errln("Did not get the expected result for substitution chars. Got: "
1395: + hex(out.array()));
1396: }
1397: logln("Output: " + hex(out.array()));
1398: } else {
1399: errln("Decode operation failed for encoder: "
1400: + decoderICU.toString());
1401: }
1402: }
1403: }
1404:
1405: public void TestJB4897() {
1406: CharsetProviderICU provider = new CharsetProviderICU();
1407: Charset charset = provider.charsetForName("x-abracadabra");
1408: if (charset != null && charset.canEncode() == true) {
1409: errln("provider.charsetForName() does not validate the charset names");
1410: }
1411: }
1412:
1413: public void TestJB5027() {
1414: CharsetProviderICU provider = new CharsetProviderICU();
1415:
1416: Charset fake = provider.charsetForName("doesNotExist");
1417: if (fake != null) {
1418: errln("\"doesNotExist\" returned " + fake);
1419: }
1420: Charset xfake = provider.charsetForName("x-doesNotExist");
1421: if (xfake != null) {
1422: errln("\"x-doesNotExist\" returned " + xfake);
1423: }
1424: }
1425:
1426: //test to make sure that number of aliases and canonical names are in the charsets that are in
1427: public void TestAllNames() {
1428:
1429: CharsetProviderICU provider = new CharsetProviderICU();
1430: Object[] available = CharsetProviderICU.getAvailableNames();
1431: for (int i = 0; i < available.length; i++) {
1432: try {
1433: String canon = CharsetProviderICU
1434: .getICUCanonicalName((String) available[i]);
1435:
1436: // ',' is not allowed by Java's charset name checker
1437: if (canon.indexOf(',') >= 0) {
1438: continue;
1439: }
1440: Charset cs = provider
1441: .charsetForName((String) available[i]);
1442:
1443: Object[] javaAliases = cs.aliases().toArray();
1444: //seach for ICU canonical name in javaAliases
1445: boolean inAliasList = false;
1446: for (int j = 0; j < javaAliases.length; j++) {
1447: String java = (String) javaAliases[j];
1448: if (java.equals(canon)) {
1449: logln("javaAlias: " + java + " canon: " + canon);
1450: inAliasList = true;
1451: }
1452: }
1453: if (inAliasList == false) {
1454: errln("Could not find ICU canonical name: " + canon
1455: + " for java canonical name: "
1456: + available[i] + " " + i);
1457: }
1458: } catch (UnsupportedCharsetException ex) {
1459: errln("could no load charset " + available[i] + " "
1460: + ex.getMessage());
1461: continue;
1462: }
1463: }
1464: }
1465:
1466: public void TestDecoderImplFlush() {
1467: CharsetProviderICU provider = new CharsetProviderICU();
1468: Charset ics = provider.charsetForName("UTF-16");
1469: Charset jcs = Charset.forName("UTF-16"); // Java's UTF-16 charset
1470: execDecoder(jcs);
1471: execDecoder(ics);
1472: }
1473:
1474: public void TestEncoderImplFlush() {
1475: CharsetProviderICU provider = new CharsetProviderICU();
1476: Charset ics = provider.charsetForName("UTF-16");
1477: Charset jcs = Charset.forName("UTF-16"); // Java's UTF-16 charset
1478: execEncoder(jcs);
1479: execEncoder(ics);
1480: }
1481:
1482: private void execDecoder(Charset cs) {
1483: CharsetDecoder decoder = cs.newDecoder();
1484: decoder.onMalformedInput(CodingErrorAction.REPORT);
1485: decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
1486: CharBuffer out = CharBuffer.allocate(10);
1487: CoderResult result = decoder.decode(ByteBuffer.wrap(new byte[] {
1488: -1, -2, 32, 0, 98 }), out, false);
1489: result = decoder.decode(ByteBuffer.wrap(new byte[] { 98 }),
1490: out, true);
1491:
1492: logln(cs.getClass().toString() + ":" + result.toString());
1493: try {
1494: result = decoder.flush(out);
1495: logln(cs.getClass().toString() + ":" + result.toString());
1496: } catch (Exception e) {
1497: errln(e.getMessage() + " " + cs.getClass().toString());
1498: }
1499: }
1500:
1501: private void execEncoder(Charset cs) {
1502: CharsetEncoder encoder = cs.newEncoder();
1503: encoder.onMalformedInput(CodingErrorAction.REPORT);
1504: encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
1505: ByteBuffer out = ByteBuffer.allocate(10);
1506: CoderResult result = encoder.encode(CharBuffer.wrap(new char[] {
1507: '\uFFFF', '\u2345', 32, 98 }), out, false);
1508: logln(cs.getClass().toString() + ":" + result.toString());
1509: result = encoder.encode(CharBuffer.wrap(new char[] { 98 }),
1510: out, true);
1511:
1512: logln(cs.getClass().toString() + ":" + result.toString());
1513: try {
1514: result = encoder.flush(out);
1515: logln(cs.getClass().toString() + ":" + result.toString());
1516: } catch (Exception e) {
1517: errln(e.getMessage() + " " + cs.getClass().toString());
1518: }
1519: }
1520:
1521: public void TestDecodeMalformed() {
1522: CharsetProviderICU provider = new CharsetProviderICU();
1523: Charset ics = provider.charsetForName("UTF-16BE");
1524: //Use SUN's charset
1525: Charset jcs = Charset.forName("UTF-16");
1526: CoderResult ir = execMalformed(ics);
1527: CoderResult jr = execMalformed(jcs);
1528: if (ir != jr) {
1529: errln("ICU's decoder did not return the same result as Sun. ICU: "
1530: + ir.toString() + " Sun: " + jr.toString());
1531: }
1532: }
1533:
1534: private CoderResult execMalformed(Charset cs) {
1535: CharsetDecoder decoder = cs.newDecoder();
1536: decoder.onMalformedInput(CodingErrorAction.IGNORE);
1537: decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
1538: ByteBuffer in = ByteBuffer.wrap(new byte[] { 0x00, 0x41, 0x00,
1539: 0x42, 0x01 });
1540: CharBuffer out = CharBuffer.allocate(3);
1541: return decoder.decode(in, out, true);
1542: }
1543:
1544: public void TestJavaUTF16Decoder() {
1545: CharsetProviderICU provider = new CharsetProviderICU();
1546: Charset ics = provider.charsetForName("UTF-16BE");
1547: //Use SUN's charset
1548: Charset jcs = Charset.forName("UTF-16");
1549: Exception ie = execConvertAll(ics);
1550: Exception je = execConvertAll(jcs);
1551: if (ie != je) {
1552: errln("ICU's decoder did not return the same result as Sun. ICU: "
1553: + ie.toString() + " Sun: " + je.toString());
1554: }
1555: }
1556:
1557: private Exception execConvertAll(Charset cs) {
1558: ByteBuffer in = ByteBuffer.allocate(400);
1559: int i = 0;
1560: while (in.position() != in.capacity()) {
1561: in.put((byte) 0xD8);
1562: in.put((byte) i);
1563: in.put((byte) 0xDC);
1564: in.put((byte) i);
1565: i++;
1566: }
1567: in.limit(in.position());
1568: in.position(0);
1569: CharsetDecoder decoder = cs.newDecoder();
1570: decoder.onMalformedInput(CodingErrorAction.IGNORE);
1571: decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
1572: try {
1573: CharBuffer out = decoder.decode(in);
1574: if (out != null) {
1575: logln(cs.toString() + " encoing succeeded as expected!");
1576: }
1577: } catch (Exception ex) {
1578: errln("Did not get expected exception for encoding: "
1579: + cs.toString());
1580: return ex;
1581: }
1582: return null;
1583: }
1584: }
|