0001: /*
0002: *******************************************************************************
0003: * Copyright (C) 2002-2006, International Business Machines Corporation and *
0004: * others. All Rights Reserved. *
0005: *******************************************************************************
0006: *
0007: *******************************************************************************
0008: */
0009:
0010: package com.ibm.icu.dev.test.charset;
0011:
0012: import java.nio.ByteBuffer;
0013: import java.nio.CharBuffer;
0014: import java.nio.charset.Charset;
0015: import java.nio.charset.CharsetDecoder;
0016: import java.nio.charset.CharsetEncoder;
0017: import java.nio.charset.CoderResult;
0018: import java.nio.charset.CodingErrorAction;
0019: import java.util.Iterator;
0020:
0021: import com.ibm.icu.charset.CharsetProviderICU;
0022: import com.ibm.icu.dev.test.ModuleTest;
0023: import com.ibm.icu.dev.test.TestDataModule.DataMap;
0024: import com.ibm.icu.impl.ICUResourceBundle;
0025:
/**
 * Data-driven conversion tests. This class is the counterpart of convtest.c
 * and runs the test cases stored in the conversion test data file.
 */
0030: public class TestConversion extends ModuleTest {
0031: /**
0032: * This maps to the C struct of conversion case in convtest.h that stores the
0033: * data for a conversion test
0034: *
0035: */
0036: private class ConversionCase {
0037: int caseNr; // testcase index
0038: String option = null; // callback options
0039: CodingErrorAction cbErrorAction = null; // callback action type
0040: CharBuffer toUnicodeResult = null;
0041: ByteBuffer fromUnicodeResult = null;
0042:
0043: // data retrieved from a test case conversion.txt
0044: String charset; // charset
0045: String unicode; // unicode string
0046: ByteBuffer bytes; // byte
0047: int[] offsets; // offsets
0048: boolean finalFlush; // flush
0049: boolean fallbacks; // fallback
0050: String outErrorCode; // errorCode
0051: String cbopt; // callback
0052:
0053: // TestGetUnicodeSet variables
0054: String map;
0055: String mapnot;
0056: int which;
0057: }
0058:
0059: // public methods --------------------------------------------------------
0060:
0061: public static void main(String[] args) throws Exception {
0062: new TestConversion().run(args);
0063: }
0064:
/**
 * Creates the test, passing the test data location and the name
 * "conversion" to the superclass constructor.
 */
public TestConversion() {
    super("com/ibm/icu/dev/data/testdata/", "conversion");
}
0068:
0069: /*
0070: * This method maps to the convtest.cpp runIndexedTest() method to run each
0071: * type of conversion.
0072: */
0073: public void processModules() {
0074: try {
0075: int testFromUnicode = 0;
0076: int testToUnicode = 0;
0077: String testName = t.getName().toString();
0078:
0079: // Iterate through and get each of the test case to process
0080: for (Iterator iter = t.getDataIterator(); iter.hasNext();) {
0081: DataMap testcase = (DataMap) iter.next();
0082:
0083: if (testName.equalsIgnoreCase("toUnicode")) {
0084: TestToUnicode(testcase, testToUnicode);
0085: testToUnicode++;
0086:
0087: } else if (testName.equalsIgnoreCase("fromUnicode")) {
0088: TestFromUnicode(testcase, testFromUnicode);
0089: testFromUnicode++;
0090: } else if (testName.equalsIgnoreCase("getUnicodeSet")) {
0091: TestGetUnicodeSet(testcase);
0092: } else {
0093: warnln("Could not load the test cases for conversion");
0094: continue;
0095: }
0096: }
0097: } catch (Exception e) {
0098: e.printStackTrace();
0099: }
0100:
0101: }
0102:
0103: // private methods -------------------------------------------------------
0104:
0105: // fromUnicode test worker functions ---------------------------------------
0106: private void TestFromUnicode(DataMap testcase, int caseNr) {
0107:
0108: ConversionCase cc = new ConversionCase();
0109: cc.caseNr = caseNr;
0110:
0111: try {
0112: // retrieve test case data
0113: cc.charset = ((ICUResourceBundle) testcase
0114: .getObject("charset")).getString();
0115: cc.unicode = ((ICUResourceBundle) testcase
0116: .getObject("unicode")).getString();
0117: cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes"))
0118: .getBinary();
0119: cc.offsets = ((ICUResourceBundle) testcase
0120: .getObject("offsets")).getIntVector();
0121: cc.finalFlush = ((ICUResourceBundle) testcase
0122: .getObject("flush")).getUInt() != 0;
0123: cc.fallbacks = ((ICUResourceBundle) testcase
0124: .getObject("fallbacks")).getUInt() != 0;
0125: cc.outErrorCode = ((ICUResourceBundle) testcase
0126: .getObject("errorCode")).getString();
0127: cc.cbopt = ((ICUResourceBundle) testcase
0128: .getObject("callback")).getString();
0129:
0130: } catch (Exception e) {
0131: errln("Skipping test:");
0132: errln("error parsing conversion/toUnicode test case "
0133: + cc.caseNr);
0134: return;
0135: }
0136:
0137: // ----for debugging only
0138: logln("\nTestFromUnicode[" + caseNr + "] " + cc.charset + " ");
0139: logln("Unicode: " + cc.unicode);
0140: logln("Bytes:");
0141: printbytes(cc.bytes, cc.bytes.limit());
0142: logln("");
0143: logln("Callback: (" + cc.cbopt + ")");
0144: logln("...............................................");
0145:
0146: // ----for debugging only
0147: // TODO: ***Currently skipping test for charset ibm-1390, gb18030,
0148: // ibm-930 due to external mapping need to be fix
0149: if (cc.charset.equalsIgnoreCase("ibm-1390")
0150: || cc.charset.equalsIgnoreCase("gb18030")
0151: || cc.charset.equalsIgnoreCase("ibm-970")) {
0152: logln("Skipping test:("
0153: + cc.charset
0154: + ") due to ICU Charset external mapping not supported at this time");
0155: return;
0156: }
0157:
0158: // process the retrieved test data case
0159: if (cc.offsets.length == 0) {
0160: cc.offsets = null;
0161: } else if (cc.offsets.length != cc.bytes.limit()) {
0162: errln("fromUnicode[" + cc.caseNr + "] bytes[" + cc.bytes
0163: + "] and offsets[" + cc.offsets.length
0164: + "] must have the same length");
0165: return;
0166: }
0167:
0168: // check the callback replacement value
0169: if (cc.cbopt.length() > 0) {
0170:
0171: switch ((cc.cbopt).charAt(0)) {
0172: case '?':
0173: cc.cbErrorAction = CodingErrorAction.REPLACE;
0174: break;
0175: case '0':
0176: cc.cbErrorAction = CodingErrorAction.IGNORE;
0177: break;
0178: case '.':
0179: cc.cbErrorAction = CodingErrorAction.REPORT;
0180: break;
0181: case '&':
0182: cc.cbErrorAction = CodingErrorAction.REPORT;
0183: break;
0184: default:
0185: cc.cbErrorAction = null;
0186: break;
0187: }
0188:
0189: // check for any options for the callback value --
0190: cc.option = cc.cbErrorAction == null ? cc.cbopt : cc.cbopt
0191: .substring(1);
0192: if (cc.option == null) {
0193: cc.option = null;
0194: }
0195: }
0196: logln("TestFromUnicode[" + cc.caseNr + "] " + cc.charset);
0197: FromUnicodeCase(cc);
0198:
0199: return;
0200:
0201: }
0202:
0203: private void FromUnicodeCase(ConversionCase cc) {
0204:
0205: // create charset encoder for conversion test
0206: CharsetProviderICU provider = new CharsetProviderICU();
0207: CharsetEncoder encoder = null;
0208: Charset charset = null;
0209: try {
0210: charset = (Charset) provider.charsetForName(cc.charset);
0211: encoder = (CharsetEncoder) charset.newEncoder();
0212: encoder.onMalformedInput(CodingErrorAction.REPLACE);
0213: encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
0214:
0215: } catch (Exception e) {
0216:
0217: logln("Skipping test:(" + cc.charset
0218: + ") due to ICU Charset not supported at this time");
0219: return;
0220:
0221: }
0222:
0223: // set the callback for the encoder
0224: if (cc.cbErrorAction != null) {
0225: encoder.onUnmappableCharacter(cc.cbErrorAction);
0226: encoder.onMalformedInput(cc.cbErrorAction);
0227:
0228: // if action has an option, put in the option for the case
0229: if (cc.option.equals("i")) {
0230: encoder.onMalformedInput(CodingErrorAction.REPORT);
0231: }
0232:
0233: // if callback action is replace, and there is a subchar
0234: // replace the decoder's default replacement value
0235: // if substring, skip test due to current api not supporting
0236: // substring
0237: if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)) {
0238: if (cc.cbopt.length() > 1) {
0239: if (cc.cbopt.length() > 1
0240: && cc.cbopt.charAt(1) == '=') {
0241: logln("Skipping test due to limitation in Java API - substitution string not supported");
0242: return;
0243: } else {
0244: // // read NUL-separated subchar first, if any
0245: // copy the subchar from Latin-1 characters
0246: // start after the NUL
0247: if (cc.cbopt.charAt(1) == 0x00) {
0248: cc.cbopt = cc.cbopt.substring(2);
0249:
0250: try {
0251: encoder
0252: .replaceWith(toByteArray(cc.cbopt));
0253: } catch (Exception e) {
0254: logln("Skipping test due to limitation in Java API - substitution character sequence size error");
0255: return;
0256: }
0257: }
0258: }
0259: }
0260: }
0261: }
0262:
0263: // do charset encoding from unicode
0264:
0265: // testing by steps using charset.encoder(in,out,flush)
0266: int resultLength;
0267: boolean ok;
0268: String steps[][] = { { "0", "bulk" }, // must be first for offsets to be checked
0269: { "1", "step=1" }, { "3", "step=3" }, { "7", "step=7" } };
0270: int i, step;
0271:
0272: ok = true;
0273:
0274: for (i = 0; i < steps.length && ok; ++i) {
0275: step = Integer.parseInt(steps[i][0]);
0276:
0277: logln("Testing step:[" + step + "]");
0278: resultLength = stepFromUnicode(cc, encoder, step);
0279: ok = checkFromUnicode(cc, resultLength);
0280:
0281: }
0282: // testing by whole buffer using out = charset.encoder(in)
0283: while (ok && cc.finalFlush) {
0284: logln("Testing java API charset.encoder(in):");
0285: cc.fromUnicodeResult = null;
0286: ByteBuffer out = null;
0287:
0288: try {
0289: out = encoder.encode(CharBuffer.wrap(cc.unicode
0290: .toCharArray()));
0291: out.position(out.limit());
0292: if (out.limit() != out.capacity()) {
0293: int pos = out.position();
0294: byte[] temp = out.array();
0295: out = ByteBuffer.allocate(temp.length * 4);
0296: out.put(temp);
0297: out.position(pos);
0298: CoderResult cr = encoder.flush(out);
0299: if (cr.isOverflow()) {
0300: logln("Overflow error with flushing encoder");
0301: }
0302: }
0303: cc.fromUnicodeResult = out;
0304:
0305: ok = checkFromUnicode(cc, out.limit());
0306: if (!ok) {
0307: break;
0308: }
0309: } catch (Exception e) {
0310: //check the error code to see if it matches cc.errorCode
0311: logln("Encoder returned an error code");
0312: logln("ErrorCode expected is: " + cc.outErrorCode);
0313: logln("Error Result is: " + e.toString());
0314: }
0315: break;
0316: }
0317:
0318: return;
0319:
0320: }
0321:
0322: private int stepFromUnicode(ConversionCase cc,
0323: CharsetEncoder encoder, int step) {
0324:
0325: CharBuffer source;
0326: ByteBuffer target;
0327: int sourceLen;
0328: boolean flush;
0329: source = CharBuffer.wrap(cc.unicode.toCharArray());
0330: sourceLen = cc.unicode.length();
0331:
0332: target = ByteBuffer
0333: .allocate(cc.bytes.capacity() + 4/* for BOM */);
0334: target.position(0);
0335: source.position(0);
0336: cc.fromUnicodeResult = null;
0337: encoder.reset();
0338:
0339: if (step >= 0) {
0340:
0341: int iStep = step;
0342: int oStep = step;
0343:
0344: for (;;) {
0345:
0346: if (step != 0) {
0347: source.limit((iStep < sourceLen) ? iStep
0348: : sourceLen);
0349: target.limit((oStep < target.capacity()) ? oStep
0350: : target.capacity());
0351: flush = (cc.finalFlush && source.limit() == sourceLen);
0352: } else {
0353: source.limit(sourceLen);
0354: target.limit(target.capacity());
0355: flush = cc.finalFlush;
0356: }
0357: CoderResult cr = null;
0358: // convert
0359: if (source.hasRemaining()) {
0360:
0361: cr = encoder.encode(source, target, flush);
0362:
0363: // check pointers and errors
0364: if (cr.isOverflow()) {
0365: // the partial target is filled, set a new limit, reset
0366: // the error and continue
0367: target
0368: .limit(((target.position() + step) < target
0369: .capacity()) ? target
0370: .position()
0371: + step : target.capacity());
0372:
0373: } else if (cr.isError()) {
0374: // check the error code to see if it matches
0375: // cc.errorCode
0376: logln("Encoder returned an error code");
0377: logln("ErrorCode expected is: "
0378: + cc.outErrorCode);
0379: logln("Error Result is: " + cr.toString());
0380: break;
0381: }
0382: } else {
0383:
0384: if (source.limit() == sourceLen) {
0385: cr = encoder.encode(source, target, true);
0386: if (target.limit() != target.capacity()) {
0387: target.limit(target.capacity());
0388: }
0389: cr = encoder.flush(target);
0390:
0391: if (cr.isError()) {
0392: errln("Flush operation failed");
0393: }
0394: break;
0395: }
0396: }
0397: iStep += step;
0398: oStep += step;
0399: }
0400: }
0401: cc.fromUnicodeResult = target;
0402: return target.position();
0403: }
0404:
0405: private boolean checkFromUnicode(ConversionCase cc, int resultLength) {
0406:
0407: // check everything that might have gone wrong
0408: if (cc.bytes.limit() != resultLength) {
0409: if (checkResultsFromUnicode(cc, cc.bytes,
0410: cc.fromUnicodeResult)) {
0411: return true;
0412: }
0413: logln("fromUnicode[" + cc.caseNr + "](" + cc.charset
0414: + ") callback:" + cc.cbopt + " failed: +"
0415: + "wrong result length" + "\n");
0416: return false;
0417: }
0418: if (!checkResultsFromUnicode(cc, cc.bytes, cc.fromUnicodeResult)) {
0419: logln("fromUnicode[" + cc.caseNr + "](" + cc.charset
0420: + ") callback:" + cc.cbopt + " failed: +"
0421: + "wrong result string" + "\n");
0422: return false;
0423: }
0424:
0425: return true;
0426: }
0427:
0428: // toUnicode test worker functions ----------------------------------------- ***
0429:
0430: private void TestToUnicode(DataMap testcase, int caseNr) {
0431: // create Conversion case to store the test case data
0432: ConversionCase cc = new ConversionCase();
0433:
0434: try {
0435: // retrieve test case data
0436: cc.caseNr = caseNr;
0437: cc.charset = ((ICUResourceBundle) testcase
0438: .getObject("charset")).getString();
0439: cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes"))
0440: .getBinary();
0441: cc.unicode = ((ICUResourceBundle) testcase
0442: .getObject("unicode")).getString();
0443: cc.offsets = ((ICUResourceBundle) testcase
0444: .getObject("offsets")).getIntVector();
0445: cc.finalFlush = ((ICUResourceBundle) testcase
0446: .getObject("flush")).getUInt() != 0;
0447: cc.fallbacks = ((ICUResourceBundle) testcase
0448: .getObject("fallbacks")).getUInt() != 0;
0449: cc.outErrorCode = ((ICUResourceBundle) testcase
0450: .getObject("errorCode")).getString();
0451: cc.cbopt = ((ICUResourceBundle) testcase
0452: .getObject("callback")).getString();
0453:
0454: } catch (Exception e) {
0455: errln("Skipping test: error parsing conversion/toUnicode test case "
0456: +
0457:
0458: cc.caseNr);
0459: return;
0460: }
0461:
0462: // ----for debugging only
0463: logln("\nTestToUnicode[" + caseNr + "] " + cc.charset + " ");
0464: logln("Bytes:");
0465: printbytes(cc.bytes, cc.bytes.limit());
0466: logln("");
0467: logln("Unicode: " + hex(cc.unicode));
0468: logln("Callback: (" + cc.cbopt + ")");
0469: ByteBuffer c = ByteBuffer.wrap(cc.cbopt.getBytes());
0470: printbytes(c, c.limit());
0471: logln("\n...............................................");
0472:
0473: // ----for debugging only
0474:
0475: // TODO: This test case is skipped due to limitation in java's API for
0476: // decoder replacement
0477: // { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 },
0478: // :int{1}, :int{0}, "", "?", :bin{""} }
0479: if (cc.caseNr == 63) {
0480: logln("TestToUnicode[" + cc.caseNr + "] " + cc.charset);
0481: logln("Skipping test due to limitation in Java API - callback replacement value");
0482: return;
0483: }
0484: // process the retrieved test data case
0485: if (cc.offsets.length == 0) {
0486: cc.offsets = null;
0487: } else if (cc.offsets.length != cc.unicode.length()) {
0488: errln("Skipping test: toUnicode[" + cc.caseNr
0489: + "] unicode[" + cc.unicode.length()
0490: + "] and offsets[" + cc.offsets.length
0491: + "] must have the same length");
0492: return;
0493: }
0494: // check for the callback replacement value for unmappable
0495: // characters or malformed errors
0496: if (cc.cbopt.length() > 0) {
0497: switch ((cc.cbopt).charAt(0)) {
0498: case '?': // CALLBACK_SUBSTITUTE
0499: cc.cbErrorAction = CodingErrorAction.REPLACE;
0500: break;
0501: case '0': // CALLBACK_SKIP
0502: cc.cbErrorAction = CodingErrorAction.IGNORE;
0503: break;
0504: case '.': // CALLBACK_STOP
0505: cc.cbErrorAction = CodingErrorAction.REPORT;
0506: break;
0507: case '&': // CALLBACK_ESCAPE
0508: cc.cbErrorAction = CodingErrorAction.REPORT;
0509: break;
0510: default:
0511: cc.cbErrorAction = null;
0512: break;
0513: }
0514: }
0515: // check for any options for the callback value
0516: cc.option = cc.cbErrorAction == null ? null : cc.cbopt
0517: .substring(1);
0518: if (cc.option == null) {
0519: cc.option = null;
0520: }
0521:
0522: logln("TestToUnicode[" + cc.caseNr + "] " + cc.charset);
0523: ToUnicodeCase(cc);
0524:
0525: }
0526:
0527: private void ToUnicodeCase(ConversionCase cc) {
0528:
0529: // create converter for charset and decoder for each test case
0530: CharsetProviderICU provider = new CharsetProviderICU();
0531: CharsetDecoder decoder = null;
0532: Charset charset = null;
0533:
0534: try {
0535: charset = (Charset) provider.charsetForName(cc.charset);
0536: decoder = (CharsetDecoder) charset.newDecoder();
0537: decoder.onMalformedInput(CodingErrorAction.REPLACE);
0538: decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
0539:
0540: } catch (Exception e) {
0541:
0542: logln("Skipping test:(" + cc.charset
0543: + ") due to ICU Charset not supported at this time");
0544: return;
0545: }
0546:
0547: // set the callback for the decoder
0548: if (cc.cbErrorAction != null) {
0549: decoder.onMalformedInput(cc.cbErrorAction);
0550: decoder.onUnmappableCharacter(cc.cbErrorAction);
0551:
0552: // set the options (if any: SKIP_STOP_ON_ILLEGAL) for callback
0553: if (cc.option.equals("i")) {
0554: decoder.onMalformedInput(CodingErrorAction.REPORT);
0555: }
0556:
0557: // if callback action is replace, and there is a subchar
0558: // replace the decoder's default replacement value
0559: // if substring, skip test due to current api not supporting
0560: // substring replacement
0561: if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)) {
0562: if (cc.cbopt.length() > 1) {
0563: if (cc.cbopt.charAt(1) == '=') {
0564: logln("Skipping test due to limitation in Java API - substitution string not supported");
0565:
0566: } else {
0567: // // read NUL-separated subchar first, if any
0568: // copy the subchar from Latin-1 characters
0569: // start after the NUL
0570: if (cc.cbopt.charAt(1) == 0x00) {
0571: cc.cbopt = cc.cbopt.substring(2);
0572:
0573: try {
0574: decoder.replaceWith(cc.cbopt);
0575: } catch (Exception e) {
0576: logln("Skipping test due to limitation in Java API - substitution character sequence size error");
0577:
0578: }
0579: }
0580: }
0581: }
0582: }
0583: }
0584:
0585: // Check the step to unicode
0586: boolean ok;
0587: int resultLength;
0588:
0589: String steps[][] = { { "0", "bulk" }, // must be first for offsets to be checked
0590: { "1", "step=1" }, { "3", "step=3" }, { "7", "step=7" } };
0591: /* TODO: currently not supported test steps, getNext API is not supported for now
0592: { "-1", "getNext" },
0593: { "-2", "toU(bulk)+getNext" },
0594: { "-3", "getNext+toU(bulk)" },
0595: { "-4", "toU(1)+getNext" },
0596: { "-5", "getNext+toU(1)" },
0597: { "-12", "toU(5)+getNext" },
0598: { "-13", "getNext+toU(5)" }};*/
0599:
0600: ok = true;
0601: int step;
0602: // testing by steps using the CoderResult cr = charset.decoder(in,out,flush) api
0603: for (int i = 0; i < steps.length && ok; ++i) {
0604: step = Integer.parseInt(steps[i][0]);
0605:
0606: if (step < 0 && !cc.finalFlush) {
0607: continue;
0608: }
0609: logln("Testing step:[" + step + "]");
0610: resultLength = stepToUnicode(cc, decoder, step);
0611: ok = checkToUnicode(cc, resultLength);
0612: }
0613:
0614: //testing the java's out = charset.decoder(in) api
0615: while (ok && cc.finalFlush) {
0616: logln("Testing java charset.decoder(in):");
0617: cc.toUnicodeResult = null;
0618: CharBuffer out = null;
0619:
0620: try {
0621: out = decoder.decode(ByteBuffer.wrap(cc.bytes.array()));
0622: out.position(out.limit());
0623: if (out.limit() < cc.unicode.length()) {
0624: int pos = out.position();
0625: char[] temp = out.array();
0626: out = CharBuffer.allocate(cc.bytes.limit());
0627: out.put(temp);
0628: out.position(pos);
0629: CoderResult cr = decoder.flush(out);
0630: if (cr.isOverflow()) {
0631: logln("Overflow error with flushing decodering");
0632: }
0633: }
0634:
0635: cc.toUnicodeResult = out;
0636:
0637: ok = checkToUnicode(cc, out.limit());
0638: if (!ok) {
0639: break;
0640: }
0641: } catch (Exception e) {
0642: //check the error code to see if it matches cc.errorCode
0643: logln("Decoder returned an error code");
0644: logln("ErrorCode expected is: " + cc.outErrorCode);
0645: logln("Error Result is: " + e.toString());
0646: }
0647: break;
0648: }
0649:
0650: return;
0651: }
0652:
0653: private int stepToUnicode(ConversionCase cc,
0654: CharsetDecoder decoder, int step)
0655:
0656: {
0657: ByteBuffer source;
0658: CharBuffer target;
0659: boolean flush = false;
0660: int sourceLen;
0661: source = cc.bytes;
0662: sourceLen = cc.bytes.limit();
0663: source.position(0);
0664: target = CharBuffer.allocate(cc.unicode.length() + 4);
0665: target.position(0);
0666: cc.toUnicodeResult = null;
0667: decoder.reset();
0668:
0669: if (step >= 0) {
0670:
0671: int iStep = step;
0672: int oStep = step;
0673:
0674: for (;;) {
0675:
0676: if (step != 0) {
0677: source.limit((iStep <= sourceLen) ? iStep
0678: : sourceLen);
0679: target.limit((oStep <= target.capacity()) ? oStep
0680: : target.capacity());
0681: flush = (cc.finalFlush && source.limit() == sourceLen);
0682:
0683: } else {
0684: //bulk mode
0685: source.limit(sourceLen);
0686: target.limit(target.capacity());
0687: flush = cc.finalFlush;
0688: }
0689: // convert
0690: CoderResult cr = null;
0691: if (source.hasRemaining()) {
0692:
0693: cr = decoder.decode(source, target, flush);
0694: // check pointers and errors
0695: if (cr.isOverflow()) {
0696: // the partial target is filled, set a new limit,
0697: oStep = (target.position() + step);
0698: target
0699: .limit((oStep < target.capacity()) ? oStep
0700: : target.capacity());
0701: if (target.limit() > target.capacity()) {
0702: //target has reached its limit, an error occurred or test case has an error code
0703: //check error code
0704: logln("UnExpected error: Target Buffer is larger than capacity");
0705: break;
0706: }
0707:
0708: } else if (cr.isError()) {
0709: //check the error code to see if it matches cc.errorCode
0710: logln("Decoder returned an error code");
0711: logln("ErrorCode expected is: "
0712: + cc.outErrorCode);
0713: logln("Error Result is: " + cr.toString());
0714: break;
0715: }
0716:
0717: } else {
0718: if (source.limit() == sourceLen) {
0719:
0720: cr = decoder.decode(source, target, true);
0721:
0722: //due to limitation of the API we need to check for target limit for expected
0723: if (target.limit() != cc.unicode.length()) {
0724: target.limit(cc.unicode.length());
0725: cr = decoder.flush(target);
0726: if (cr.isError()) {
0727: errln("Flush operation failed");
0728: }
0729: }
0730: break;
0731: }
0732: }
0733: iStep += step;
0734: oStep += step;
0735:
0736: }
0737:
0738: }// if(step ==0)
0739:
0740: //--------------------------------------------------------------------------
0741: else /* step<0 */{
0742: /*
0743: * step==-1: call only ucnv_getNextUChar()
0744: * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()
0745: * if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,
0746: * else give it at most (-step-2)/2 bytes
0747: */
0748:
0749: for (;;) {
0750: // convert
0751: if ((step & 1) != 0 /* odd: -1, -3, -5, ... */) {
0752:
0753: target
0754: .limit(target.position() < target
0755: .capacity() ? target.position() + 1
0756: : target.capacity());
0757:
0758: // decode behavior is return to output target 1 character
0759: CoderResult cr = null;
0760:
0761: //similar to getNextUChar() , input is the whole string, while outputs only 1 character
0762: source.limit(sourceLen);
0763: while (target.position() != target.limit()
0764: && source.hasRemaining()) {
0765: cr = decoder.decode(source, target, source
0766: .limit() == sourceLen);
0767:
0768: if (cr.isOverflow()) {
0769:
0770: if (target.limit() >= target.capacity()) {
0771: // target has reached its limit, an error occurred
0772: logln("UnExpected error: Target Buffer is larger than capacity");
0773: break;
0774: } else {
0775: //1 character has been consumed
0776: target.limit(target.position() + 1);
0777: break;
0778: }
0779: } else if (cr.isError()) {
0780: logln("Decoder returned an error code");
0781: logln("ErrorCode expected is: "
0782: + cc.outErrorCode);
0783: logln("Error Result is: " + cr.toString());
0784:
0785: cc.toUnicodeResult = target;
0786: return target.position();
0787: }
0788:
0789: else {
0790: // one character has been consumed
0791: if (target.limit() == target.position()) {
0792: target.limit(target.position() + 1);
0793: break;
0794: }
0795: }
0796:
0797: }
0798: if (source.position() == sourceLen) {
0799:
0800: // due to limitation of the API we need to check
0801: // for target limit for expected
0802: cr = decoder.decode(source, target, true);
0803: if (target.position() != cc.unicode.length()) {
0804:
0805: target.limit(cc.unicode.length());
0806: cr = decoder.flush(target);
0807: if (cr.isError()) {
0808: errln("Flush operation failed");
0809: }
0810: }
0811: break;
0812: }
0813: // alternate between -n-1 and -n but leave -1 alone
0814: if (step < -1) {
0815: ++step;
0816: }
0817: } else {/* step is even */
0818: // allow only one UChar output
0819:
0820: target
0821: .limit(target.position() < target
0822: .capacity() ? target.position() + 1
0823: : target.capacity());
0824: if (step == -2) {
0825: source.limit(sourceLen);
0826: } else {
0827: source.limit(source.position() + (-step - 2)
0828: / 2);
0829: if (source.limit() > sourceLen) {
0830: source.limit(sourceLen);
0831: }
0832: }
0833: CoderResult cr = decoder.decode(source, target,
0834: source.limit() == sourceLen);
0835: // check pointers and errors
0836: if (cr.isOverflow()) {
0837: // one character has been consumed
0838: if (target.limit() >= target.capacity()) {
0839: // target has reached its limit, an error occurred
0840: logln("Unexpected error: Target Buffer is larger than capacity");
0841: break;
0842: }
0843: } else if (cr.isError()) {
0844: logln("Decoder returned an error code");
0845: logln("ErrorCode expected is: "
0846: + cc.outErrorCode);
0847: logln("Error Result is: " + cr.toString());
0848: break;
0849: }
0850:
0851: --step;
0852: }
0853: }
0854: }
0855:
0856: //--------------------------------------------------------------------------
0857:
0858: cc.toUnicodeResult = target;
0859: return target.position();
0860: }
0861:
0862: private boolean checkToUnicode(ConversionCase cc, int resultLength) {
0863:
0864: // check everything that might have gone wrong
0865: if (cc.unicode.length() != resultLength) {
0866: logln("toUnicode[" + cc.caseNr + "](" + cc.charset
0867: + ") callback:" + cc.cbopt + " failed: +"
0868: + "wrong result length" + "\n");
0869: checkResultsToUnicode(cc, cc.unicode, cc.toUnicodeResult);
0870: return false;
0871: }
0872: if (!checkResultsToUnicode(cc, cc.unicode, cc.toUnicodeResult)) {
0873: logln("toUnicode[" + cc.caseNr + "](" + cc.charset
0874: + ") callback:" + cc.cbopt + " failed: +"
0875: + "wrong result string" + "\n");
0876: return false;
0877: }
0878:
0879: return true;
0880:
0881: }
0882:
0883: private void TestGetUnicodeSet(DataMap testcase) {
0884: /*
0885: * charset - will be opened, and ucnv_getUnicodeSet() called on it //
0886: * map - set of code points and strings that must be in the returned set //
0887: * mapnot - set of code points and strings that must *not* be in the //
0888: * returned set // which - numeric UConverterUnicodeSet value Headers {
0889: * "charset", "map", "mapnot", "which" }
0890: */
0891: ConversionCase cc = new ConversionCase();
0892: // retrieve test case data
0893: cc.charset = ((ICUResourceBundle) testcase.getObject("charset"))
0894: .getString();
0895: cc.map = ((ICUResourceBundle) testcase.getObject("map"))
0896: .getString();
0897: cc.mapnot = ((ICUResourceBundle) testcase.getObject("mapnot"))
0898: .getString();
0899: cc.which = ((ICUResourceBundle) testcase.getObject("which"))
0900: .getUInt();
0901:
0902: // create charset and encoder for each test case
0903: logln("Test not supported at this time");
0904:
0905: }
0906:
0907: /**
0908: * This follows ucnv.c method ucnv_detectUnicodeSignature() to detect the
0909: * start of the stream for example U+FEFF (the Unicode BOM/signature
0910: * character) that can be ignored.
0911: *
0912: * Detects Unicode signature byte sequences at the start of the byte stream
0913: * and returns number of bytes of the BOM of the indicated Unicode charset.
0914: * 0 is returned when no Unicode signature is recognized.
0915: *
0916: */
0917:
0918: private String detectUnicodeSignature(ByteBuffer source) {
0919: int signatureLength = 0; // number of bytes of the signature
0920: final int SIG_MAX_LEN = 5;
0921: String sigUniCharset = null; // states what unicode charset is the BOM
0922: int i = 0;
0923:
0924: /*
0925: * initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN bytes we
0926: * don't misdetect something
0927: */
0928: byte start[] = { (byte) 0xa5, (byte) 0xa5, (byte) 0xa5,
0929: (byte) 0xa5, (byte) 0xa5 };
0930:
0931: while (i < source.remaining() && i < SIG_MAX_LEN) {
0932: start[i] = source.get(i);
0933: i++;
0934: }
0935:
0936: if (start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) {
0937: signatureLength = 2;
0938: sigUniCharset = "UTF-16BE";
0939: source.position(signatureLength);
0940: return sigUniCharset;
0941: } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) {
0942: if (start[2] == (byte) 0x00 && start[3] == (byte) 0x00) {
0943: signatureLength = 4;
0944: sigUniCharset = "UTF-32LE";
0945: source.position(signatureLength);
0946: return sigUniCharset;
0947: } else {
0948: signatureLength = 2;
0949: sigUniCharset = "UTF-16LE";
0950: source.position(signatureLength);
0951: return sigUniCharset;
0952: }
0953: } else if (start[0] == (byte) 0xEF && start[1] == (byte) 0xBB
0954: && start[2] == (byte) 0xBF) {
0955: signatureLength = 3;
0956: sigUniCharset = "UTF-8";
0957: source.position(signatureLength);
0958: return sigUniCharset;
0959: } else if (start[0] == (byte) 0x00 && start[1] == (byte) 0x00
0960: && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) {
0961: signatureLength = 4;
0962: sigUniCharset = "UTF-32BE";
0963: source.position(signatureLength);
0964: return sigUniCharset;
0965: } else if (start[0] == (byte) 0x0E && start[1] == (byte) 0xFE
0966: && start[2] == (byte) 0xFF) {
0967: signatureLength = 3;
0968: sigUniCharset = "SCSU";
0969: source.position(signatureLength);
0970: return sigUniCharset;
0971: } else if (start[0] == (byte) 0xFB && start[1] == (byte) 0xEE
0972: && start[2] == (byte) 0x28) {
0973: signatureLength = 3;
0974: sigUniCharset = "BOCU-1";
0975: source.position(signatureLength);
0976: return sigUniCharset;
0977: } else if (start[0] == (byte) 0x2B && start[1] == (byte) 0x2F
0978: && start[2] == (byte) 0x76) {
0979:
0980: if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) {
0981: signatureLength = 5;
0982: sigUniCharset = "UTF-7";
0983: source.position(signatureLength);
0984: return sigUniCharset;
0985: } else if (start[3] == (byte) 0x38
0986: || start[3] == (byte) 0x39
0987: || start[3] == (byte) 0x2B
0988: || start[3] == (byte) 0x2F) {
0989: signatureLength = 4;
0990: sigUniCharset = "UTF-7";
0991: source.position(signatureLength);
0992: return sigUniCharset;
0993: }
0994: } else if (start[0] == (byte) 0xDD && start[2] == (byte) 0x73
0995: && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) {
0996: signatureLength = 4;
0997: sigUniCharset = "UTF-EBCDIC";
0998: source.position(signatureLength);
0999: return sigUniCharset;
1000: }
1001:
1002: /* no known Unicode signature byte sequence recognized */
1003: return null;
1004: }
1005:
1006: void printbytes(ByteBuffer buf, int pos) {
1007: int cur = buf.position();
1008: log(" (" + pos + ")==[");
1009: for (int i = 0; i < pos; i++) {
1010: log("(" + i + ")" + hex(buf.get(i) & 0xff) + " ");
1011: }
1012: log("]");
1013: buf.position(cur);
1014: }
1015:
1016: void printchar(CharBuffer buf, int pos) {
1017: int cur = buf.position();
1018: log(" (" + pos + ")==[");
1019: for (int i = 0; i < pos; i++) {
1020: log("(" + i + ")" + hex(buf.get(i)) + " ");
1021: }
1022: log("]");
1023: buf.position(cur);
1024: }
1025:
1026: private boolean checkResultsFromUnicode(ConversionCase cc,
1027: ByteBuffer source,
1028:
1029: ByteBuffer target) {
1030:
1031: int len = target.position();
1032: target.limit(len); //added to stop where data ends
1033: source.rewind();
1034: target.rewind();
1035:
1036: // remove any BOM signature before checking
1037: /* String BOM =*/detectUnicodeSignature(target);
1038:
1039: len = len - target.position();
1040:
1041: if (len != source.remaining()) {
1042: errln("Test failed: output does not match expected\n");
1043: logln("[" + cc.caseNr + "]:" + cc.charset + "\noutput=");
1044: printbytes(target, len);
1045: logln("");
1046: return false;
1047: }
1048: source.rewind();
1049: for (int i = 0; i < source.remaining(); i++) {
1050: if (target.get() != source.get()) {
1051: errln("Test failed: output does not match expected\n");
1052: logln("[" + cc.caseNr + "]:" + cc.charset + "\noutput=");
1053: printbytes(target, len);
1054: logln("");
1055: return false;
1056: }
1057: }
1058: logln("[" + cc.caseNr + "]:" + cc.charset);
1059: log("output=");
1060: printbytes(target, len);
1061: logln("\nPassed\n");
1062: return true;
1063: }
1064:
1065: private boolean checkResultsToUnicode(ConversionCase cc,
1066: String source, CharBuffer target) {
1067:
1068: int len = target.position();
1069: target.rewind();
1070:
1071: // test to see if the conversion matches actual results
1072: if (len != source.length()) {
1073: errln("Test failed: output does not match expected\n");
1074: logln("[" + cc.caseNr + "]:" + cc.charset + "\noutput=");
1075: printchar(target, len);
1076: return false;
1077: }
1078: for (int i = 0; i < source.length(); i++) {
1079: if (!(hex(target.get(i)).equals(hex(source.charAt(i))))) {
1080: errln("Test failed: output does not match expected\n");
1081: logln("[" + cc.caseNr + "]:" + cc.charset + "\noutput=");
1082: printchar(target, len);
1083: return false;
1084: }
1085: }
1086: logln("[" + cc.caseNr + "]:" + cc.charset);
1087: log("output=");
1088: printchar(target, len);
1089: logln("\nPassed\n");
1090: return true;
1091: }
1092:
1093: private byte[] toByteArray(String str) {
1094: byte[] ret = new byte[str.length()];
1095: for (int i = 0; i < ret.length; i++) {
1096: char ch = str.charAt(i);
1097: if (ch <= 0xFF) {
1098: ret[i] = (byte) ch;
1099: } else {
1100: throw new IllegalArgumentException(
1101: " byte value out of range: " + ch);
1102: }
1103: }
1104: return ret;
1105: }
1106: }
|