Source Code Cross Referenced for TestConversion.java in » Internationalization-Localization » icu4j » com » ibm » icu » dev » test » charset » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.dev.test.charset
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        /*
0002:         *******************************************************************************
0003:         * Copyright (C) 2002-2006, International Business Machines Corporation and    *
0004:         * others. All Rights Reserved.                                                *
0005:         *******************************************************************************
0006:         *
0007:         *******************************************************************************
0008:         */
0009:
0010:        package com.ibm.icu.dev.test.charset;
0011:
0012:        import java.nio.ByteBuffer;
0013:        import java.nio.CharBuffer;
0014:        import java.nio.charset.Charset;
0015:        import java.nio.charset.CharsetDecoder;
0016:        import java.nio.charset.CharsetEncoder;
0017:        import java.nio.charset.CoderResult;
0018:        import java.nio.charset.CodingErrorAction;
0019:        import java.util.Iterator;
0020:
0021:        import com.ibm.icu.charset.CharsetProviderICU;
0022:        import com.ibm.icu.dev.test.ModuleTest;
0023:        import com.ibm.icu.dev.test.TestDataModule.DataMap;
0024:        import com.ibm.icu.impl.ICUResourceBundle;
0025:
0026:        /**
0027:         * This maps to convtest.c which tests the test file for data-driven conversion tests. 
0028:         * 
0029:         */
0030:        public class TestConversion extends ModuleTest {
0031:            /**
0032:             * This maps to the C struct of conversion case in convtest.h that stores the
0033:             * data for a conversion test
0034:             * 
0035:             */
0036:            private class ConversionCase {
0037:                int caseNr; // testcase index   
0038:                String option = null; // callback options
0039:                CodingErrorAction cbErrorAction = null; // callback action type
0040:                CharBuffer toUnicodeResult = null;
0041:                ByteBuffer fromUnicodeResult = null;
0042:
0043:                // data retrieved from a test case conversion.txt
0044:                String charset; // charset
0045:                String unicode; // unicode string
0046:                ByteBuffer bytes; // byte
0047:                int[] offsets; // offsets
0048:                boolean finalFlush; // flush
0049:                boolean fallbacks; // fallback
0050:                String outErrorCode; // errorCode
0051:                String cbopt; // callback 
0052:
0053:                // TestGetUnicodeSet variables
0054:                String map;
0055:                String mapnot;
0056:                int which;
0057:            }
0058:
0059:            // public methods --------------------------------------------------------
0060:
0061:            public static void main(String[] args) throws Exception {
0062:                new TestConversion().run(args);
0063:            }
0064:
/**
 * Passes the test-data location and the name "conversion" to the
 * {@code ModuleTest} constructor.
 */
public TestConversion() {
    super("com/ibm/icu/dev/data/testdata/", "conversion");
}
0068:
0069:            /*
0070:             * This method maps to the convtest.cpp runIndexedTest() method to run each
0071:             * type of conversion.
0072:             */
0073:            public void processModules() {
0074:                try {
0075:                    int testFromUnicode = 0;
0076:                    int testToUnicode = 0;
0077:                    String testName = t.getName().toString();
0078:
0079:                    // Iterate through and get each of the test case to process
0080:                    for (Iterator iter = t.getDataIterator(); iter.hasNext();) {
0081:                        DataMap testcase = (DataMap) iter.next();
0082:
0083:                        if (testName.equalsIgnoreCase("toUnicode")) {
0084:                            TestToUnicode(testcase, testToUnicode);
0085:                            testToUnicode++;
0086:
0087:                        } else if (testName.equalsIgnoreCase("fromUnicode")) {
0088:                            TestFromUnicode(testcase, testFromUnicode);
0089:                            testFromUnicode++;
0090:                        } else if (testName.equalsIgnoreCase("getUnicodeSet")) {
0091:                            TestGetUnicodeSet(testcase);
0092:                        } else {
0093:                            warnln("Could not load the test cases for conversion");
0094:                            continue;
0095:                        }
0096:                    }
0097:                } catch (Exception e) {
0098:                    e.printStackTrace();
0099:                }
0100:
0101:            }
0102:
0103:            // private methods -------------------------------------------------------
0104:
0105:            // fromUnicode test worker functions --------------------------------------- 
0106:            private void TestFromUnicode(DataMap testcase, int caseNr) {
0107:
0108:                ConversionCase cc = new ConversionCase();
0109:                cc.caseNr = caseNr;
0110:
0111:                try {
0112:                    // retrieve test case data
0113:                    cc.charset = ((ICUResourceBundle) testcase
0114:                            .getObject("charset")).getString();
0115:                    cc.unicode = ((ICUResourceBundle) testcase
0116:                            .getObject("unicode")).getString();
0117:                    cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes"))
0118:                            .getBinary();
0119:                    cc.offsets = ((ICUResourceBundle) testcase
0120:                            .getObject("offsets")).getIntVector();
0121:                    cc.finalFlush = ((ICUResourceBundle) testcase
0122:                            .getObject("flush")).getUInt() != 0;
0123:                    cc.fallbacks = ((ICUResourceBundle) testcase
0124:                            .getObject("fallbacks")).getUInt() != 0;
0125:                    cc.outErrorCode = ((ICUResourceBundle) testcase
0126:                            .getObject("errorCode")).getString();
0127:                    cc.cbopt = ((ICUResourceBundle) testcase
0128:                            .getObject("callback")).getString();
0129:
0130:                } catch (Exception e) {
0131:                    errln("Skipping test:");
0132:                    errln("error parsing conversion/toUnicode test case "
0133:                            + cc.caseNr);
0134:                    return;
0135:                }
0136:
0137:                // ----for debugging only
0138:                logln("\nTestFromUnicode[" + caseNr + "] " + cc.charset + " ");
0139:                logln("Unicode: " + cc.unicode);
0140:                logln("Bytes:");
0141:                printbytes(cc.bytes, cc.bytes.limit());
0142:                logln("");
0143:                logln("Callback: (" + cc.cbopt + ")");
0144:                logln("...............................................");
0145:
0146:                //         ----for debugging only
0147:                // TODO: ***Currently skipping test for charset ibm-1390, gb18030,
0148:                // ibm-930 due to external mapping need to be fix
0149:                if (cc.charset.equalsIgnoreCase("ibm-1390")
0150:                        || cc.charset.equalsIgnoreCase("gb18030")
0151:                        || cc.charset.equalsIgnoreCase("ibm-970")) {
0152:                    logln("Skipping test:("
0153:                            + cc.charset
0154:                            + ") due to ICU Charset external mapping not supported at this time");
0155:                    return;
0156:                }
0157:
0158:                // process the retrieved test data case
0159:                if (cc.offsets.length == 0) {
0160:                    cc.offsets = null;
0161:                } else if (cc.offsets.length != cc.bytes.limit()) {
0162:                    errln("fromUnicode[" + cc.caseNr + "] bytes[" + cc.bytes
0163:                            + "] and offsets[" + cc.offsets.length
0164:                            + "] must have the same length");
0165:                    return;
0166:                }
0167:
0168:                // check the callback replacement value
0169:                if (cc.cbopt.length() > 0) {
0170:
0171:                    switch ((cc.cbopt).charAt(0)) {
0172:                    case '?':
0173:                        cc.cbErrorAction = CodingErrorAction.REPLACE;
0174:                        break;
0175:                    case '0':
0176:                        cc.cbErrorAction = CodingErrorAction.IGNORE;
0177:                        break;
0178:                    case '.':
0179:                        cc.cbErrorAction = CodingErrorAction.REPORT;
0180:                        break;
0181:                    case '&':
0182:                        cc.cbErrorAction = CodingErrorAction.REPORT;
0183:                        break;
0184:                    default:
0185:                        cc.cbErrorAction = null;
0186:                        break;
0187:                    }
0188:
0189:                    // check for any options for the callback value -- 
0190:                    cc.option = cc.cbErrorAction == null ? cc.cbopt : cc.cbopt
0191:                            .substring(1);
0192:                    if (cc.option == null) {
0193:                        cc.option = null;
0194:                    }
0195:                }
0196:                logln("TestFromUnicode[" + cc.caseNr + "] " + cc.charset);
0197:                FromUnicodeCase(cc);
0198:
0199:                return;
0200:
0201:            }
0202:
0203:            private void FromUnicodeCase(ConversionCase cc) {
0204:
0205:                // create charset encoder for conversion test
0206:                CharsetProviderICU provider = new CharsetProviderICU();
0207:                CharsetEncoder encoder = null;
0208:                Charset charset = null;
0209:                try {
0210:                    charset = (Charset) provider.charsetForName(cc.charset);
0211:                    encoder = (CharsetEncoder) charset.newEncoder();
0212:                    encoder.onMalformedInput(CodingErrorAction.REPLACE);
0213:                    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
0214:
0215:                } catch (Exception e) {
0216:
0217:                    logln("Skipping test:(" + cc.charset
0218:                            + ") due to ICU Charset not supported at this time");
0219:                    return;
0220:
0221:                }
0222:
0223:                // set the callback for the encoder 
0224:                if (cc.cbErrorAction != null) {
0225:                    encoder.onUnmappableCharacter(cc.cbErrorAction);
0226:                    encoder.onMalformedInput(cc.cbErrorAction);
0227:
0228:                    // if action has an option, put in the option for the case
0229:                    if (cc.option.equals("i")) {
0230:                        encoder.onMalformedInput(CodingErrorAction.REPORT);
0231:                    }
0232:
0233:                    // if callback action is replace, and there is a subchar
0234:                    // replace the decoder's default replacement value
0235:                    // if substring, skip test due to current api not supporting
0236:                    // substring
0237:                    if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)) {
0238:                        if (cc.cbopt.length() > 1) {
0239:                            if (cc.cbopt.length() > 1
0240:                                    && cc.cbopt.charAt(1) == '=') {
0241:                                logln("Skipping test due to limitation in Java API - substitution string not supported");
0242:                                return;
0243:                            } else {
0244:                                // // read NUL-separated subchar first, if any
0245:                                // copy the subchar from Latin-1 characters
0246:                                // start after the NUL
0247:                                if (cc.cbopt.charAt(1) == 0x00) {
0248:                                    cc.cbopt = cc.cbopt.substring(2);
0249:
0250:                                    try {
0251:                                        encoder
0252:                                                .replaceWith(toByteArray(cc.cbopt));
0253:                                    } catch (Exception e) {
0254:                                        logln("Skipping test due to limitation in Java API - substitution character sequence size error");
0255:                                        return;
0256:                                    }
0257:                                }
0258:                            }
0259:                        }
0260:                    }
0261:                }
0262:
0263:                // do charset encoding from unicode
0264:
0265:                // testing by steps using charset.encoder(in,out,flush)
0266:                int resultLength;
0267:                boolean ok;
0268:                String steps[][] = { { "0", "bulk" }, // must be first for offsets to be checked
0269:                        { "1", "step=1" }, { "3", "step=3" }, { "7", "step=7" } };
0270:                int i, step;
0271:
0272:                ok = true;
0273:
0274:                for (i = 0; i < steps.length && ok; ++i) {
0275:                    step = Integer.parseInt(steps[i][0]);
0276:
0277:                    logln("Testing step:[" + step + "]");
0278:                    resultLength = stepFromUnicode(cc, encoder, step);
0279:                    ok = checkFromUnicode(cc, resultLength);
0280:
0281:                }
0282:                // testing by whole buffer using out = charset.encoder(in)
0283:                while (ok && cc.finalFlush) {
0284:                    logln("Testing java API charset.encoder(in):");
0285:                    cc.fromUnicodeResult = null;
0286:                    ByteBuffer out = null;
0287:
0288:                    try {
0289:                        out = encoder.encode(CharBuffer.wrap(cc.unicode
0290:                                .toCharArray()));
0291:                        out.position(out.limit());
0292:                        if (out.limit() != out.capacity()) {
0293:                            int pos = out.position();
0294:                            byte[] temp = out.array();
0295:                            out = ByteBuffer.allocate(temp.length * 4);
0296:                            out.put(temp);
0297:                            out.position(pos);
0298:                            CoderResult cr = encoder.flush(out);
0299:                            if (cr.isOverflow()) {
0300:                                logln("Overflow error with flushing encoder");
0301:                            }
0302:                        }
0303:                        cc.fromUnicodeResult = out;
0304:
0305:                        ok = checkFromUnicode(cc, out.limit());
0306:                        if (!ok) {
0307:                            break;
0308:                        }
0309:                    } catch (Exception e) {
0310:                        //check the error code to see if it matches cc.errorCode
0311:                        logln("Encoder returned an error code");
0312:                        logln("ErrorCode expected is: " + cc.outErrorCode);
0313:                        logln("Error Result is: " + e.toString());
0314:                    }
0315:                    break;
0316:                }
0317:
0318:                return;
0319:
0320:            }
0321:
0322:            private int stepFromUnicode(ConversionCase cc,
0323:                    CharsetEncoder encoder, int step) {
0324:
0325:                CharBuffer source;
0326:                ByteBuffer target;
0327:                int sourceLen;
0328:                boolean flush;
0329:                source = CharBuffer.wrap(cc.unicode.toCharArray());
0330:                sourceLen = cc.unicode.length();
0331:
0332:                target = ByteBuffer
0333:                        .allocate(cc.bytes.capacity() + 4/* for BOM */);
0334:                target.position(0);
0335:                source.position(0);
0336:                cc.fromUnicodeResult = null;
0337:                encoder.reset();
0338:
0339:                if (step >= 0) {
0340:
0341:                    int iStep = step;
0342:                    int oStep = step;
0343:
0344:                    for (;;) {
0345:
0346:                        if (step != 0) {
0347:                            source.limit((iStep < sourceLen) ? iStep
0348:                                    : sourceLen);
0349:                            target.limit((oStep < target.capacity()) ? oStep
0350:                                    : target.capacity());
0351:                            flush = (cc.finalFlush && source.limit() == sourceLen);
0352:                        } else {
0353:                            source.limit(sourceLen);
0354:                            target.limit(target.capacity());
0355:                            flush = cc.finalFlush;
0356:                        }
0357:                        CoderResult cr = null;
0358:                        // convert
0359:                        if (source.hasRemaining()) {
0360:
0361:                            cr = encoder.encode(source, target, flush);
0362:
0363:                            // check pointers and errors
0364:                            if (cr.isOverflow()) {
0365:                                // the partial target is filled, set a new limit, reset
0366:                                // the error and continue
0367:                                target
0368:                                        .limit(((target.position() + step) < target
0369:                                                .capacity()) ? target
0370:                                                .position()
0371:                                                + step : target.capacity());
0372:
0373:                            } else if (cr.isError()) {
0374:                                // check the error code to see if it matches
0375:                                // cc.errorCode
0376:                                logln("Encoder returned an error code");
0377:                                logln("ErrorCode expected is: "
0378:                                        + cc.outErrorCode);
0379:                                logln("Error Result is: " + cr.toString());
0380:                                break;
0381:                            }
0382:                        } else {
0383:
0384:                            if (source.limit() == sourceLen) {
0385:                                cr = encoder.encode(source, target, true);
0386:                                if (target.limit() != target.capacity()) {
0387:                                    target.limit(target.capacity());
0388:                                }
0389:                                cr = encoder.flush(target);
0390:
0391:                                if (cr.isError()) {
0392:                                    errln("Flush operation failed");
0393:                                }
0394:                                break;
0395:                            }
0396:                        }
0397:                        iStep += step;
0398:                        oStep += step;
0399:                    }
0400:                }
0401:                cc.fromUnicodeResult = target;
0402:                return target.position();
0403:            }
0404:
0405:            private boolean checkFromUnicode(ConversionCase cc, int resultLength) {
0406:
0407:                // check everything that might have gone wrong
0408:                if (cc.bytes.limit() != resultLength) {
0409:                    if (checkResultsFromUnicode(cc, cc.bytes,
0410:                            cc.fromUnicodeResult)) {
0411:                        return true;
0412:                    }
0413:                    logln("fromUnicode[" + cc.caseNr + "](" + cc.charset
0414:                            + ") callback:" + cc.cbopt + " failed: +"
0415:                            + "wrong result length" + "\n");
0416:                    return false;
0417:                }
0418:                if (!checkResultsFromUnicode(cc, cc.bytes, cc.fromUnicodeResult)) {
0419:                    logln("fromUnicode[" + cc.caseNr + "](" + cc.charset
0420:                            + ") callback:" + cc.cbopt + " failed: +"
0421:                            + "wrong result string" + "\n");
0422:                    return false;
0423:                }
0424:
0425:                return true;
0426:            }
0427:
0428:            // toUnicode test worker functions ----------------------------------------- ***
0429:
0430:            private void TestToUnicode(DataMap testcase, int caseNr) {
0431:                // create Conversion case to store the test case data
0432:                ConversionCase cc = new ConversionCase();
0433:
0434:                try {
0435:                    // retrieve test case data
0436:                    cc.caseNr = caseNr;
0437:                    cc.charset = ((ICUResourceBundle) testcase
0438:                            .getObject("charset")).getString();
0439:                    cc.bytes = ((ICUResourceBundle) testcase.getObject("bytes"))
0440:                            .getBinary();
0441:                    cc.unicode = ((ICUResourceBundle) testcase
0442:                            .getObject("unicode")).getString();
0443:                    cc.offsets = ((ICUResourceBundle) testcase
0444:                            .getObject("offsets")).getIntVector();
0445:                    cc.finalFlush = ((ICUResourceBundle) testcase
0446:                            .getObject("flush")).getUInt() != 0;
0447:                    cc.fallbacks = ((ICUResourceBundle) testcase
0448:                            .getObject("fallbacks")).getUInt() != 0;
0449:                    cc.outErrorCode = ((ICUResourceBundle) testcase
0450:                            .getObject("errorCode")).getString();
0451:                    cc.cbopt = ((ICUResourceBundle) testcase
0452:                            .getObject("callback")).getString();
0453:
0454:                } catch (Exception e) {
0455:                    errln("Skipping test: error parsing conversion/toUnicode test case "
0456:                            +
0457:
0458:                            cc.caseNr);
0459:                    return;
0460:                }
0461:
0462:                // ----for debugging only
0463:                logln("\nTestToUnicode[" + caseNr + "] " + cc.charset + " ");
0464:                logln("Bytes:");
0465:                printbytes(cc.bytes, cc.bytes.limit());
0466:                logln("");
0467:                logln("Unicode: " + hex(cc.unicode));
0468:                logln("Callback: (" + cc.cbopt + ")");
0469:                ByteBuffer c = ByteBuffer.wrap(cc.cbopt.getBytes());
0470:                printbytes(c, c.limit());
0471:                logln("\n...............................................");
0472:
0473:                // ----for debugging only
0474:
0475:                // TODO: This test case is skipped due to limitation in java's API for
0476:                // decoder replacement
0477:                // { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 },
0478:                // :int{1}, :int{0}, "", "?", :bin{""} }
0479:                if (cc.caseNr == 63) {
0480:                    logln("TestToUnicode[" + cc.caseNr + "] " + cc.charset);
0481:                    logln("Skipping test due to limitation in Java API - callback replacement value");
0482:                    return;
0483:                }
0484:                // process the retrieved test data case
0485:                if (cc.offsets.length == 0) {
0486:                    cc.offsets = null;
0487:                } else if (cc.offsets.length != cc.unicode.length()) {
0488:                    errln("Skipping test: toUnicode[" + cc.caseNr
0489:                            + "] unicode[" + cc.unicode.length()
0490:                            + "] and offsets[" + cc.offsets.length
0491:                            + "] must have the same length");
0492:                    return;
0493:                }
0494:                // check for the callback replacement value for unmappable
0495:                // characters or malformed errors
0496:                if (cc.cbopt.length() > 0) {
0497:                    switch ((cc.cbopt).charAt(0)) {
0498:                    case '?': // CALLBACK_SUBSTITUTE
0499:                        cc.cbErrorAction = CodingErrorAction.REPLACE;
0500:                        break;
0501:                    case '0': // CALLBACK_SKIP
0502:                        cc.cbErrorAction = CodingErrorAction.IGNORE;
0503:                        break;
0504:                    case '.': // CALLBACK_STOP
0505:                        cc.cbErrorAction = CodingErrorAction.REPORT;
0506:                        break;
0507:                    case '&': // CALLBACK_ESCAPE
0508:                        cc.cbErrorAction = CodingErrorAction.REPORT;
0509:                        break;
0510:                    default:
0511:                        cc.cbErrorAction = null;
0512:                        break;
0513:                    }
0514:                }
0515:                // check for any options for the callback value
0516:                cc.option = cc.cbErrorAction == null ? null : cc.cbopt
0517:                        .substring(1);
0518:                if (cc.option == null) {
0519:                    cc.option = null;
0520:                }
0521:
0522:                logln("TestToUnicode[" + cc.caseNr + "] " + cc.charset);
0523:                ToUnicodeCase(cc);
0524:
0525:            }
0526:
0527:            private void ToUnicodeCase(ConversionCase cc) {
0528:
0529:                // create converter for charset and decoder for each test case
0530:                CharsetProviderICU provider = new CharsetProviderICU();
0531:                CharsetDecoder decoder = null;
0532:                Charset charset = null;
0533:
0534:                try {
0535:                    charset = (Charset) provider.charsetForName(cc.charset);
0536:                    decoder = (CharsetDecoder) charset.newDecoder();
0537:                    decoder.onMalformedInput(CodingErrorAction.REPLACE);
0538:                    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
0539:
0540:                } catch (Exception e) {
0541:
0542:                    logln("Skipping test:(" + cc.charset
0543:                            + ") due to ICU Charset not supported at this time");
0544:                    return;
0545:                }
0546:
0547:                // set the callback for the decoder
0548:                if (cc.cbErrorAction != null) {
0549:                    decoder.onMalformedInput(cc.cbErrorAction);
0550:                    decoder.onUnmappableCharacter(cc.cbErrorAction);
0551:
0552:                    // set the options (if any: SKIP_STOP_ON_ILLEGAL) for callback
0553:                    if (cc.option.equals("i")) {
0554:                        decoder.onMalformedInput(CodingErrorAction.REPORT);
0555:                    }
0556:
0557:                    // if callback action is replace, and there is a subchar
0558:                    // replace the decoder's default replacement value
0559:                    // if substring, skip test due to current api not supporting
0560:                    // substring replacement
0561:                    if (cc.cbErrorAction.equals(CodingErrorAction.REPLACE)) {
0562:                        if (cc.cbopt.length() > 1) {
0563:                            if (cc.cbopt.charAt(1) == '=') {
0564:                                logln("Skipping test due to limitation in Java API - substitution string not supported");
0565:
0566:                            } else {
0567:                                // // read NUL-separated subchar first, if any
0568:                                // copy the subchar from Latin-1 characters
0569:                                // start after the NUL
0570:                                if (cc.cbopt.charAt(1) == 0x00) {
0571:                                    cc.cbopt = cc.cbopt.substring(2);
0572:
0573:                                    try {
0574:                                        decoder.replaceWith(cc.cbopt);
0575:                                    } catch (Exception e) {
0576:                                        logln("Skipping test due to limitation in Java API - substitution character sequence size error");
0577:
0578:                                    }
0579:                                }
0580:                            }
0581:                        }
0582:                    }
0583:                }
0584:
0585:                //      Check the step to unicode    
0586:                boolean ok;
0587:                int resultLength;
0588:
0589:                String steps[][] = { { "0", "bulk" }, // must be first for offsets to be checked
0590:                        { "1", "step=1" }, { "3", "step=3" }, { "7", "step=7" } };
0591:                /* TODO: currently not supported test steps, getNext API is not supported for now  
0592:                 { "-1", "getNext" },
0593:                 { "-2", "toU(bulk)+getNext" },
0594:                 { "-3", "getNext+toU(bulk)" },
0595:                 { "-4", "toU(1)+getNext" },
0596:                 { "-5", "getNext+toU(1)" },
0597:                 { "-12", "toU(5)+getNext" },
0598:                 { "-13", "getNext+toU(5)" }};*/
0599:
0600:                ok = true;
0601:                int step;
0602:                // testing by steps using the CoderResult cr = charset.decoder(in,out,flush) api
0603:                for (int i = 0; i < steps.length && ok; ++i) {
0604:                    step = Integer.parseInt(steps[i][0]);
0605:
0606:                    if (step < 0 && !cc.finalFlush) {
0607:                        continue;
0608:                    }
0609:                    logln("Testing step:[" + step + "]");
0610:                    resultLength = stepToUnicode(cc, decoder, step);
0611:                    ok = checkToUnicode(cc, resultLength);
0612:                }
0613:
0614:                //testing the java's out = charset.decoder(in) api
0615:                while (ok && cc.finalFlush) {
0616:                    logln("Testing java charset.decoder(in):");
0617:                    cc.toUnicodeResult = null;
0618:                    CharBuffer out = null;
0619:
0620:                    try {
0621:                        out = decoder.decode(ByteBuffer.wrap(cc.bytes.array()));
0622:                        out.position(out.limit());
0623:                        if (out.limit() < cc.unicode.length()) {
0624:                            int pos = out.position();
0625:                            char[] temp = out.array();
0626:                            out = CharBuffer.allocate(cc.bytes.limit());
0627:                            out.put(temp);
0628:                            out.position(pos);
0629:                            CoderResult cr = decoder.flush(out);
0630:                            if (cr.isOverflow()) {
0631:                                logln("Overflow error with flushing decodering");
0632:                            }
0633:                        }
0634:
0635:                        cc.toUnicodeResult = out;
0636:
0637:                        ok = checkToUnicode(cc, out.limit());
0638:                        if (!ok) {
0639:                            break;
0640:                        }
0641:                    } catch (Exception e) {
0642:                        //check the error code to see if it matches cc.errorCode
0643:                        logln("Decoder returned an error code");
0644:                        logln("ErrorCode expected is: " + cc.outErrorCode);
0645:                        logln("Error Result is: " + e.toString());
0646:                    }
0647:                    break;
0648:                }
0649:
0650:                return;
0651:            }
0652:
0653:            private int stepToUnicode(ConversionCase cc,
0654:                    CharsetDecoder decoder, int step)
0655:
0656:            {
0657:                ByteBuffer source;
0658:                CharBuffer target;
0659:                boolean flush = false;
0660:                int sourceLen;
0661:                source = cc.bytes;
0662:                sourceLen = cc.bytes.limit();
0663:                source.position(0);
0664:                target = CharBuffer.allocate(cc.unicode.length() + 4);
0665:                target.position(0);
0666:                cc.toUnicodeResult = null;
0667:                decoder.reset();
0668:
0669:                if (step >= 0) {
0670:
0671:                    int iStep = step;
0672:                    int oStep = step;
0673:
0674:                    for (;;) {
0675:
0676:                        if (step != 0) {
0677:                            source.limit((iStep <= sourceLen) ? iStep
0678:                                    : sourceLen);
0679:                            target.limit((oStep <= target.capacity()) ? oStep
0680:                                    : target.capacity());
0681:                            flush = (cc.finalFlush && source.limit() == sourceLen);
0682:
0683:                        } else {
0684:                            //bulk mode
0685:                            source.limit(sourceLen);
0686:                            target.limit(target.capacity());
0687:                            flush = cc.finalFlush;
0688:                        }
0689:                        // convert 
0690:                        CoderResult cr = null;
0691:                        if (source.hasRemaining()) {
0692:
0693:                            cr = decoder.decode(source, target, flush);
0694:                            // check pointers and errors
0695:                            if (cr.isOverflow()) {
0696:                                // the partial target is filled, set a new limit, 
0697:                                oStep = (target.position() + step);
0698:                                target
0699:                                        .limit((oStep < target.capacity()) ? oStep
0700:                                                : target.capacity());
0701:                                if (target.limit() > target.capacity()) {
0702:                                    //target has reached its limit, an error occurred or test case has an error code
0703:                                    //check error code
0704:                                    logln("UnExpected error: Target Buffer is larger than capacity");
0705:                                    break;
0706:                                }
0707:
0708:                            } else if (cr.isError()) {
0709:                                //check the error code to see if it matches cc.errorCode
0710:                                logln("Decoder returned an error code");
0711:                                logln("ErrorCode expected is: "
0712:                                        + cc.outErrorCode);
0713:                                logln("Error Result is: " + cr.toString());
0714:                                break;
0715:                            }
0716:
0717:                        } else {
0718:                            if (source.limit() == sourceLen) {
0719:
0720:                                cr = decoder.decode(source, target, true);
0721:
0722:                                //due to limitation of the API we need to check for target limit for expected 
0723:                                if (target.limit() != cc.unicode.length()) {
0724:                                    target.limit(cc.unicode.length());
0725:                                    cr = decoder.flush(target);
0726:                                    if (cr.isError()) {
0727:                                        errln("Flush operation failed");
0728:                                    }
0729:                                }
0730:                                break;
0731:                            }
0732:                        }
0733:                        iStep += step;
0734:                        oStep += step;
0735:
0736:                    }
0737:
0738:                }// if(step ==0)
0739:
0740:                //--------------------------------------------------------------------------
0741:                else /* step<0 */{
0742:                    /*
0743:                     * step==-1: call only ucnv_getNextUChar()
0744:                     * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()
0745:                     *   if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,
0746:                     *   else give it at most (-step-2)/2 bytes
0747:                     */
0748:
0749:                    for (;;) {
0750:                        // convert
0751:                        if ((step & 1) != 0 /* odd: -1, -3, -5, ... */) {
0752:
0753:                            target
0754:                                    .limit(target.position() < target
0755:                                            .capacity() ? target.position() + 1
0756:                                            : target.capacity());
0757:
0758:                            // decode behavior is return to output target 1 character
0759:                            CoderResult cr = null;
0760:
0761:                            //similar to getNextUChar() , input is the whole string, while outputs only 1 character
0762:                            source.limit(sourceLen);
0763:                            while (target.position() != target.limit()
0764:                                    && source.hasRemaining()) {
0765:                                cr = decoder.decode(source, target, source
0766:                                        .limit() == sourceLen);
0767:
0768:                                if (cr.isOverflow()) {
0769:
0770:                                    if (target.limit() >= target.capacity()) {
0771:                                        // target has reached its limit, an error occurred 
0772:                                        logln("UnExpected error: Target Buffer is larger than capacity");
0773:                                        break;
0774:                                    } else {
0775:                                        //1 character has been consumed
0776:                                        target.limit(target.position() + 1);
0777:                                        break;
0778:                                    }
0779:                                } else if (cr.isError()) {
0780:                                    logln("Decoder returned an error code");
0781:                                    logln("ErrorCode expected is: "
0782:                                            + cc.outErrorCode);
0783:                                    logln("Error Result is: " + cr.toString());
0784:
0785:                                    cc.toUnicodeResult = target;
0786:                                    return target.position();
0787:                                }
0788:
0789:                                else {
0790:                                    // one character has been consumed
0791:                                    if (target.limit() == target.position()) {
0792:                                        target.limit(target.position() + 1);
0793:                                        break;
0794:                                    }
0795:                                }
0796:
0797:                            }
0798:                            if (source.position() == sourceLen) {
0799:
0800:                                // due to limitation of the API we need to check
0801:                                // for target limit for expected
0802:                                cr = decoder.decode(source, target, true);
0803:                                if (target.position() != cc.unicode.length()) {
0804:
0805:                                    target.limit(cc.unicode.length());
0806:                                    cr = decoder.flush(target);
0807:                                    if (cr.isError()) {
0808:                                        errln("Flush operation failed");
0809:                                    }
0810:                                }
0811:                                break;
0812:                            }
0813:                            // alternate between -n-1 and -n but leave -1 alone
0814:                            if (step < -1) {
0815:                                ++step;
0816:                            }
0817:                        } else {/* step is even */
0818:                            // allow only one UChar output
0819:
0820:                            target
0821:                                    .limit(target.position() < target
0822:                                            .capacity() ? target.position() + 1
0823:                                            : target.capacity());
0824:                            if (step == -2) {
0825:                                source.limit(sourceLen);
0826:                            } else {
0827:                                source.limit(source.position() + (-step - 2)
0828:                                        / 2);
0829:                                if (source.limit() > sourceLen) {
0830:                                    source.limit(sourceLen);
0831:                                }
0832:                            }
0833:                            CoderResult cr = decoder.decode(source, target,
0834:                                    source.limit() == sourceLen);
0835:                            // check pointers and errors 
0836:                            if (cr.isOverflow()) {
0837:                                // one character has been consumed
0838:                                if (target.limit() >= target.capacity()) {
0839:                                    // target has reached its limit, an error occurred
0840:                                    logln("Unexpected error: Target Buffer is larger than capacity");
0841:                                    break;
0842:                                }
0843:                            } else if (cr.isError()) {
0844:                                logln("Decoder returned an error code");
0845:                                logln("ErrorCode expected is: "
0846:                                        + cc.outErrorCode);
0847:                                logln("Error Result is: " + cr.toString());
0848:                                break;
0849:                            }
0850:
0851:                            --step;
0852:                        }
0853:                    }
0854:                }
0855:
0856:                //--------------------------------------------------------------------------
0857:
0858:                cc.toUnicodeResult = target;
0859:                return target.position();
0860:            }
0861:
0862:            private boolean checkToUnicode(ConversionCase cc, int resultLength) {
0863:
0864:                // check everything that might have gone wrong
0865:                if (cc.unicode.length() != resultLength) {
0866:                    logln("toUnicode[" + cc.caseNr + "](" + cc.charset
0867:                            + ") callback:" + cc.cbopt + " failed: +"
0868:                            + "wrong result length" + "\n");
0869:                    checkResultsToUnicode(cc, cc.unicode, cc.toUnicodeResult);
0870:                    return false;
0871:                }
0872:                if (!checkResultsToUnicode(cc, cc.unicode, cc.toUnicodeResult)) {
0873:                    logln("toUnicode[" + cc.caseNr + "](" + cc.charset
0874:                            + ") callback:" + cc.cbopt + " failed: +"
0875:                            + "wrong result string" + "\n");
0876:                    return false;
0877:                }
0878:
0879:                return true;
0880:
0881:            }
0882:
0883:            private void TestGetUnicodeSet(DataMap testcase) {
0884:                /*
0885:                 * charset - will be opened, and ucnv_getUnicodeSet() called on it //
0886:                 * map - set of code points and strings that must be in the returned set //
0887:                 * mapnot - set of code points and strings that must *not* be in the //
0888:                 * returned set // which - numeric UConverterUnicodeSet value Headers {
0889:                 * "charset", "map", "mapnot", "which" }
0890:                 */
0891:                ConversionCase cc = new ConversionCase();
0892:                // retrieve test case data
0893:                cc.charset = ((ICUResourceBundle) testcase.getObject("charset"))
0894:                        .getString();
0895:                cc.map = ((ICUResourceBundle) testcase.getObject("map"))
0896:                        .getString();
0897:                cc.mapnot = ((ICUResourceBundle) testcase.getObject("mapnot"))
0898:                        .getString();
0899:                cc.which = ((ICUResourceBundle) testcase.getObject("which"))
0900:                        .getUInt();
0901:
0902:                // create charset and encoder for each test case
0903:                logln("Test not supported at this time");
0904:
0905:            }
0906:
0907:            /**
0908:             * This follows ucnv.c method ucnv_detectUnicodeSignature() to detect the
0909:             * start of the stream for example U+FEFF (the Unicode BOM/signature
0910:             * character) that can be ignored.
0911:             * 
0912:             * Detects Unicode signature byte sequences at the start of the byte stream
0913:             * and returns number of bytes of the BOM of the indicated Unicode charset.
0914:             * 0 is returned when no Unicode signature is recognized.
0915:             * 
0916:             */
0917:
0918:            private String detectUnicodeSignature(ByteBuffer source) {
0919:                int signatureLength = 0; // number of bytes of the signature
0920:                final int SIG_MAX_LEN = 5;
0921:                String sigUniCharset = null; // states what unicode charset is the BOM
0922:                int i = 0;
0923:
0924:                /*
0925:                 * initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN bytes we
0926:                 * don't misdetect something
0927:                 */
0928:                byte start[] = { (byte) 0xa5, (byte) 0xa5, (byte) 0xa5,
0929:                        (byte) 0xa5, (byte) 0xa5 };
0930:
0931:                while (i < source.remaining() && i < SIG_MAX_LEN) {
0932:                    start[i] = source.get(i);
0933:                    i++;
0934:                }
0935:
0936:                if (start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) {
0937:                    signatureLength = 2;
0938:                    sigUniCharset = "UTF-16BE";
0939:                    source.position(signatureLength);
0940:                    return sigUniCharset;
0941:                } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) {
0942:                    if (start[2] == (byte) 0x00 && start[3] == (byte) 0x00) {
0943:                        signatureLength = 4;
0944:                        sigUniCharset = "UTF-32LE";
0945:                        source.position(signatureLength);
0946:                        return sigUniCharset;
0947:                    } else {
0948:                        signatureLength = 2;
0949:                        sigUniCharset = "UTF-16LE";
0950:                        source.position(signatureLength);
0951:                        return sigUniCharset;
0952:                    }
0953:                } else if (start[0] == (byte) 0xEF && start[1] == (byte) 0xBB
0954:                        && start[2] == (byte) 0xBF) {
0955:                    signatureLength = 3;
0956:                    sigUniCharset = "UTF-8";
0957:                    source.position(signatureLength);
0958:                    return sigUniCharset;
0959:                } else if (start[0] == (byte) 0x00 && start[1] == (byte) 0x00
0960:                        && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) {
0961:                    signatureLength = 4;
0962:                    sigUniCharset = "UTF-32BE";
0963:                    source.position(signatureLength);
0964:                    return sigUniCharset;
0965:                } else if (start[0] == (byte) 0x0E && start[1] == (byte) 0xFE
0966:                        && start[2] == (byte) 0xFF) {
0967:                    signatureLength = 3;
0968:                    sigUniCharset = "SCSU";
0969:                    source.position(signatureLength);
0970:                    return sigUniCharset;
0971:                } else if (start[0] == (byte) 0xFB && start[1] == (byte) 0xEE
0972:                        && start[2] == (byte) 0x28) {
0973:                    signatureLength = 3;
0974:                    sigUniCharset = "BOCU-1";
0975:                    source.position(signatureLength);
0976:                    return sigUniCharset;
0977:                } else if (start[0] == (byte) 0x2B && start[1] == (byte) 0x2F
0978:                        && start[2] == (byte) 0x76) {
0979:
0980:                    if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) {
0981:                        signatureLength = 5;
0982:                        sigUniCharset = "UTF-7";
0983:                        source.position(signatureLength);
0984:                        return sigUniCharset;
0985:                    } else if (start[3] == (byte) 0x38
0986:                            || start[3] == (byte) 0x39
0987:                            || start[3] == (byte) 0x2B
0988:                            || start[3] == (byte) 0x2F) {
0989:                        signatureLength = 4;
0990:                        sigUniCharset = "UTF-7";
0991:                        source.position(signatureLength);
0992:                        return sigUniCharset;
0993:                    }
0994:                } else if (start[0] == (byte) 0xDD && start[2] == (byte) 0x73
0995:                        && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) {
0996:                    signatureLength = 4;
0997:                    sigUniCharset = "UTF-EBCDIC";
0998:                    source.position(signatureLength);
0999:                    return sigUniCharset;
1000:                }
1001:
1002:                /* no known Unicode signature byte sequence recognized */
1003:                return null;
1004:            }
1005:
1006:            void printbytes(ByteBuffer buf, int pos) {
1007:                int cur = buf.position();
1008:                log(" (" + pos + ")==[");
1009:                for (int i = 0; i < pos; i++) {
1010:                    log("(" + i + ")" + hex(buf.get(i) & 0xff) + " ");
1011:                }
1012:                log("]");
1013:                buf.position(cur);
1014:            }
1015:
1016:            void printchar(CharBuffer buf, int pos) {
1017:                int cur = buf.position();
1018:                log(" (" + pos + ")==[");
1019:                for (int i = 0; i < pos; i++) {
1020:                    log("(" + i + ")" + hex(buf.get(i)) + " ");
1021:                }
1022:                log("]");
1023:                buf.position(cur);
1024:            }
1025:
1026:            private boolean checkResultsFromUnicode(ConversionCase cc,
1027:                    ByteBuffer source,
1028:
1029:                    ByteBuffer target) {
1030:
1031:                int len = target.position();
1032:                target.limit(len); //added to stop where data ends
1033:                source.rewind();
1034:                target.rewind();
1035:
1036:                // remove any BOM signature before checking
1037:                /* String BOM =*/detectUnicodeSignature(target);
1038:
1039:                len = len - target.position();
1040:
1041:                if (len != source.remaining()) {
1042:                    errln("Test failed: output does not match expected\n");
1043:                    logln("[" + cc.caseNr + "]:" + cc.charset + "\noutput=");
1044:                    printbytes(target, len);
1045:                    logln("");
1046:                    return false;
1047:                }
1048:                source.rewind();
1049:                for (int i = 0; i < source.remaining(); i++) {
1050:                    if (target.get() != source.get()) {
1051:                        errln("Test failed: output does not match expected\n");
1052:                        logln("[" + cc.caseNr + "]:" + cc.charset + "\noutput=");
1053:                        printbytes(target, len);
1054:                        logln("");
1055:                        return false;
1056:                    }
1057:                }
1058:                logln("[" + cc.caseNr + "]:" + cc.charset);
1059:                log("output=");
1060:                printbytes(target, len);
1061:                logln("\nPassed\n");
1062:                return true;
1063:            }
1064:
1065:            private boolean checkResultsToUnicode(ConversionCase cc,
1066:                    String source, CharBuffer target) {
1067:
1068:                int len = target.position();
1069:                target.rewind();
1070:
1071:                // test to see if the conversion matches actual results
1072:                if (len != source.length()) {
1073:                    errln("Test failed: output does not match expected\n");
1074:                    logln("[" + cc.caseNr + "]:" + cc.charset + "\noutput=");
1075:                    printchar(target, len);
1076:                    return false;
1077:                }
1078:                for (int i = 0; i < source.length(); i++) {
1079:                    if (!(hex(target.get(i)).equals(hex(source.charAt(i))))) {
1080:                        errln("Test failed: output does not match expected\n");
1081:                        logln("[" + cc.caseNr + "]:" + cc.charset + "\noutput=");
1082:                        printchar(target, len);
1083:                        return false;
1084:                    }
1085:                }
1086:                logln("[" + cc.caseNr + "]:" + cc.charset);
1087:                log("output=");
1088:                printchar(target, len);
1089:                logln("\nPassed\n");
1090:                return true;
1091:            }
1092:
1093:            private byte[] toByteArray(String str) {
1094:                byte[] ret = new byte[str.length()];
1095:                for (int i = 0; i < ret.length; i++) {
1096:                    char ch = str.charAt(i);
1097:                    if (ch <= 0xFF) {
1098:                        ret[i] = (byte) ch;
1099:                    } else {
1100:                        throw new IllegalArgumentException(
1101:                                " byte value out of range: " + ch);
1102:                    }
1103:                }
1104:                return ret;
1105:            }
1106:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.