Source Code Cross Referenced for RBBITestExtended.java in » Internationalization-Localization » icu4j » com » ibm » icu » dev » test » rbbi » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.dev.test.rbbi 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         * Created on May 5, 2004
003:         * 
004:         * Copyright (C) 2004-2006 International Business Machines Corporation and others.
005:         * All Rights Reserved.
006:         *
007:         */
008:        package com.ibm.icu.dev.test.rbbi;
009:
010:        import com.ibm.icu.dev.test.TestFmwk;
011:        import com.ibm.icu.impl.Utility;
012:        import com.ibm.icu.text.BreakIterator;
013:        import com.ibm.icu.text.RuleBasedBreakIterator;
014:        import com.ibm.icu.lang.UCharacter;
015:        import com.ibm.icu.text.UTF16;
016:        import com.ibm.icu.util.ULocale;
017:        import java.io.InputStream;
018:        import java.io.InputStreamReader;
019:        import java.io.IOException;
020:        import java.util.Arrays;
021:
022:        /**
023:         * Rule based break iterator data driven test.
024:         *      Perform the tests from the file rbbitst.txt.
025:         *      The test data file is common to both ICU4C and ICU4J.
026:         *      See the data file for a description of the tests.
027:         *
028:         */
029:        public class RBBITestExtended extends TestFmwk {
030:
031:            public static void main(String[] args) throws Exception {
032:                new RBBITestExtended().run(args);
033:            }
034:
035:            public RBBITestExtended() {
036:            }
037:
038:            static class TestParams {
039:                BreakIterator bi;
040:                StringBuffer dataToBreak = new StringBuffer();
041:                int[] expectedBreaks = new int[1000];
042:                int[] srcLine = new int[1000];
043:                int[] srcCol = new int[1000];
044:                ULocale currentLocale = new ULocale("en_US");
045:            }
046:
047:            public void TestExtended() {
048:
049:                TestParams tp = new TestParams();
050:
051:                //
052:                //  Open and read the test data file.
053:                //
054:                InputStreamReader isr = null;
055:                StringBuffer testFileBuf = new StringBuffer();
056:                try {
057:                    InputStream is = RBBITestExtended.class
058:                            .getResourceAsStream("rbbitst.txt");
059:                    if (is == null) {
060:                        errln("Could not open test data file rbbitst.txt");
061:                        return;
062:                    }
063:                    isr = new InputStreamReader(is, "UTF-8");
064:                    int c;
065:                    int count = 0;
066:                    for (;;) {
067:                        c = isr.read();
068:                        if (c < 0) {
069:                            break;
070:                        }
071:                        count++;
072:                        if (c == 0xFEFF && count == 1) {
073:                            // BOM in the test data file.  Discard it.
074:                            continue;
075:                        }
076:
077:                        UTF16.append(testFileBuf, c);
078:                    }
079:
080:                } catch (IOException e) {
081:                    errln(e.toString());
082:                    return;
083:                }
084:
085:                String testString = testFileBuf.toString();
086:
087:                final int PARSE_COMMENT = 1;
088:                final int PARSE_TAG = 2;
089:                final int PARSE_DATA = 3;
090:                final int PARSE_NUM = 4;
091:
092:                int parseState = PARSE_TAG;
093:
094:                int savedState = PARSE_TAG;
095:
096:                final char CH_LF = 0x0a;
097:                final char CH_CR = 0x0d;
098:                final char CH_HASH = 0x23;
099:                /*static const UChar CH_PERIOD    = 0x2e;*/
100:                final char CH_LT = 0x3c;
101:                final char CH_GT = 0x3e;
102:                final char CH_BACKSLASH = 0x5c;
103:                final char CH_BULLET = 0x2022;
104:
105:                int lineNum = 1;
106:                int colStart = 0;
107:                int column = 0;
108:                int charIdx = 0;
109:                int i;
110:
111:                int tagValue = 0; // The numeric value of a <nnn> tag.
112:                int len = testString.length();
113:
114:                for (charIdx = 0; charIdx < len;) {
115:                    int c = UTF16.charAt(testString, charIdx);
116:                    charIdx++;
117:                    if (c == CH_CR && charIdx < len
118:                            && testString.charAt(charIdx) == CH_LF) {
119:                        // treat CRLF as a unit
120:                        c = CH_LF;
121:                        charIdx++;
122:                    }
123:                    if (c == CH_LF || c == CH_CR) {
124:                        lineNum++;
125:                        colStart = charIdx;
126:                    }
127:                    column = charIdx - colStart + 1;
128:
129:                    switch (parseState) {
130:                    case PARSE_COMMENT:
131:                        if (c == 0x0a || c == 0x0d) {
132:                            parseState = savedState;
133:                        }
134:                        break;
135:
136:                    case PARSE_TAG: {
137:                        if (c == CH_HASH) {
138:                            parseState = PARSE_COMMENT;
139:                            savedState = PARSE_TAG;
140:                            break;
141:                        }
142:                        if (UCharacter.isWhitespace(c)) {
143:                            break;
144:                        }
145:                        if (testString.startsWith("<word>", charIdx - 1)) {
146:                            tp.bi = BreakIterator
147:                                    .getWordInstance(tp.currentLocale);
148:                            charIdx += 5;
149:                            break;
150:                        }
151:                        if (testString.startsWith("<char>", charIdx - 1)) {
152:                            tp.bi = BreakIterator
153:                                    .getCharacterInstance(tp.currentLocale);
154:                            charIdx += 5;
155:                            break;
156:                        }
157:                        if (testString.startsWith("<line>", charIdx - 1)) {
158:                            tp.bi = BreakIterator
159:                                    .getLineInstance(tp.currentLocale);
160:                            charIdx += 5;
161:                            break;
162:                        }
163:                        if (testString.startsWith("<sent>", charIdx - 1)) {
164:                            tp.bi = BreakIterator
165:                                    .getSentenceInstance(tp.currentLocale);
166:                            charIdx += 5;
167:                            break;
168:                        }
169:                        if (testString.startsWith("<title>", charIdx - 1)) {
170:                            tp.bi = BreakIterator
171:                                    .getTitleInstance(tp.currentLocale);
172:                            charIdx += 6;
173:                            break;
174:                        }
175:                        if (testString.startsWith("<locale ", charIdx - 1)) {
176:                            int closeIndex = testString.indexOf(">", charIdx);
177:                            if (closeIndex < 0) {
178:                                errln("line" + lineNum
179:                                        + ": missing close on <locale  tag.");
180:                                break;
181:                            }
182:                            String localeName = testString.substring(
183:                                    charIdx + 6, closeIndex);
184:                            localeName = localeName.trim();
185:                            tp.currentLocale = new ULocale(localeName);
186:                            charIdx = closeIndex + 1;
187:                            break;
188:                        }
189:                        if (testString.startsWith("<data>", charIdx - 1)) {
190:                            parseState = PARSE_DATA;
191:                            charIdx += 5;
192:                            tp.dataToBreak.setLength(0);
193:                            Arrays.fill(tp.expectedBreaks, 0);
194:                            Arrays.fill(tp.srcCol, 0);
195:                            Arrays.fill(tp.srcLine, 0);
196:                            break;
197:                        }
198:
199:                        errln("line" + lineNum + ": Tag expected in test file.");
200:                        return;
201:                        //parseState = PARSE_COMMENT;
202:                        //savedState = PARSE_DATA;
203:                    }
204:
205:                    case PARSE_DATA:
206:                        if (c == CH_BULLET) {
207:                            int breakIdx = tp.dataToBreak.length();
208:                            tp.expectedBreaks[breakIdx] = -1;
209:                            tp.srcLine[breakIdx] = lineNum;
210:                            tp.srcCol[breakIdx] = column;
211:                            break;
212:                        }
213:
214:                        if (testString.startsWith("</data>", charIdx - 1)) {
215:                            // Add final entry to mappings from break location to source file position.
216:                            //  Need one extra because last break position returned is after the
217:                            //    last char in the data, not at the last char.
218:                            int idx = tp.dataToBreak.length();
219:                            tp.srcLine[idx] = lineNum;
220:                            tp.srcCol[idx] = column;
221:
222:                            parseState = PARSE_TAG;
223:                            charIdx += 6;
224:
225:                            // RUN THE TEST!
226:                            executeTest(tp);
227:                            break;
228:                        }
229:
230:                        if (testString.startsWith("\\N{", charIdx - 1)) {
231:                            int nameEndIdx = testString.indexOf('}', charIdx);
232:                            if (nameEndIdx == -1) {
233:                                errln("Error in named character in test file at line "
234:                                        + lineNum + ", col " + column);
235:                            }
236:                            // Named character, e.g. \N{COMBINING GRAVE ACCENT}
237:                            // Get the code point from the name and insert it into the test data.
238:                            String charName = testString.substring(charIdx + 2,
239:                                    nameEndIdx);
240:                            c = UCharacter.getCharFromName(charName);
241:                            if (c == -1) {
242:                                errln("Error in named character in test file at line "
243:                                        + lineNum + ", col " + column);
244:                            } else {
245:                                // Named code point was recognized.  Insert it
246:                                //   into the test data.
247:                                UTF16.append(tp.dataToBreak, c);
248:                                for (i = tp.dataToBreak.length() - 1; i >= 0
249:                                        && tp.srcLine[i] == 0; i--) {
250:                                    tp.srcLine[i] = lineNum;
251:                                    tp.srcCol[i] = column;
252:                                }
253:
254:                            }
255:                            if (nameEndIdx > charIdx) {
256:                                charIdx = nameEndIdx + 1;
257:                            }
258:                            break;
259:                        }
260:
261:                        if (testString.startsWith("<>", charIdx - 1)) {
262:                            charIdx++;
263:                            int breakIdx = tp.dataToBreak.length();
264:                            tp.expectedBreaks[breakIdx] = -1;
265:                            tp.srcLine[breakIdx] = lineNum;
266:                            tp.srcCol[breakIdx] = column;
267:                            break;
268:                        }
269:
270:                        if (c == CH_LT) {
271:                            tagValue = 0;
272:                            parseState = PARSE_NUM;
273:                            break;
274:                        }
275:
276:                        if (c == CH_HASH && column == 3) { // TODO:  why is column off so far?
277:                            parseState = PARSE_COMMENT;
278:                            savedState = PARSE_DATA;
279:                            break;
280:                        }
281:
282:                        if (c == CH_BACKSLASH) {
283:                            // Check for \ at end of line, a line continuation.
284:                            //     Advance over (discard) the newline
285:                            int cp = UTF16.charAt(testString, charIdx);
286:                            if (cp == CH_CR
287:                                    && charIdx < len
288:                                    && UTF16.charAt(testString, charIdx + 1) == CH_LF) {
289:                                // We have a CR LF
290:                                //  Need an extra increment of the input ptr to move over both of them
291:                                charIdx++;
292:                            }
293:                            if (cp == CH_LF || cp == CH_CR) {
294:                                lineNum++;
295:                                column = 0;
296:                                charIdx++;
297:                                colStart = charIdx;
298:                                break;
299:                            }
300:
301:                            // Let unescape handle the back slash.
302:                            int charIdxAr[] = new int[1];
303:                            charIdxAr[0] = charIdx;
304:                            cp = Utility.unescapeAt(testString, charIdxAr);
305:                            if (cp != -1) {
306:                                // Escape sequence was recognized.  Insert the char
307:                                //   into the test data.
308:                                charIdx = charIdxAr[0];
309:                                UTF16.append(tp.dataToBreak, cp);
310:                                for (i = tp.dataToBreak.length() - 1; i >= 0
311:                                        && tp.srcLine[i] == 0; i--) {
312:                                    tp.srcLine[i] = lineNum;
313:                                    tp.srcCol[i] = column;
314:                                }
315:
316:                                break;
317:                            }
318:
319:                            // Not a recognized backslash escape sequence.
320:                            // Take the next char as a literal.
321:                            //  TODO:  Should this be an error?
322:                            c = UTF16.charAt(testString, charIdx);
323:                            charIdx = UTF16.moveCodePointOffset(testString,
324:                                    charIdx, 1);
325:                        }
326:
327:                        // Normal, non-escaped data char.
328:                        UTF16.append(tp.dataToBreak, c);
329:
330:                        // Save the mapping from offset in the data to line/column numbers in
331:                        //   the original input file.  Will be used for better error messages only.
332:                        //   If there's an expected break before this char, the slot in the mapping
333:                        //     vector will already be set for this char; don't overwrite it.
334:                        for (i = tp.dataToBreak.length() - 1; i >= 0
335:                                && tp.srcLine[i] == 0; i--) {
336:                            tp.srcLine[i] = lineNum;
337:                            tp.srcCol[i] = column;
338:                        }
339:                        break;
340:
341:                    case PARSE_NUM:
342:                        // We are parsing an expected numeric tag value, like <1234>,
343:                        //   within a chunk of data.
344:                        if (UCharacter.isWhitespace(c)) {
345:                            break;
346:                        }
347:
348:                        if (c == CH_GT) {
349:                            // Finished the number.  Add the info to the expected break data,
350:                            //   and switch parse state back to doing plain data.
351:                            parseState = PARSE_DATA;
352:                            if (tagValue == 0) {
353:                                tagValue = -1;
354:                            }
355:                            int breakIdx = tp.dataToBreak.length();
356:                            tp.expectedBreaks[breakIdx] = tagValue;
357:                            tp.srcLine[breakIdx] = lineNum;
358:                            tp.srcCol[breakIdx] = column;
359:                            break;
360:                        }
361:
362:                        if (UCharacter.isDigit(c)) {
363:                            tagValue = tagValue * 10 + UCharacter.digit(c);
364:                            break;
365:                        }
366:
367:                        errln("Syntax Error in test file at line " + lineNum
368:                                + ", col %d" + column);
369:                        return;
370:
371:                        // parseState = PARSE_COMMENT;   // TODO: unreachable.  Don't stop on errors.
372:                        // break;
373:                    }
374:
375:                }
376:            }
377:
378:            void executeTest(TestParams t) {
379:                int bp;
380:                int prevBP;
381:                int i;
382:
383:                if (t.bi == null) {
384:                    return;
385:                }
386:
387:                t.bi.setText(t.dataToBreak.toString());
388:                //
389:                //  Run the iterator forward
390:                //
391:                prevBP = -1;
392:                for (bp = t.bi.first(); bp != BreakIterator.DONE; bp = t.bi
393:                        .next()) {
394:                    if (prevBP == bp) {
395:                        // Fail for lack of forward progress.
396:                        errln("Forward Iteration, no forward progress.  Break Pos="
397:                                + bp
398:                                + "  File line,col="
399:                                + t.srcLine[bp]
400:                                + ", " + t.srcCol[bp]);
401:                        break;
402:                    }
403:
404:                    // Check that there were we didn't miss an expected break between the last one
405:                    //  and this one.
406:                    for (i = prevBP + 1; i < bp; i++) {
407:                        if (t.expectedBreaks[i] != 0) {
408:                            errln("Forward Iteration, break expected, but not found.  Pos="
409:                                    + i
410:                                    + "  File line,col= "
411:                                    + t.srcLine[i]
412:                                    + ", " + t.srcCol[i]);
413:                        }
414:                    }
415:
416:                    // Check that the break we did find was expected
417:                    if (t.expectedBreaks[bp] == 0) {
418:                        errln("Forward Iteration, break found, but not expected.  Pos="
419:                                + bp
420:                                + "  File line,col= "
421:                                + t.srcLine[bp]
422:                                + ", " + t.srcCol[bp]);
423:                    } else {
424:                        // The break was expected.
425:                        //   Check that the {nnn} tag value is correct.
426:                        int expectedTagVal = t.expectedBreaks[bp];
427:                        if (expectedTagVal == -1) {
428:                            expectedTagVal = 0;
429:                        }
430:                        int line = t.srcLine[bp];
431:                        int rs = ((RuleBasedBreakIterator) t.bi)
432:                                .getRuleStatus();
433:                        if (rs != expectedTagVal) {
434:                            errln("Incorrect status for forward break.  Pos = "
435:                                    + bp + ".  File line,col = " + line + ", "
436:                                    + t.srcCol[bp] + "\n"
437:                                    + "          Actual, Expected status = "
438:                                    + rs + ", " + expectedTagVal);
439:                        }
440:                    }
441:
442:                    prevBP = bp;
443:                }
444:
445:                // Verify that there were no missed expected breaks after the last one found
446:                for (i = prevBP + 1; i < t.dataToBreak.length() + 1; i++) {
447:                    if (t.expectedBreaks[i] != 0) {
448:                        errln("Forward Iteration, break expected, but not found.  Pos="
449:                                + i
450:                                + "  File line,col= "
451:                                + t.srcLine[i]
452:                                + ", "
453:                                + t.srcCol[i]);
454:                    }
455:                }
456:
457:                //
458:                //  Run the iterator backwards, verify that the same breaks are found.
459:                //
460:                prevBP = t.dataToBreak.length() + 2; // start with a phony value for the last break pos seen.
461:                for (bp = t.bi.last(); bp != BreakIterator.DONE; bp = t.bi
462:                        .previous()) {
463:                    if (prevBP == bp) {
464:                        // Fail for lack of progress.
465:                        errln("Reverse Iteration, no progress.  Break Pos="
466:                                + bp + "File line,col=" + t.srcLine[bp] + " "
467:                                + t.srcCol[bp]);
468:                        break;
469:                    }
470:
471:                    // Check that there were we didn't miss an expected break between the last one
472:                    //  and this one.  (UVector returns zeros for index out of bounds.)
473:                    for (i = prevBP - 1; i > bp; i--) {
474:                        if (t.expectedBreaks[i] != 0) {
475:                            errln("Reverse Itertion, break expected, but not found.  Pos="
476:                                    + i
477:                                    + "  File line,col= "
478:                                    + t.srcLine[i]
479:                                    + ", " + t.srcCol[i]);
480:                        }
481:                    }
482:
483:                    // Check that the break we did find was expected
484:                    if (t.expectedBreaks[bp] == 0) {
485:                        errln("Reverse Itertion, break found, but not expected.  Pos="
486:                                + bp
487:                                + "  File line,col= "
488:                                + t.srcLine[bp]
489:                                + ", " + t.srcCol[bp]);
490:                    } else {
491:                        // The break was expected.
492:                        //   Check that the {nnn} tag value is correct.
493:                        int expectedTagVal = t.expectedBreaks[bp];
494:                        if (expectedTagVal == -1) {
495:                            expectedTagVal = 0;
496:                        }
497:                        int line = t.srcLine[bp];
498:                        int rs = ((RuleBasedBreakIterator) t.bi)
499:                                .getRuleStatus();
500:                        if (rs != expectedTagVal) {
501:                            errln("Incorrect status for reverse break.  Pos=  "
502:                                    + bp + "File line,col= " + line + ", "
503:                                    + t.srcCol[bp] + "\n"
504:                                    + "          Actual, Expected status = "
505:                                    + rs + ", " + expectedTagVal);
506:                        }
507:                    }
508:
509:                    prevBP = bp;
510:                }
511:
512:                // Verify that there were no missed breaks prior to the last one found
513:                for (i = prevBP - 1; i >= 0; i--) {
514:                    if (t.expectedBreaks[i] != 0) {
515:                        errln("Forward Itertion, break expected, but not found.  Pos="
516:                                + i
517:                                + "  File line,col= "
518:                                + t.srcLine[i]
519:                                + ", "
520:                                + t.srcCol[i]);
521:                    }
522:                }
523:            }
524:
525:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.