001: /*
002: * TestTokenizerSource.java: JUnit test for a Tokenizer
003: *
004: * Copyright (C) 2004 Heiko Blau
005: *
006: * This file belongs to the JTopas test suite.
007: * The JTopas test suite is free software; you can redistribute it and/or modify it
008: * under the terms of the GNU Lesser General Public License as published by the
009: * Free Software Foundation; either version 2.1 of the License, or (at your option)
010: * any later version.
011: *
012: * This software is distributed in the hope that it will be useful, but WITHOUT
013: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
014: * FITNESS FOR A PARTICULAR PURPOSE.
015: * See the GNU Lesser General Public License for more details.
016: *
017: * You should have received a copy of the GNU Lesser General Public License along
018: * with the JTopas test suite. If not, write to the
019: *
020: * Free Software Foundation, Inc.
021: * 59 Temple Place, Suite 330,
022: * Boston, MA 02111-1307
023: * USA
024: *
025: * or check the Internet: http://www.fsf.org
026: *
027: * The JTopas test suite uses the test framework JUnit by Kent Beck and Erich Gamma.
028: * You should have received a copy of their JUnit licence agreement along with
029: * the JTopas test suite.
030: *
* We do NOT provide the JUnit archive junit.jar necessary to compile and run
* our tests, since we assume that you either already have it or would like
* to obtain the current release yourself.
034: * Please visit either:
035: * http://sourceforge.net/projects/junit
036: * or
037: * http://junit.org
038: * to obtain JUnit.
039: *
040: * Contact:
041: * email: heiko@susebox.de
042: */
043:
044: package de.susebox.jtopas;
045:
046: //-----------------------------------------------------------------------------
047: // Imports
048: //
049: import java.io.Reader;
050: import java.io.StringReader;
051: import java.io.File;
052: import java.io.PrintWriter;
053: import java.util.Iterator;
054: import java.util.List;
055: import java.util.LinkedList;
056:
057: import junit.framework.Test;
058: import junit.framework.TestCase;
059: import junit.framework.TestSuite;
060: import junit.framework.Assert;
061:
062: import de.susebox.TestUtilities;
063:
064: //-----------------------------------------------------------------------------
065: // Class TestTokenizerSource
066: //
067:
068: /**<p>
069: * The class contains a number of test cases that are supposed to be difficult
070: * to handle for a {@link Tokenizer}, e.g. EOF conditions inside strings etc.
071: *</p>
072: *
073: * @see TokenizerSource
074: * @author Heiko Blau
075: */
076: public class TestTokenizerSource extends TestCase {
077:
078: //---------------------------------------------------------------------------
079: // properties
080: //
081:
082: //---------------------------------------------------------------------------
083: // main method
084: //
085:
086: /**
087: * call this method to invoke the tests
088: */
089: public static void main(String[] args) {
090: String[] tests = { TestTokenizerSource.class.getName() };
091:
092: TestUtilities.run(tests, args);
093: }
094:
095: //---------------------------------------------------------------------------
096: // suite method
097: //
098:
099: /**
100: * Implementation of the JUnit method <code>suite</code>. For each set of test
101: * properties one or more tests are instantiated.
102: *
103: * @return a test suite
104: */
105: public static Test suite() {
106: TestSuite suite = new TestSuite(TestTokenizerSource.class
107: .getName());
108:
109: suite.addTest(new TestTokenizerSource("testEmptySource"));
110: suite.addTest(new TestTokenizerSource("testSmallBuffer"));
111: suite.addTest(new TestTokenizerSource("testLargeBuffer"));
112: suite.addTest(new TestTokenizerSource("testSpeed"));
113: suite.addTest(new TestTokenizerSource("testSimilarResults"));
114: suite.addTest(new TestTokenizerSource("testLargeSource"));
115: return suite;
116: }
117:
118: //---------------------------------------------------------------------------
119: // Constructor
120: //
121:
122: /**
123: * Default constructor. Standard input {@link java.lang.System#in} is used
124: * to construct the input stream reader.
125: */
126: public TestTokenizerSource(String test) {
127: super (test);
128: }
129:
130: //---------------------------------------------------------------------------
131: // Fixture setup and release
132: //
133:
134: /**
135: * Sets up the fixture, for example, open a network connection.
136: * This method is called before a test is executed.
137: */
138: protected void setUp() throws Exception {
139: }
140:
141: /**
142: * Tears down the fixture, for example, close a network connection.
143: * This method is called after a test is executed.
144: */
145: protected void tearDown() throws Exception {
146: }
147:
148: //---------------------------------------------------------------------------
149: // test cases
150: //
151:
152: /**
153: * Test empty data sources
154: */
155: public void testEmptySource() throws Throwable {
156: TokenizerSource[] source = { null, null, null, null, null, null };
157: char[] cbuf = new char[8129];
158: int count;
159:
160: source[0] = new CharArraySource(null);
161: source[1] = new ReaderSource((java.io.InputStream) null);
162: source[2] = new StringSource(null);
163: source[3] = new CharArraySource(new char[0]);
164: source[4] = new ReaderSource(new StringReader(""));
165: source[5] = new StringSource("");
166: for (int index = 0; index < source.length; ++index) {
167: count = source[index].read(cbuf, 0, cbuf.length);
168: assertTrue(source[index].getClass().getName()
169: + ": expected -1, got " + count, count == -1);
170: }
171: }
172:
173: /**
174: * Test a buffer that is smaller than the available data
175: */
176: public void testSmallBuffer() throws Throwable {
177: TokenizerSource[] source = { null, null, null };
178: char[] cbuf = new char[1];
179: char[] text = new char[DATA.length()];
180: int count;
181:
182: DATA.getChars(0, DATA.length(), text, 0);
183: source[0] = new CharArraySource(text);
184: source[1] = new ReaderSource(new StringReader(DATA));
185: source[2] = new StringSource(DATA);
186: for (int index = 0; index < source.length; ++index) {
187: for (int readIndex = 0; readIndex < DATA.length(); ++readIndex) {
188: count = source[index].read(cbuf, 0, cbuf.length);
189: assertTrue(source[index].getClass().getName()
190: + ": expected 1, got " + count, count == 1);
191: }
192: count = source[index].read(cbuf, 0, cbuf.length);
193: assertTrue(source[index].getClass().getName()
194: + ": expected -1, got " + count, count == -1);
195: }
196: }
197:
198: /**
199: * Test a buffer that is larger than the available data
200: */
201: public void testLargeBuffer() throws Throwable {
202: TokenizerSource[] source = { null, null, null };
203: char[] cbuf = new char[8192];
204: char[] text = new char[DATA.length()];
205: int count;
206:
207: DATA.getChars(0, DATA.length(), text, 0);
208: source[0] = new CharArraySource(text);
209: source[1] = new ReaderSource(new StringReader(DATA));
210: source[2] = new StringSource(DATA);
211: for (int index = 0; index < source.length; ++index) {
212: count = source[index].read(cbuf, 0, cbuf.length);
213: assertTrue(source[index].getClass().getName()
214: + ": expected " + DATA.length() + ", got " + count,
215: count == DATA.length());
216: count = source[index].read(cbuf, 0, cbuf.length);
217: assertTrue(source[index].getClass().getName()
218: + ": expected -1, got " + count, count == -1);
219: }
220: }
221:
222: /**
223: * Test speed
224: */
225: public void testSpeed() throws Throwable {
226: // construct a really huge string
227: TokenizerSource source;
228: char[] buffer;
229: String text = expandData(20000);
230: char[] cbuf = new char[text.length()];
231:
232: text.getChars(0, text.length(), cbuf, 0);
233:
234: for (int bufferSize = 8; bufferSize < 0x20000; bufferSize *= 2) {
235: System.out.println("Buffer size " + bufferSize + ":");
236: buffer = new char[bufferSize];
237:
238: // CharArraySource
239: readSource(new CharArraySource(cbuf), buffer);
240:
241: // ReaderSource
242: readSource(new ReaderSource(new StringReader(text)), buffer);
243:
244: // StringSource
245: readSource(new StringSource(text), buffer);
246: }
247: }
248:
249: /**
250: * Test similar special sequences.
251: */
252: public void testSimilarResults() throws Throwable {
253: // construct a really huge string
254: String text = expandData(1000);
255:
256: // initialize the properties
257: TokenizerProperties props = new StandardTokenizerProperties();
258: StandardTokenizer tokenizer = new StandardTokenizer();
259: TokenizerSource source;
260: long startTime;
261:
262: props.addSpecialSequence(ORIG_SMILEY, ORIG_SMILEY);
263: props.addSpecialSequence(FRIGHTENED_SMIKEY, FRIGHTENED_SMIKEY);
264: props.addSpecialSequence(WINKING_SMILEY, WINKING_SMILEY);
265: props.addString("\"", "\"", "\\");
266: props.addString("'", "'", "\\");
267:
268: try {
269: tokenizer.setTokenizerProperties(props);
270:
271: // CharArraySource
272: char[] cbuf = new char[text.length()];
273:
274: text.getChars(0, text.length(), cbuf, 0);
275:
276: // tokenize several times to avoid JIT or hotspot optimization effects
277: int loopCount = 100;
278: int loops = 0;
279: long timeTotal1 = 0;
280: long timeTotal2 = 0;
281: long timeTotal3 = 0;
282:
283: while (loops++ < loopCount) {
284: tokenizer.setSource(new CharArraySource(cbuf));
285:
286: startTime = System.currentTimeMillis();
287: List list1 = tokenize(tokenizer);
288: long time1 = System.currentTimeMillis() - startTime;
289: System.out.println("Loop #" + loops
290: + ": CharArraySource needed " + time1
291: + "ms for " + list1.size() + " token.");
292: timeTotal1 += time1;
293:
294: // ReaderSource
295: tokenizer.setSource(new ReaderSource(new StringReader(
296: text)));
297:
298: startTime = System.currentTimeMillis();
299: List list2 = tokenize(tokenizer);
300: long time2 = System.currentTimeMillis() - startTime;
301: System.out.println("Loop #" + loops
302: + ": ReaderSource needed " + time2 + "ms for "
303: + list2.size() + " token.");
304: timeTotal2 += time2;
305:
306: // StringSource
307: tokenizer.setSource(new StringSource(text));
308:
309: startTime = System.currentTimeMillis();
310: List list3 = tokenize(tokenizer);
311: long time3 = System.currentTimeMillis() - startTime;
312: System.out.println("Loop #" + loops
313: + ": StringSource needed " + time3 + "ms for "
314: + list3.size() + " token.");
315: timeTotal3 += time3;
316:
317: System.out.println("CharArraySource has "
318: + list1.size() + " token.");
319: System.out.println("ReaderSource has " + list2.size()
320: + " token.");
321: System.out.println("StringSource has " + list3.size()
322: + " token.");
323:
324: // any list shorter than the others?
325: assertTrue(
326: "CharArraySource token count differs from ReaderSource token count.",
327: list1.size() == list2.size());
328: assertTrue(
329: "CharArraySource token count differs from StringSource token count.",
330: list1.size() == list3.size());
331:
332: // check token list only once
333: if (loops == loopCount) {
334: System.out.println("CharArraySource total time: "
335: + timeTotal1 + "ms.");
336: System.out.println("ReaderSource total time: "
337: + timeTotal2 + "ms.");
338: System.out.println("StringSource total time: "
339: + timeTotal3 + "ms.");
340:
341: Iterator iter1 = list1.iterator();
342: Iterator iter2 = list2.iterator();
343: Iterator iter3 = list3.iterator();
344: int index = 0;
345: while (iter1.hasNext()) {
346: // compare token
347: Token token1 = (Token) iter1.next();
348: Token token2 = (Token) iter2.next();
349: Token token3 = (Token) iter3.next();
350:
351: assertTrue("Token mismatch at position "
352: + index + ": CharArraySource \""
353: + token1 + "\", ReaderSource \""
354: + token2 + "\"", token1.equals(token2));
355: assertTrue("Token mismatch at position "
356: + index + ": CharArraySource \""
357: + token1 + "\", StringSource \""
358: + token3 + "\"", token1.equals(token3));
359: index++;
360: }
361: }
362: }
363: } finally {
364: tokenizer.close();
365: }
366: }
367:
368: /**
369: * Test similar special sequences.
370: */
371: public void testLargeSource() throws Throwable {
372: // construct a large data source
373: String dataItem = "/*\n"
374: + "* This is a Java style data item.\n"
375: + "* It is concatenated \"multible\" times to get a real\n"
376: + "* big chunk of data.\n"
377: + "* With such a lot of characters the speed of the tokenizers\n"
378: + "* can be compared.\n" + "*/\n"
379: + "package org.muppets.gonzo;\n\n" + "/**\n"
380: + "* This is a class comment :-)\n" + "*/\n"
381: + "public class Gonzo extends Serializable {\n\n"
382: + " /** The standard constructor */\n"
383: + " public Gonzo() {\n" + " // nothing todo here\n"
384: + " }\n\n" + " /** a method */\n"
385: + " public String toString() {\n"
386: + " return \"This is Gonzo\";\n" + " }\n\n"
387: + "}\n\n\n";
388: int tokenCountPerItem = 35;
389: int tokenCount = 0;
390: int maxSize = 0x80000;
391: StringBuffer data = new StringBuffer(maxSize);
392:
393: while (data.length() < maxSize) {
394: data.append(dataItem);
395: tokenCount += tokenCountPerItem;
396: }
397: tokenCount++; // EOF token
398:
399: // Set up the Properties
400: TokenizerProperties props = new StandardTokenizerProperties();
401:
402: props
403: .setParseFlags(Flags.F_RETURN_BLOCK_COMMENTS
404: + Flags.F_RETURN_LINE_COMMENTS
405: + Flags.F_TOKEN_POS_ONLY);
406: props.addBlockComment("/*", "*/");
407: props.addBlockComment("/**", "*/");
408: props.addLineComment("//");
409: props.addString("\"", "\"", "\\");
410: props.addString("'", "'", "\\");
411: props.addKeyword("package");
412: props.addKeyword("public");
413: props.addKeyword("class");
414: props.addKeyword("extends");
415: props.addKeyword("return");
416: props.addKeyword("if");
417: props.addKeyword("then");
418: props.addKeyword("while");
419: props.addKeyword("for");
420: props.addKeyword("int");
421: props.addKeyword("char");
422: props.addSpecialSequence("(");
423: props.addSpecialSequence(")");
424: props.addSpecialSequence(";");
425: props.addSpecialSequence("==");
426: props.addSpecialSequence("!=");
427: props.addSpecialSequence("<=");
428: props.addSpecialSequence(">=");
429:
430: // create the tokenizers.
431: // NOTE: the sources have a special structure that is required for the
432: // analysis below
433: Tokenizer tokenizer = new StandardTokenizer(props);
434: Object[] sources = new Object[] {
435: new StringSource(data.toString()),
436: new ReaderSource(new StringReader(data.toString())),
437: new StringSource(data.toString().substring(0,
438: data.toString().length() / 2)),
439: new ReaderSource(new StringReader(data.toString()
440: .substring(0, data.toString().length() / 2))),
441: new StringSource(data.toString().substring(0,
442: data.toString().length() / 5)),
443: new ReaderSource(new StringReader(data.toString()
444: .substring(0, data.toString().length() / 5))),
445: new StringSource(data.toString().substring(0,
446: data.toString().length() / 20)),
447: new ReaderSource(new StringReader(data.toString()
448: .substring(0, data.toString().length() / 20))) };
449: Object[] tokenLists = new Object[] { null, null, null, null,
450: null, null, null, null };
451:
452: try {
453: for (int index = 0; index < sources.length; ++index) {
454: long start = System.currentTimeMillis();
455:
456: System.out.println(sources[index].getClass().getName()
457: + ": running ...");
458: tokenizer.setSource((TokenizerSource) sources[index]);
459:
460: tokenLists[index] = tokenize(tokenizer);
461:
462: System.out.println(sources[index].getClass().getName()
463: + ": " + (System.currentTimeMillis() - start)
464: + "ms.");
465: }
466: } finally {
467: tokenizer.close();
468: }
469:
470: // check the results
471: for (int index = 0; index < sources.length; ++index) {
472: List tokenList = (List) tokenLists[index];
473:
474: System.out.println(sources[index].getClass().getName()
475: + " has " + tokenList.size() + " token.");
476:
477: // only the first 2 data sources have the full token count
478: if (index < 2) {
479: assertTrue("Expected " + tokenCount + " token, got "
480: + tokenList.size(), tokenCount == tokenList
481: .size());
482: }
483:
484: // compare two lists with the same amount of data
485: if (index % 2 == 1) {
486: List tokenList0 = (List) tokenLists[index - 1];
487: Iterator iter0 = tokenList0.iterator();
488: Iterator iter = tokenList.iterator();
489: int tokenIndex = 0;
490:
491: while (iter.hasNext()) {
492: Token token0 = (Token) iter0.next();
493: Token token = (Token) iter.next();
494:
495: assertTrue("Token #" + tokenIndex + "differs:\n"
496: + token0 + "\n" + token, token0
497: .equals(token));
498: tokenIndex++;
499: }
500: }
501: }
502: }
503:
504: //---------------------------------------------------------------------------
505: // helpers
506: //
507:
508: /**
509: * This method returns a {@link java.util.List} of Token.
510: */
511: private List tokenize(Tokenizer tokenizer) throws Throwable {
512: List list = new LinkedList();
513: //File file = File.createTempFile(tokenizer.getSource().getClass().getName(), null);
514: //PrintWriter writer = new PrintWriter(file.getAbsolutePath());
515:
516: try {
517: while (tokenizer.hasMoreToken()) {
518: Token token = tokenizer.nextToken();
519:
520: // writer.println(token);
521: list.add(token);
522: }
523: } finally {
524: // writer.close();
525: }
526: return list;
527: }
528:
529: /**
530: * Expand some text
531: */
532: private String expandData(int factor) {
533: StringBuffer expandedData = new StringBuffer(DATA.length()
534: * factor);
535:
536: for (int ii = 0; ii < factor; ++ii) {
537: expandedData.append(DATA);
538: }
539: return expandedData.toString();
540: }
541:
542: /**
543: * Read the full source
544: */
545: private void readSource(TokenizerSource source, char[] buffer)
546: throws Throwable {
547: long startTime = System.currentTimeMillis();
548: int chars;
549:
550: while ((chars = source.read(buffer, 0, buffer.length)) > 0)
551: ;
552: System.out.println(source.getClass().getName() + " needed "
553: + (System.currentTimeMillis() - startTime) + "ms.");
554: }
555:
556: //---------------------------------------------------------------------------
557: // members
558: //
559:
560: // various constants
561: private static final String ORIG_SMILEY = ":-)";
562: private static final String FRIGHTENED_SMIKEY = "=8-[";
563: private static final String WINKING_SMILEY = ".-\\";
564:
565: // Text data for the tests
566: private static final String DATA = "this is a simple text with a lot of perfectly normal\n"
567: + "token. And a few separators (brackets are some, for instance)\n"
568: + "as well. There could\talso be some\ttabs (\"\\t\")\n"
569: + "in between. And 'some strings' :-).\n"
570: + "And the smileys (;-), =8-[, .-\\ etc.) should be regarded as\n"
571: + "'special sequences'.\n\n";
572: }
|