Source Code Cross Referenced for BuildDictionaryFile.java in  » Internationalization-Localization » icu4j » com » ibm » icu » dev » tool » rbbi » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.dev.tool.rbbi 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         *******************************************************************************
003:         * Copyright (C) 1996-2004, International Business Machines Corporation and    *
004:         * others. All Rights Reserved.                                                *
005:         *******************************************************************************
006:         */
007:        package com.ibm.icu.dev.tool.rbbi;
008:
009:        import com.ibm.icu.util.CompactByteArray;
010:        import com.ibm.icu.text.UnicodeSet;
011:        import com.ibm.icu.impl.Utility;
012:        import java.io.*;
013:        import java.util.Vector;
014:
015:        public class BuildDictionaryFile {
016:            public static void main(String args[])
017:                    throws FileNotFoundException, UnsupportedEncodingException,
018:                    IOException {
019:                String filename = args[0];
020:                String encoding = "";
021:                String outputFile = "";
022:                String listingFile = "";
023:
024:                if (args.length >= 2)
025:                    encoding = args[1];
026:
027:                if (args.length >= 3)
028:                    outputFile = args[2];
029:
030:                if (args.length >= 4)
031:                    listingFile = args[3];
032:
033:                BuildDictionaryFile dictionary = new BuildDictionaryFile();
034:                dictionary.build(filename, encoding);
035:
036:                DataOutputStream out = null;
037:                if (outputFile.length() != 0) {
038:                    out = new DataOutputStream(new FileOutputStream(outputFile));
039:                    dictionary.writeDictionaryFile(out);
040:                }
041:
042:                PrintWriter listing = null;
043:                if (listingFile.length() != 0) {
044:                    listing = new PrintWriter(new OutputStreamWriter(
045:                            new FileOutputStream(listingFile), "UnicodeLittle"));
046:                    dictionary.printWordList("", 0, listing);
047:                    listing.close();
048:                }
049:            }
050:
051:            public BuildDictionaryFile() {
052:            }
053:
054:            public void build(String filename, String encoding)
055:                    throws FileNotFoundException, UnsupportedEncodingException,
056:                    IOException {
057:                FileInputStream file = new FileInputStream(filename);
058:                InputStreamReader in;
059:                if (encoding.length() == 0)
060:                    in = new InputStreamReader(file);
061:                else
062:                    in = new InputStreamReader(file, encoding);
063:
064:                buildColumnMap(in);
065:
066:                file = new FileInputStream(filename);
067:                if (encoding.length() == 0)
068:                    in = new InputStreamReader(file);
069:                else
070:                    in = new InputStreamReader(file, encoding);
071:
072:                buildStateTable(in);
073:                //printTable();
074:            }
075:
076:            public void buildColumnMap(InputStreamReader in) throws IOException {
077:                System.out.println("Building column map...");
078:                UnicodeSet charsInFile = new UnicodeSet();
079:                int c = in.read();
080:                int totalChars = 0;
081:                while (c >= 0) {
082:                    ++totalChars;
083:                    if (totalChars > 0 && totalChars % 5000 == 0)
084:                        System.out.println("Read " + totalChars
085:                                + " characters...");
086:                    if (c > ' ')
087:                        charsInFile.add((char) c);
088:                    c = in.read();
089:                }
090:                //        Test.debugPrintln(charsInFile.toString());
091:
092:                StringBuffer tempReverseMap = new StringBuffer();
093:                tempReverseMap.append(' ');
094:
095:                columnMap = new CompactByteArray();
096:                int n = charsInFile.getRangeCount();
097:                byte p = 1;
098:                for (int i = 0; i < n; ++i) {
099:                    char start = (char) charsInFile.getRangeStart(i);
100:                    char end = (char) charsInFile.getRangeEnd(i);
101:                    for (char ch = start; ch <= end; ch++) {
102:                        if (columnMap.elementAt(Character.toLowerCase(ch)) == 0) {
103:                            columnMap.setElementAt(Character.toUpperCase(ch),
104:                                    Character.toUpperCase(ch), p);
105:                            columnMap.setElementAt(Character.toLowerCase(ch),
106:                                    Character.toLowerCase(ch), p);
107:                            ++p;
108:                            tempReverseMap.append(ch);
109:                        }
110:                    }
111:                }
112:                //System.out.println("Compacting...");
113:                columnMap.compact();
114:
115:                //System.out.println(tempReverseMap.toString());
116:                reverseColumnMap = new char[p];
117:                Utility.getChars(tempReverseMap, 0, p, reverseColumnMap, 0);
118:
119:                System.out.println("total columns = " + p);
120:                numCols = p;
121:                numColGroups = (numCols >> 5) + 1;
122:
123:                /*
124:                 short[] index = columnMap.getIndexArray();
125:                 System.out.println("Index:");
126:                 for (int i = 0; i < index.length; i++) {
127:                 if (i % 16 == 0) {
128:                 System.out.println();
129:                 System.out.print("    " + Integer.toHexString(i * 128) + ":");
130:                 }
131:                 System.out.print("\t" + Integer.toHexString(index[i]));
132:                 }
133:                 System.out.println();
134:                 byte[] data = columnMap.getStringArray();
135:                 System.out.print("Values:");
136:                 for (int i = 0; i < data.length; i++) {
137:                 if (i % 16 == 0) {
138:                 System.out.println();
139:                 System.out.print("    " + Integer.toHexString(i) + ":");
140:                 }
141:                 if (data[i] == 0)
142:                 System.out.print("\t.");
143:                 else
144:                 System.out.print("\t" + Integer.toString(data[i]));
145:                 }
146:                 System.out.println();
147:                 */
148:            }
149:
150:            public void buildStateTable(InputStreamReader in)
151:                    throws IOException {
152:                Vector tempTable = new Vector();
153:                tempTable.addElement(new int[numCols + 1]);
154:                int state = 0;
155:
156:                int c = in.read();
157:                int[] row = null;
158:                int charsInWord = 0;
159:                while (c >= 0) {
160:                    charsInWord++;
161:                    short column = columnMap.elementAt((char) c);
162:
163:                    row = (int[]) (tempTable.elementAt(state));
164:                    if (column != 0) {
165:                        if (row[column] == 0) {
166:                            row[column] = tempTable.size();
167:                            ++row[numCols];
168:                            state = (tempTable.size());
169:                            tempTable.addElement(new int[numCols + 1]);
170:                        } else
171:                            state = row[column];
172:                    } else if (state != 0) {
173:                        if (row[0] != -1) {
174:                            row[0] = -1;
175:                            ++row[numCols];
176:                            uniqueWords++;
177:                            totalUniqueWordChars += charsInWord;
178:                        }
179:                        totalWords++;
180:                        if (totalWords % 5000 == 0)
181:                            System.out.println("Read " + totalWords
182:                                    + " words, " + tempTable.size()
183:                                    + " rows...");
184:                        charsInWord = 0;
185:                        state = 0;
186:                    }
187:                    c = in.read();
188:                }
189:                if (state != 0) {
190:                    row = (int[]) (tempTable.elementAt(state));
191:                    if (row[0] != -1) {
192:                        row[0] = -1;
193:                        uniqueWords++;
194:                        totalUniqueWordChars += charsInWord;
195:                    }
196:                    totalWords++;
197:                }
198:
199:                compress(tempTable);
200:
201:                table = new short[numCols * tempTable.size()];
202:                for (int i = 0; i < tempTable.size(); i++) {
203:                    row = (int[]) tempTable.elementAt(i);
204:                    for (int j = 0; j < numCols; j++)
205:                        table[i * numCols + j] = (short) row[j];
206:                }
207:            }
208:
209:            private void compress(Vector tempTable) {
210:                System.out.println("Before compression:");
211:                System.out.println("  Number of rows = " + tempTable.size());
212:                System.out.println("  Number of columns = " + numCols);
213:                System.out.println("  Number of cells = " + tempTable.size()
214:                        * numCols);
215:                deleteDuplicateRows(tempTable);
216:                System.out.println("After removing duplicate rows:");
217:                System.out.println("  Number of rows = " + tempTable.size());
218:                System.out.println("  Number of columns = " + numCols);
219:                System.out.println("  Number of cells = " + tempTable.size()
220:                        * numCols);
221:                stackRows(tempTable);
222:                if (tempTable.size() > 32767)
223:                    throw new IllegalArgumentException(
224:                            "Too many rows in table!");
225:                System.out.println("After doubling up on rows:");
226:                System.out.println("  Number of rows = " + tempTable.size());
227:                System.out.println("  Number of columns = " + numCols);
228:                System.out.println("  Number of cells = " + tempTable.size()
229:                        * numCols);
230:            }
231:
232:            /*
233:             experimental...
234:             private void deleteDuplicateRows(Vector tempTable) {
235:             int[] rowNumMap = new int[tempTable.size()];
236:             for (int i = 0; i < rowNumMap.length; i++)
237:             rowNumMap[i] = i;
238:
239:             int nextClass = numCols;
240:             int currentClass;
241:             int lastClass;
242:             boolean split;
243:             int[] row1, row2, tempRow;
244:             int tempCat;
245:
246:             do {
247:             System.out.println("Making a pass (" + nextClass + " classes)...");
248:             currentClass = 0;
249:             lastClass = nextClass;
250:             while (currentClass < nextClass) {
251:             System.out.println("   currentClass = " + currentClass);
252:             split = false;
253:             row1 = row2 = null;
254:             for (int i = 0; i < tempTable.size(); i++) {
255:             tempRow = (int[])tempTable.elementAt(i);
256:             if (tempRow[numCols] == currentClass) {
257:             if (row1 == null) {
258:             row1 = (int[])tempTable.elementAt(i);
259:             }
260:             else {
261:             row2 = (int[])tempTable.elementAt(i);
262:             for (int j = 0; j < numCols; j++) {
263:             if ((row1[j] == 0) != (row2[j] == 0) ||
264:             (row1[j] == -1) != (row2[j] == -1)) {
265:             row2[numCols] = nextClass;
266:             split = true;
267:             break;
268:             }
269:             else if (row1[j] != 0 && row2[j] != 0 && row1[j] != -1
270:             && row2[j] != -1) {
271:             tempRow = (int[])tempTable.elementAt(row1[j]);
272:             tempCat = tempRow[numCols];
273:             tempRow = (int[])tempTable.elementAt(row2[j]);
274:             if (tempCat != tempRow[numCols]) {
275:             row2[numCols] = nextClass;
276:             split = true;
277:             break;
278:             }
279:             }
280:             }
281:             }
282:             }
283:             }
284:             if (split)
285:             ++nextClass;
286:             ++currentClass;
287:             //System.out.println();
288:             }
289:             } while (lastClass != nextClass);
290:
291:             int[] representatives = new int[nextClass];
292:             for (int i = 1; i < tempTable.size(); i++) {
293:             tempRow = (int[])tempTable.elementAt(i);
294:             if (representatives[tempRow[numCols]] == 0)
295:             representatives[tempRow[numCols]] = i;
296:             else
297:             rowNumMap[i] = representatives[tempRow[numCols]];
298:             }
299:             System.out.println("Renumbering...");
300:
301:             // renumber all remaining rows
302:             for (int i = 0; i < rowNumMap.length; i++)
303:             if (rowNumMap[i] != i)
304:             tempTable.setElementAt(null, i);
305:             int newRowNum = 0;
306:             for (int i = 0; i < rowNumMap.length; i++)
307:             if (tempTable.elementAt(i) != null)
308:             rowNumMap[i] = newRowNum++;
309:             for (int i = 0; i < rowNumMap.length; i++)
310:             if (tempTable.elementAt(i) == null)
311:             rowNumMap[i] = rowNumMap[rowNumMap[i]];
312:
313:             for (int i = tempTable.size() - 1; i >= 0; i--) {
314:             tempRow = (int[])tempTable.elementAt(i);
315:             if (tempRow == null)
316:             tempTable.removeElementAt(i);
317:             else {
318:             for (int j = 0; j < numCols; j++)
319:             if (tempRow[j] != -1)
320:             tempRow[j] = rowNumMap[j];
321:             }
322:             }
323:             //for (int i = 1; i < rowNumMap.length; i++) rowNumMap[i] = i; int newRowNum = rowNumMap.length;
324:             }
325:             */
326:
327:            private void deleteDuplicateRows(Vector tempTable) {
328:                Vector work = (Vector) (tempTable.clone());
329:                boolean didDeleteRow = true;
330:
331:                Vector tempMapping = new Vector(work.size());
332:                int[] mapping = new int[work.size()];
333:                for (int i = 0; i < mapping.length; i++) {
334:                    mapping[i] = i;
335:                    tempMapping.addElement(new Integer(i));
336:                }
337:                boolean[] tbd = new boolean[work.size()];
338:
339:                while (didDeleteRow) {
340:                    System.out.println(" " + work.size() + " rows...");
341:                    int deletedRows = 0;
342:                    didDeleteRow = false;
343:
344:                    sortTable(work, tempMapping, mapping, 1, work.size());
345:                    for (int i = 0; i < work.size() - 1;) {
346:                        System.out.print("Deleting, inspecting row " + i
347:                                + ", deleted " + deletedRows + " rows...\r");
348:                        int rowToDelete = ((Integer) (tempMapping
349:                                .elementAt(i + 1))).intValue();
350:                        int rowToMapTo = ((Integer) (tempMapping.elementAt(i)))
351:                                .intValue();
352:                        if (compareRows((int[]) work.elementAt(i), (int[]) work
353:                                .elementAt(i + 1), mapping) == 0) {
354:                            tbd[rowToDelete] = true;
355:                            tempTable.setElementAt(null, rowToDelete);
356:                            while (tbd[mapping[rowToMapTo]])
357:                                mapping[rowToMapTo] = mapping[mapping[rowToMapTo]];
358:                            mapping[rowToDelete] = mapping[rowToMapTo];
359:                            didDeleteRow = true;
360:                            deletedRows++;
361:                            work.removeElementAt(i + 1);
362:                            tempMapping.removeElementAt(i + 1);
363:                        } else
364:                            i++;
365:                    }
366:                    for (int i = 0; i < mapping.length; i++) {
367:                        if (tbd[i] && tbd[mapping[i]])
368:                            mapping[i] = mapping[mapping[i]];
369:                    }
370:                }
371:
372:                int decrementBy = 0;
373:                for (int i = 0; i < mapping.length; i++) {
374:                    if (tbd[i])
375:                        decrementBy++;
376:                    else
377:                        mapping[i] -= decrementBy;
378:                }
379:                for (int i = 0; i < mapping.length; i++) {
380:                    if (tbd[i])
381:                        mapping[i] = mapping[mapping[i]];
382:                }
383:                for (int i = tempTable.size() - 1; i >= 0; i--) {
384:                    if (tbd[i])
385:                        tempTable.removeElementAt(i);
386:                    else {
387:                        int[] row = (int[]) tempTable.elementAt(i);
388:                        for (int j = 0; j < numCols; j++)
389:                            row[j] = (row[j] == -1) ? -1 : mapping[row[j]];
390:                    }
391:                }
392:            }
393:
394:            private void sortTable(Vector table, Vector tempMapping,
395:                    int[] mapping, int start, int end) {
396:                System.out.print("Sorting (" + start + ", " + end + ")...\r");
397:                if (start + 1 >= end)
398:                    return;
399:                else if (start + 10 >= end) {
400:                    for (int i = start + 1; i < end; i++) {
401:                        int[] row = (int[]) table.elementAt(i);
402:                        Integer tempMap = (Integer) tempMapping.elementAt(i);
403:                        int j;
404:                        for (j = i - 1; j >= start; j--) {
405:                            if (compareRows((int[]) table.elementAt(j), row,
406:                                    mapping) > 0) {
407:                                table.setElementAt((int[]) table.elementAt(j),
408:                                        j + 1);
409:                                tempMapping.setElementAt((Integer) tempMapping
410:                                        .elementAt(j), j + 1);
411:                            } else {
412:                                table.setElementAt(row, j + 1);
413:                                tempMapping.setElementAt(tempMap, j + 1);
414:                                break;
415:                            }
416:                        }
417:                        if (j < start) {
418:                            table.setElementAt(row, start);
419:                            tempMapping.setElementAt(tempMap, start);
420:                        }
421:                    }
422:                } else {
423:                    int boundaryPos = (start + end) / 2;
424:                    int i;
425:                    boolean allTheSame = true;
426:                    int firstDifferent = 0;
427:
428:                    do {
429:                        int[] boundary = (int[]) table.elementAt(boundaryPos);
430:                        i = start;
431:                        int j = end - 1;
432:                        int[] row = null;
433:                        byte compResult;
434:                        while (i < j) {
435:                            row = (int[]) table.elementAt(i);
436:                            while (i <= j
437:                                    && compareRows(row, boundary, mapping) < 0) {
438:                                i++;
439:                                row = (int[]) table.elementAt(i);
440:                            }
441:
442:                            row = (int[]) table.elementAt(j);
443:                            compResult = compareRows(row, boundary, mapping);
444:                            while (i <= j && (compResult >= 0)) {
445:                                if (compResult != 0) {
446:                                    allTheSame = false;
447:                                    firstDifferent = j;
448:                                }
449:                                j--;
450:                                row = (int[]) table.elementAt(j);
451:                                compResult = compareRows(row, boundary, mapping);
452:                            }
453:                            if (i <= j) {
454:                                row = (int[]) table.elementAt(j);
455:                                table.setElementAt(table.elementAt(i), j);
456:                                table.setElementAt(row, i);
457:                                Object temp = tempMapping.elementAt(j);
458:                                tempMapping.setElementAt(tempMapping
459:                                        .elementAt(i), j);
460:                                tempMapping.setElementAt(temp, i);
461:                            }
462:                        }
463:                        if (i <= start) {
464:                            if (allTheSame)
465:                                return;
466:                            else
467:                                boundaryPos = firstDifferent;
468:                        }
469:                    } while (i <= start);
470:                    sortTable(table, tempMapping, mapping, start, i);
471:                    sortTable(table, tempMapping, mapping, i, end);
472:                }
473:            }
474:
475:            private byte compareRows(int[] row1, int[] row2, int[] mapping) {
476:                for (int i = 0; i < numCols; i++) {
477:                    int c1 = (row1[i] == -1) ? -1 : mapping[row1[i]];
478:                    int c2 = (row2[i] == -1) ? -1 : mapping[row2[i]];
479:                    if (c1 < c2)
480:                        return -1;
481:                    else if (c1 > c2)
482:                        return 1;
483:                }
484:                return 0;
485:            }
486:
487:            private int[] buildRowIndex(Vector tempTable) {
488:                int[] tempRowIndex = new int[tempTable.size()];
489:                rowIndexFlagsIndex = new short[tempTable.size()];
490:                Vector tempRowIndexFlags = new Vector();
491:                rowIndexShifts = new byte[tempTable.size()];
492:
493:                // build the row index.  Each entry in the row index starts out referring
494:                // to the original row (so it doesn't actually do any mapping), and we set
495:                // up the index flags to show which cells in the row are populated
496:                for (int i = 0; i < tempTable.size(); i++) {
497:                    tempRowIndex[i] = i;
498:
499:                    int[] row = (int[]) tempTable.elementAt(i);
500:                    if (row[numCols] == 1 && row[0] == 0) {
501:                        int j = 0;
502:                        while (row[j] == 0)
503:                            ++j;
504:                        rowIndexFlagsIndex[i] = (short) (-j);
505:                    } else {
506:                        int[] flags = new int[numColGroups];
507:                        int nextFlag = 1;
508:                        int colGroup = 0;
509:                        for (int j = 0; j < numCols; j++) {
510:                            if (row[j] != 0)
511:                                flags[colGroup] |= nextFlag;
512:                            nextFlag <<= 1;
513:                            if (nextFlag == 0) {
514:                                ++colGroup;
515:                                nextFlag = 1;
516:                            }
517:                        }
518:                        colGroup = 0;
519:                        int j = 0;
520:                        while (j < tempRowIndexFlags.size()) {
521:                            if (((Integer) tempRowIndexFlags.elementAt(j))
522:                                    .intValue() == flags[colGroup]) {
523:                                ++colGroup;
524:                                ++j;
525:                                if (colGroup >= numColGroups)
526:                                    break;
527:                            } else if (colGroup != 0)
528:                                colGroup = 0;
529:                            else
530:                                ++j;
531:                        }
532:                        rowIndexFlagsIndex[i] = (short) (j - colGroup);
533:                        while (colGroup < numColGroups) {
534:                            tempRowIndexFlags.addElement(new Integer(
535:                                    flags[colGroup]));
536:                            ++colGroup;
537:                        }
538:                    }
539:                }
540:                rowIndexFlags = new int[tempRowIndexFlags.size()];
541:                for (int i = 0; i < rowIndexFlags.length; i++)
542:                    rowIndexFlags[i] = ((Integer) tempRowIndexFlags
543:                            .elementAt(i)).intValue();
544:                System.out.println("Number of column groups = " + numColGroups);
545:                System.out.println("Size of rowIndexFlags = "
546:                        + rowIndexFlags.length);
547:
548:                return tempRowIndex;
549:            }
550:
551:            private void stackRows(Vector tempTable) {
552:                /*
553:                 System.out.print("Row:\t");
554:                 for (int i = 0; i < numCols; i++)
555:                 System.out.print(reverseColumnMap[i] + "\t");
556:                 System.out.println();
557:                 for (int i = 0; i < tempTable.size(); i++) {
558:                 System.out.print(Integer.toString(i) + ":\t");
559:                 int[] row = (int[])tempTable.elementAt(i);
560:                 for (int j = 0; j < row.length; j++)
561:                 if (row[j] != 0) System.out.print(Integer.toString(row[j]) + "\t");
562:                 else System.out.print(".\t");
563:                 System.out.println();
564:                 }
565:                 */
566:
567:                int[] tempRowIndex = buildRowIndex(tempTable);
568:                boolean[] tbd = new boolean[tempTable.size()];
569:
570:                // now we actually go through and stack rows together
571:                for (int i = 0; i < tempTable.size(); i++) {
572:                    if (tbd[i])
573:                        continue;
574:                    System.out.print("Stacking, inspecting row " + i + "...\r");
575:                    //System.out.println("Stacking, inspecting row " + i + "...");
576:
577:                    int[] destRow = (int[]) tempTable.elementAt(i);
578:
579:                    boolean[] tempFlags = new boolean[numCols];
580:                    boolean[] filledCells = new boolean[numCols];
581:                    for (int j = 0; j < numCols; j++)
582:                        filledCells[j] = destRow[j] != 0;
583:
584:                    for (int j = i + 1; destRow[numCols] < numCols
585:                            && j < tempTable.size(); j++) {
586:                        if (tbd[j])
587:                            continue;
588:
589:                        int[] srcRow = (int[]) tempTable.elementAt(j);
590:                        if (srcRow[numCols] + destRow[numCols] > numCols)
591:                            continue;
592:
593:                        int maxLeftShift = -999;
594:                        int maxRightShift = 0;
595:                        for (int k = 0; k < numCols; k++) {
596:                            tempFlags[k] = srcRow[k] != 0;
597:                            if (tempFlags[k]) {
598:                                if (maxLeftShift == -999)
599:                                    maxLeftShift = -k;
600:                                maxRightShift = (numCols - 1) - k;
601:                            }
602:                        }
603:
604:                        int shift;
605:                        for (shift = maxLeftShift; shift <= maxRightShift; shift++) {
606:                            int k;
607:                            for (k = 0; k < numCols; k++) {
608:                                if (tempFlags[k] && filledCells[k + shift])
609:                                    break;
610:                            }
611:                            if (k >= numCols)
612:                                break;
613:                        }
614:                        if (shift <= maxRightShift) {
615:                            //System.out.println("Packing row " + j + " into row " + i + " with shift = " + shift);
616:                            for (int k = 0; k < numCols; k++) {
617:                                if (tempFlags[k]) {
618:                                    filledCells[k + shift] = true;
619:                                    destRow[k + shift] = srcRow[k];
620:                                    ++destRow[numCols];
621:                                }
622:                            }
623:                            tbd[j] = true;
624:                            tempRowIndex[j] = i;
625:                            rowIndexShifts[j] = (byte) shift;
626:                        }
627:                    }
628:                }
629:
630:                // finally, we squeeze out all the deleted rows
631:                int decrementBy = 0;
632:                for (int i = 0; i < tempRowIndex.length; i++) {
633:                    if (!tbd[i])
634:                        tempRowIndex[i] -= decrementBy;
635:                    else
636:                        ++decrementBy;
637:                }
638:                rowIndex = new short[tempRowIndex.length];
639:                for (int i = tempRowIndex.length - 1; i >= 0; i--) {
640:                    if (tbd[i]) {
641:                        rowIndex[i] = (short) (tempRowIndex[tempRowIndex[i]]);
642:                        tempTable.removeElementAt(i);
643:                    } else
644:                        rowIndex[i] = (short) tempRowIndex[i];
645:                }
646:            }
647:
648:            private void printTable() {
649:                short cell;
650:                int populatedCells = 0;
651:                /*
652:                 System.out.println("Conceptual table:");
653:                 System.out.println(" Row:   a   b   c   d   e   f   g   h   i   j   k   l   m   n"
654:                 + "   o   p   q   r   s   t   u   v   w   x   y   z   '   #");
655:
656:                 boolean[] rowPrintFlags = new boolean[rowIndex.length];
657:                 printConceptualTable("", 0, rowPrintFlags);
658:                 */
659:
660:                System.out.println();
661:                System.out.println("Conceptual table:");
662:                System.out.print(" Row:");
663:                for (int i = 0; i < reverseColumnMap.length; i++) {
664:                    System.out.print("   " + reverseColumnMap[i]);
665:                }
666:                for (int i = 0; i < rowIndex.length; i++) {
667:                    System.out.println();
668:                    printNumber(i, 4);
669:                    System.out.print(":");
670:                    for (int j = 0; j < numCols; j++)
671:                        printNumber(at(i, j), 4);
672:                }
673:                System.out.println('\n');
674:
675:                System.out.println();
676:                System.out.println("Internally stored table:");
677:                System.out.print(" Row:");
678:                for (int i = 0; i < reverseColumnMap.length; i++) {
679:                    System.out.print("   " + reverseColumnMap[i]);
680:                }
681:                for (int i = 0; i < table.length; i++) {
682:                    if (i % numCols == 0) {
683:                        System.out.println();
684:                        printNumber(i / numCols, 4);
685:                        System.out.print(":");
686:                    }
687:                    cell = table[i];
688:                    if (cell != 0)
689:                        populatedCells++;
690:                    printNumber(cell, 4);
691:                }
692:                System.out.println('\n');
693:
694:                System.out.println("Row index:");
695:                for (int i = 0; i < rowIndex.length; i++) {
696:                    System.out.print("   " + i + " -> " + rowIndex[i]);
697:                    if (rowIndexFlagsIndex[i] < 0)
698:                        System.out
699:                                .print(", flags = "
700:                                        + Integer
701:                                                .toBinaryString((1 << (-rowIndexFlagsIndex[i])))
702:                                        + " (" + rowIndexFlagsIndex[i]);
703:                    else
704:                        System.out
705:                                .print(", flags = "
706:                                        + Integer
707:                                                .toBinaryString(rowIndexFlags[rowIndexFlagsIndex[i]])
708:                                        + " (" + rowIndexFlagsIndex[i]);
709:                    System.out.println("), shift = " + rowIndexShifts[i]);
710:                }
711:                /*
712:                 int theoreticalMinRows = populatedCells / numCols;
713:                 if (populatedCells % numCols != 0)
714:                 theoreticalMinRows++;
715:                 int oneCellRows = 0;
716:                 for (int i = 0; i < rowIndexFlags.length; i++) {
717:                 double temp = Math.log(rowIndexFlags[i]) / Math.log(2);
718:                 if (temp == (int)temp)
719:                 oneCellRows++;
720:                 }
721:
722:                 System.out.println('\n');
723:                 System.out.println("Total words in input = " + totalWords);
724:                 System.out.println("Total unique words = " + uniqueWords + ", comprising " +
725:                 totalUniqueWordChars + " characters\n");
726:                 System.out.println("Number of populated cells = " + populatedCells);
727:                 System.out.println("Total number of cells = " + (table.length));
728:                 System.out.println("Residency = " + ((float)populatedCells / table.length * 100) + '%');
729:                 System.out.println("Ratio of populated cells to unique-word characters = " +
730:                 ((float)populatedCells / totalUniqueWordChars * 100) + '%');
731:                 System.out.println("Ratio of total cells to unique-word characters = " +
732:                 ((float)table.length / totalUniqueWordChars * 100) + '%');
733:                 System.out.println("Number of rows = " + (table.length / numCols));
734:                 System.out.println("Theoretical minimum number of rows = " + theoreticalMinRows);
735:                 System.out.println("Ratio of number of rows to theoretical minimum = " +
736:                 ((float)(table.length / numCols) / theoreticalMinRows * 100) + '%');
737:                 System.out.println("Number of conceptual rows = " + rowIndex.length);
738:                 System.out.println("Conceptual rows with only one populated cell = " + oneCellRows);
739:                 System.out.println("Ratio of one-cell rows to total conceptual rows = " + (((float)oneCellRows)
740:                 / rowIndex.length * 100) + '%');
741:                 System.out.println("Average number of populated cells in multi-cell rows = " +
742:                 ((float)(populatedCells - oneCellRows) / (rowIndex.length - oneCellRows)));
743:
744:                 int storageUsed = table.length * 2 + rowIndex.length * 2
745:                 + rowIndexFlags.length * 4 + rowIndexShifts.length;
746:                 System.out.println("Total number of bytes in table (including indexes) = " +
747:                 storageUsed);
748:                 System.out.println("Bytes of overhead per unique-word character = " + ((double)(storageUsed
749:                 - (totalUniqueWordChars * 2)) / totalUniqueWordChars));
750:                 */
751:            }
752:
753:            private void printConceptualTable(String initialString, int state,
754:                    boolean[] flags) {
755:                if (initialString.length() == 0)
756:                    System.out.println("root:");
757:                else
758:                    System.out.println(initialString + ':');
759:
760:                if (!flags[state]) {
761:                    flags[state] = true;
762:                    printNumber(state, 4);
763:                    System.out.print(":");
764:                    for (int i = 0; i < numCols; i++)
765:                        printNumber(at(state, i), 4);
766:                    System.out.println();
767:                }
768:
769:                int nextState;
770:                for (int i = 0; i < numCols; i++) {
771:                    nextState = at(state, i);
772:                    if (nextState > 0 && !flags[nextState]) {
773:                        printNumber(nextState, 4);
774:                        System.out.print(":");
775:                        for (int j = 0; j < numCols; j++)
776:                            printNumber(at(nextState, j), 4);
777:                        System.out.println();
778:                    }
779:                }
780:                for (int i = 0; i < numCols; i++) {
781:                    nextState = at(state, i);
782:                    if (nextState > 0 && !flags[nextState]) {
783:                        char nextChar;
784:                        if (nextState == 27)
785:                            nextChar = ' ';
786:                        else if (nextState == 26)
787:                            nextChar = '\'';
788:                        else
789:                            nextChar = (char) (i + 'a');
790:                        flags[nextState] = true;
791:                        printConceptualTable(initialString + nextChar,
792:                                nextState, flags);
793:                    }
794:                }
795:            }
796:
797:            private void printWordList(String partialWord, int state,
798:                    PrintWriter out) throws IOException {
799:                if (state == -1) {
800:                    System.out.println(partialWord);
801:                    if (out != null)
802:                        out.println(partialWord);
803:                } else {
804:                    for (int i = 0; i < numCols; i++) {
805:                        if (at(state, i) != 0)
806:                            printWordList(partialWord + reverseColumnMap[i],
807:                                    at(state, i), out);
808:                    }
809:                }
810:            }
811:
812:            private void writeDictionaryFile(DataOutputStream out)
813:                    throws IOException {
814:                out.writeInt(0); // version number
815:
816:                char[] columnMapIndexes = columnMap.getIndexArray();
817:                out.writeInt(columnMapIndexes.length);
818:                for (int i = 0; i < columnMapIndexes.length; i++)
819:                    out.writeShort((short) columnMapIndexes[i]);
820:                byte[] columnMapValues = columnMap.getValueArray();
821:                out.writeInt(columnMapValues.length);
822:                for (int i = 0; i < columnMapValues.length; i++)
823:                    out.writeByte((byte) columnMapValues[i]);
824:
825:                out.writeInt(numCols);
826:                out.writeInt(numColGroups);
827:
828:                out.writeInt(rowIndex.length);
829:                for (int i = 0; i < rowIndex.length; i++)
830:                    out.writeShort(rowIndex[i]);
831:
832:                out.writeInt(rowIndexFlagsIndex.length);
833:                for (int i = 0; i < rowIndexFlagsIndex.length; i++)
834:                    out.writeShort(rowIndexFlagsIndex[i]);
835:                out.writeInt(rowIndexFlags.length);
836:                for (int i = 0; i < rowIndexFlags.length; i++)
837:                    out.writeInt(rowIndexFlags[i]);
838:
839:                out.writeInt(rowIndexShifts.length);
840:                for (int i = 0; i < rowIndexShifts.length; i++)
841:                    out.writeByte(rowIndexShifts[i]);
842:
843:                out.writeInt(table.length);
844:                for (int i = 0; i < table.length; i++)
845:                    out.writeShort(table[i]);
846:
847:                out.close();
848:            }
849:
850:            private void printNumber(int x, int width) {
851:                String s = String.valueOf(x);
852:                if (width > s.length())
853:                    System.out.print(spaces.substring(0, width - s.length()));
854:                if (x != 0)
855:                    System.out.print(s);
856:                else
857:                    System.out.print('.');
858:            }
859:
860:            public final short at(int row, char ch) {
861:                int col = columnMap.elementAt(ch);
862:                return at(row, col);
863:            }
864:
865:            public final short at(int row, int col) {
866:                if (cellIsPopulated(row, col))
867:                    return internalAt(rowIndex[row], col + rowIndexShifts[row]);
868:                else
869:                    return 0;
870:            }
871:
872:            private final boolean cellIsPopulated(int row, int col) {
873:                if (rowIndexFlagsIndex[row] < 0)
874:                    return col == -rowIndexFlagsIndex[row];
875:                else {
876:                    int flags = rowIndexFlags[rowIndexFlagsIndex[row]
877:                            + (col >> 5)];
878:                    return (flags & (1 << (col & 0x1f))) != 0;
879:                }
880:            }
881:
882:            private final short internalAt(int row, int col) {
883:                return table[row * numCols + col];
884:            }
885:
886:            private CompactByteArray columnMap = null;
887:            private char[] reverseColumnMap = null;
888:            private int numCols;
889:            private int numColGroups;
890:            private short[] table = null;
891:            private short[] rowIndex = null;
892:            private int[] rowIndexFlags = null;
893:            private short[] rowIndexFlagsIndex = null;
894:            private byte[] rowIndexShifts = null;
895:
896:            private int totalWords = 0;
897:            private int uniqueWords = 0;
898:            private int totalUniqueWordChars = 0;
899:
900:            private static final String spaces = "      ";
901:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.