Source Code Cross Referenced for BrazilianStemmer.java in  » Net » lucene-connector » org » apache » lucene » analysis » br » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Net » lucene connector » org.apache.lucene.analysis.br 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        package org.apache.lucene.analysis.br;
0002:
0003:        /**
0004:         * Licensed to the Apache Software Foundation (ASF) under one or more
0005:         * contributor license agreements.  See the NOTICE file distributed with
0006:         * this work for additional information regarding copyright ownership.
0007:         * The ASF licenses this file to You under the Apache License, Version 2.0
0008:         * (the "License"); you may not use this file except in compliance with
0009:         * the License.  You may obtain a copy of the License at
0010:         *
0011:         *     http://www.apache.org/licenses/LICENSE-2.0
0012:         *
0013:         * Unless required by applicable law or agreed to in writing, software
0014:         * distributed under the License is distributed on an "AS IS" BASIS,
0015:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0016:         * See the License for the specific language governing permissions and
0017:         * limitations under the License.
0018:         */
0019:
0020:        /**
0021:         * A stemmer for Brazilian words.
0022:         */
0023:        public class BrazilianStemmer {
0024:
0025:            /**
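0026:             * Working state of the current stem() call: TERM is the original term and CT joined by ';', CT is the changed (normalized) term, and R1, R2 and RV are the regions derived from CT.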
0027:             */
0028:            private String TERM;
0029:            private String CT;
0030:            private String R1;
0031:            private String R2;
0032:            private String RV;
0033:
/**
 * Creates a stemmer. No term is loaded yet: the working fields are
 * only assigned once a term is stemmed.
 */
public BrazilianStemmer() {
    super();
}
0036:
0037:            /**
0038:             * Stemms the given term to an unique <tt>discriminator</tt>.
0039:             *
0040:             * @param term  The term that should be stemmed.
0041:             * @return      Discriminator for <tt>term</tt>
0042:             */
0043:            protected String stem(String term) {
0044:                boolean altered = false; // altered the term
0045:
0046:                // creates CT
0047:                createCT(term);
0048:
0049:                if (!isIndexable(CT)) {
0050:                    return null;
0051:                }
0052:                if (!isStemmable(CT)) {
0053:                    return CT;
0054:                }
0055:
0056:                R1 = getR1(CT);
0057:                R2 = getR1(R1);
0058:                RV = getRV(CT);
0059:                TERM = term + ";" + CT;
0060:
0061:                altered = step1();
0062:                if (!altered) {
0063:                    altered = step2();
0064:                }
0065:
0066:                if (altered) {
0067:                    step3();
0068:                } else {
0069:                    step4();
0070:                }
0071:
0072:                step5();
0073:
0074:                return CT;
0075:            }
0076:
0077:            /**
0078:             * Checks a term if it can be processed correctly.
0079:             *
0080:             * @return  true if, and only if, the given term consists in letters.
0081:             */
0082:            private boolean isStemmable(String term) {
0083:                for (int c = 0; c < term.length(); c++) {
0084:                    // Discard terms that contain non-letter characters.
0085:                    if (!Character.isLetter(term.charAt(c))) {
0086:                        return false;
0087:                    }
0088:                }
0089:                return true;
0090:            }
0091:
0092:            /**
0093:             * Checks a term if it can be processed indexed.
0094:             *
0095:             * @return  true if it can be indexed
0096:             */
0097:            private boolean isIndexable(String term) {
0098:                return (term.length() < 30) && (term.length() > 2);
0099:            }
0100:
0101:            /**
0102:             * See if string is 'a','e','i','o','u'
0103:             *
0104:             * @return true if is vowel
0105:             */
0106:            private boolean isVowel(char value) {
0107:                return (value == 'a') || (value == 'e') || (value == 'i')
0108:                        || (value == 'o') || (value == 'u');
0109:            }
0110:
0111:            /**
0112:             * Gets R1
0113:             *
0114:             * R1 - is the region after the first non-vowel follwing a vowel,
0115:             *      or is the null region at the end of the word if there is
0116:             *      no such non-vowel.
0117:             *
0118:             * @return null or a string representing R1
0119:             */
0120:            private String getR1(String value) {
0121:                int i;
0122:                int j;
0123:
0124:                // be-safe !!!
0125:                if (value == null) {
0126:                    return null;
0127:                }
0128:
0129:                // find 1st vowel
0130:                i = value.length() - 1;
0131:                for (j = 0; j < i; j++) {
0132:                    if (isVowel(value.charAt(j))) {
0133:                        break;
0134:                    }
0135:                }
0136:
0137:                if (!(j < i)) {
0138:                    return null;
0139:                }
0140:
0141:                // find 1st non-vowel
0142:                for (; j < i; j++) {
0143:                    if (!(isVowel(value.charAt(j)))) {
0144:                        break;
0145:                    }
0146:                }
0147:
0148:                if (!(j < i)) {
0149:                    return null;
0150:                }
0151:
0152:                return value.substring(j + 1);
0153:            }
0154:
0155:            /**
0156:             * Gets RV
0157:             *
0158:             * RV - IF the second letter is a consoant, RV is the region after
0159:             *      the next following vowel,
0160:             *
0161:             *      OR if the first two letters are vowels, RV is the region
0162:             *      after the next consoant,
0163:             *
0164:             *      AND otherwise (consoant-vowel case) RV is the region after
0165:             *      the third letter.
0166:             *
0167:             *      BUT RV is the end of the word if this positions cannot be
0168:             *      found.
0169:             *
0170:             * @return null or a string representing RV
0171:             */
0172:            private String getRV(String value) {
0173:                int i;
0174:                int j;
0175:
0176:                // be-safe !!!
0177:                if (value == null) {
0178:                    return null;
0179:                }
0180:
0181:                i = value.length() - 1;
0182:
0183:                // RV - IF the second letter is a consoant, RV is the region after
0184:                //      the next following vowel,
0185:                if ((i > 0) && !isVowel(value.charAt(1))) {
0186:                    // find 1st vowel
0187:                    for (j = 2; j < i; j++) {
0188:                        if (isVowel(value.charAt(j))) {
0189:                            break;
0190:                        }
0191:                    }
0192:
0193:                    if (j < i) {
0194:                        return value.substring(j + 1);
0195:                    }
0196:                }
0197:
0198:                // RV - OR if the first two letters are vowels, RV is the region
0199:                //      after the next consoant,
0200:                if ((i > 1) && isVowel(value.charAt(0))
0201:                        && isVowel(value.charAt(1))) {
0202:                    // find 1st consoant
0203:                    for (j = 2; j < i; j++) {
0204:                        if (!isVowel(value.charAt(j))) {
0205:                            break;
0206:                        }
0207:                    }
0208:
0209:                    if (j < i) {
0210:                        return value.substring(j + 1);
0211:                    }
0212:                }
0213:
0214:                // RV - AND otherwise (consoant-vowel case) RV is the region after
0215:                //      the third letter.
0216:                if (i > 2) {
0217:                    return value.substring(3);
0218:                }
0219:
0220:                return null;
0221:            }
0222:
0223:            /**
0224:             * 1) Turn to lowercase
0225:             * 2) Remove accents
0226:             * 3) ã -> a ; õ -> o
0227:             * 4) ç -> c
0228:             *
0229:             * @return null or a string transformed
0230:             */
0231:            private String changeTerm( String value ) {
0232:    int     j;
0233:    String  r = "" ;
0234:
0235:    // be-safe !!!
0236:    if (value == null) {
0237:      return null ;
0238:    }
0239:
0240:    value = value.toLowerCase() ;
0241:    for (j=0 ; j < value.length() ; j++) {
0242:      if ((value.charAt(j) == 'á') ||
0243:          (value.charAt(j) == 'â') ||
0244:          (value.charAt(j) == 'ã')) {
0245:        r= r + "a" ; continue ;
0246:      }
0247:      if ((value.charAt(j) == 'é') ||
0248:          (value.charAt(j) == 'ê')) {
0249:        r= r + "e" ; continue ;
0250:      }
0251:      if (value.charAt(j) == 'í') {
0252:        r= r + "i" ; continue ;
0253:      }
0254:      if ((value.charAt(j) == 'ó') ||
0255:          (value.charAt(j) == 'ô') ||
0256:          (value.charAt(j) == 'õ')) {
0257:        r= r + "o" ; continue ;
0258:      }
0259:      if ((value.charAt(j) == 'ú') ||
0260:          (value.charAt(j) == 'ü')) {
0261:        r= r + "u" ; continue ;
0262:      }
0263:      if (value.charAt(j) == 'ç') {
0264:        r= r + "c" ; continue ;
0265:      }
0266:      if (value.charAt(j) == 'ñ') {
0267:        r= r + "n" ; continue ;
0268:      }
0269:
0270:      r= r+ value.charAt(j) ;
0271:    }
0272:
0273:    return r ;
0274:  }
0275:
0276:            /**
0277:             * Check if a string ends with a suffix
0278:             *
0279:             * @return true if the string ends with the specified suffix
0280:             */
0281:            private boolean suffix(String value, String suffix) {
0282:
0283:                // be-safe !!!
0284:                if ((value == null) || (suffix == null)) {
0285:                    return false;
0286:                }
0287:
0288:                if (suffix.length() > value.length()) {
0289:                    return false;
0290:                }
0291:
0292:                return value.substring(value.length() - suffix.length())
0293:                        .equals(suffix);
0294:            }
0295:
0296:            /**
0297:             * Replace a string suffix by another
0298:             *
0299:             * @return the replaced String
0300:             */
0301:            private String replaceSuffix(String value, String toReplace,
0302:                    String changeTo) {
0303:                String vvalue;
0304:
0305:                // be-safe !!!
0306:                if ((value == null) || (toReplace == null)
0307:                        || (changeTo == null)) {
0308:                    return value;
0309:                }
0310:
0311:                vvalue = removeSuffix(value, toReplace);
0312:
0313:                if (value.equals(vvalue)) {
0314:                    return value;
0315:                } else {
0316:                    return vvalue + changeTo;
0317:                }
0318:            }
0319:
0320:            /**
0321:             * Remove a string suffix
0322:             *
0323:             * @return the String without the suffix
0324:             */
0325:            private String removeSuffix(String value, String toRemove) {
0326:                // be-safe !!!
0327:                if ((value == null) || (toRemove == null)
0328:                        || !suffix(value, toRemove)) {
0329:                    return value;
0330:                }
0331:
0332:                return value.substring(0, value.length() - toRemove.length());
0333:            }
0334:
0335:            /**
0336:             * See if a suffix is preceded by a String
0337:             *
0338:             * @return true if the suffix is preceded
0339:             */
0340:            private boolean suffixPreceded(String value, String suffix,
0341:                    String preceded) {
0342:                // be-safe !!!
0343:                if ((value == null) || (suffix == null) || (preceded == null)
0344:                        || !suffix(value, suffix)) {
0345:                    return false;
0346:                }
0347:
0348:                return suffix(removeSuffix(value, suffix), preceded);
0349:            }
0350:
0351:            /**
0352:             * Creates CT (changed term) , substituting * 'ã' and 'õ' for 'a~' and 'o~'.
0353:             */
0354:            private void createCT(String term) {
0355:                CT = changeTerm(term);
0356:
0357:                if (CT.length() < 2)
0358:                    return;
0359:
0360:                // if the first character is ... , remove it
0361:                if ((CT.charAt(0) == '"') || (CT.charAt(0) == '\'')
0362:                        || (CT.charAt(0) == '-') || (CT.charAt(0) == ',')
0363:                        || (CT.charAt(0) == ';') || (CT.charAt(0) == '.')
0364:                        || (CT.charAt(0) == '?') || (CT.charAt(0) == '!')) {
0365:                    CT = CT.substring(1);
0366:                }
0367:
0368:                if (CT.length() < 2)
0369:                    return;
0370:
0371:                // if the last character is ... , remove it
0372:                if ((CT.charAt(CT.length() - 1) == '-')
0373:                        || (CT.charAt(CT.length() - 1) == ',')
0374:                        || (CT.charAt(CT.length() - 1) == ';')
0375:                        || (CT.charAt(CT.length() - 1) == '.')
0376:                        || (CT.charAt(CT.length() - 1) == '?')
0377:                        || (CT.charAt(CT.length() - 1) == '!')
0378:                        || (CT.charAt(CT.length() - 1) == '\'')
0379:                        || (CT.charAt(CT.length() - 1) == '"')) {
0380:                    CT = CT.substring(0, CT.length() - 1);
0381:                }
0382:            }
0383:
0384:            /**
0385:             * Standart suffix removal.
0386:             * Search for the longest among the following suffixes, and perform
0387:             * the following actions:
0388:             *
0389:             * @return false if no ending was removed
0390:             */
0391:            private boolean step1() {
0392:                if (CT == null)
0393:                    return false;
0394:
0395:                // suffix lenght = 7
0396:                if (suffix(CT, "uciones") && suffix(R2, "uciones")) {
0397:                    CT = replaceSuffix(CT, "uciones", "u");
0398:                    return true;
0399:                }
0400:
0401:                // suffix lenght = 6
0402:                if (CT.length() >= 6) {
0403:                    if (suffix(CT, "imentos") && suffix(R2, "imentos")) {
0404:                        CT = removeSuffix(CT, "imentos");
0405:                        return true;
0406:                    }
0407:                    if (suffix(CT, "amentos") && suffix(R2, "amentos")) {
0408:                        CT = removeSuffix(CT, "amentos");
0409:                        return true;
0410:                    }
0411:                    if (suffix(CT, "adores") && suffix(R2, "adores")) {
0412:                        CT = removeSuffix(CT, "adores");
0413:                        return true;
0414:                    }
0415:                    if (suffix(CT, "adoras") && suffix(R2, "adoras")) {
0416:                        CT = removeSuffix(CT, "adoras");
0417:                        return true;
0418:                    }
0419:                    if (suffix(CT, "logias") && suffix(R2, "logias")) {
0420:                        replaceSuffix(CT, "logias", "log");
0421:                        return true;
0422:                    }
0423:                    if (suffix(CT, "encias") && suffix(R2, "encias")) {
0424:                        CT = replaceSuffix(CT, "encias", "ente");
0425:                        return true;
0426:                    }
0427:                    if (suffix(CT, "amente") && suffix(R1, "amente")) {
0428:                        CT = removeSuffix(CT, "amente");
0429:                        return true;
0430:                    }
0431:                    if (suffix(CT, "idades") && suffix(R2, "idades")) {
0432:                        CT = removeSuffix(CT, "idades");
0433:                        return true;
0434:                    }
0435:                }
0436:
0437:                // suffix lenght = 5
0438:                if (CT.length() >= 5) {
0439:                    if (suffix(CT, "acoes") && suffix(R2, "acoes")) {
0440:                        CT = removeSuffix(CT, "acoes");
0441:                        return true;
0442:                    }
0443:                    if (suffix(CT, "imento") && suffix(R2, "imento")) {
0444:                        CT = removeSuffix(CT, "imento");
0445:                        return true;
0446:                    }
0447:                    if (suffix(CT, "amento") && suffix(R2, "amento")) {
0448:                        CT = removeSuffix(CT, "amento");
0449:                        return true;
0450:                    }
0451:                    if (suffix(CT, "adora") && suffix(R2, "adora")) {
0452:                        CT = removeSuffix(CT, "adora");
0453:                        return true;
0454:                    }
0455:                    if (suffix(CT, "ismos") && suffix(R2, "ismos")) {
0456:                        CT = removeSuffix(CT, "ismos");
0457:                        return true;
0458:                    }
0459:                    if (suffix(CT, "istas") && suffix(R2, "istas")) {
0460:                        CT = removeSuffix(CT, "istas");
0461:                        return true;
0462:                    }
0463:                    if (suffix(CT, "logia") && suffix(R2, "logia")) {
0464:                        CT = replaceSuffix(CT, "logia", "log");
0465:                        return true;
0466:                    }
0467:                    if (suffix(CT, "ucion") && suffix(R2, "ucion")) {
0468:                        CT = replaceSuffix(CT, "ucion", "u");
0469:                        return true;
0470:                    }
0471:                    if (suffix(CT, "encia") && suffix(R2, "encia")) {
0472:                        CT = replaceSuffix(CT, "encia", "ente");
0473:                        return true;
0474:                    }
0475:                    if (suffix(CT, "mente") && suffix(R2, "mente")) {
0476:                        CT = removeSuffix(CT, "mente");
0477:                        return true;
0478:                    }
0479:                    if (suffix(CT, "idade") && suffix(R2, "idade")) {
0480:                        CT = removeSuffix(CT, "idade");
0481:                        return true;
0482:                    }
0483:                }
0484:
0485:                // suffix lenght = 4
0486:                if (CT.length() >= 4) {
0487:                    if (suffix(CT, "acao") && suffix(R2, "acao")) {
0488:                        CT = removeSuffix(CT, "acao");
0489:                        return true;
0490:                    }
0491:                    if (suffix(CT, "ezas") && suffix(R2, "ezas")) {
0492:                        CT = removeSuffix(CT, "ezas");
0493:                        return true;
0494:                    }
0495:                    if (suffix(CT, "icos") && suffix(R2, "icos")) {
0496:                        CT = removeSuffix(CT, "icos");
0497:                        return true;
0498:                    }
0499:                    if (suffix(CT, "icas") && suffix(R2, "icas")) {
0500:                        CT = removeSuffix(CT, "icas");
0501:                        return true;
0502:                    }
0503:                    if (suffix(CT, "ismo") && suffix(R2, "ismo")) {
0504:                        CT = removeSuffix(CT, "ismo");
0505:                        return true;
0506:                    }
0507:                    if (suffix(CT, "avel") && suffix(R2, "avel")) {
0508:                        CT = removeSuffix(CT, "avel");
0509:                        return true;
0510:                    }
0511:                    if (suffix(CT, "ivel") && suffix(R2, "ivel")) {
0512:                        CT = removeSuffix(CT, "ivel");
0513:                        return true;
0514:                    }
0515:                    if (suffix(CT, "ista") && suffix(R2, "ista")) {
0516:                        CT = removeSuffix(CT, "ista");
0517:                        return true;
0518:                    }
0519:                    if (suffix(CT, "osos") && suffix(R2, "osos")) {
0520:                        CT = removeSuffix(CT, "osos");
0521:                        return true;
0522:                    }
0523:                    if (suffix(CT, "osas") && suffix(R2, "osas")) {
0524:                        CT = removeSuffix(CT, "osas");
0525:                        return true;
0526:                    }
0527:                    if (suffix(CT, "ador") && suffix(R2, "ador")) {
0528:                        CT = removeSuffix(CT, "ador");
0529:                        return true;
0530:                    }
0531:                    if (suffix(CT, "ivas") && suffix(R2, "ivas")) {
0532:                        CT = removeSuffix(CT, "ivas");
0533:                        return true;
0534:                    }
0535:                    if (suffix(CT, "ivos") && suffix(R2, "ivos")) {
0536:                        CT = removeSuffix(CT, "ivos");
0537:                        return true;
0538:                    }
0539:                    if (suffix(CT, "iras") && suffix(RV, "iras")
0540:                            && suffixPreceded(CT, "iras", "e")) {
0541:                        CT = replaceSuffix(CT, "iras", "ir");
0542:                        return true;
0543:                    }
0544:                }
0545:
0546:                // suffix lenght = 3
0547:                if (CT.length() >= 3) {
0548:                    if (suffix(CT, "eza") && suffix(R2, "eza")) {
0549:                        CT = removeSuffix(CT, "eza");
0550:                        return true;
0551:                    }
0552:                    if (suffix(CT, "ico") && suffix(R2, "ico")) {
0553:                        CT = removeSuffix(CT, "ico");
0554:                        return true;
0555:                    }
0556:                    if (suffix(CT, "ica") && suffix(R2, "ica")) {
0557:                        CT = removeSuffix(CT, "ica");
0558:                        return true;
0559:                    }
0560:                    if (suffix(CT, "oso") && suffix(R2, "oso")) {
0561:                        CT = removeSuffix(CT, "oso");
0562:                        return true;
0563:                    }
0564:                    if (suffix(CT, "osa") && suffix(R2, "osa")) {
0565:                        CT = removeSuffix(CT, "osa");
0566:                        return true;
0567:                    }
0568:                    if (suffix(CT, "iva") && suffix(R2, "iva")) {
0569:                        CT = removeSuffix(CT, "iva");
0570:                        return true;
0571:                    }
0572:                    if (suffix(CT, "ivo") && suffix(R2, "ivo")) {
0573:                        CT = removeSuffix(CT, "ivo");
0574:                        return true;
0575:                    }
0576:                    if (suffix(CT, "ira") && suffix(RV, "ira")
0577:                            && suffixPreceded(CT, "ira", "e")) {
0578:                        CT = replaceSuffix(CT, "ira", "ir");
0579:                        return true;
0580:                    }
0581:                }
0582:
0583:                // no ending was removed by step1
0584:                return false;
0585:            }
0586:
0587:            /**
0588:             * Verb suffixes.
0589:             *
0590:             * Search for the longest among the following suffixes in RV,
0591:             * and if found, delete.
0592:             *
0593:             * @return false if no ending was removed
0594:             */
0595:            private boolean step2() {
0596:                if (RV == null)
0597:                    return false;
0598:
0599:                // suffix lenght = 7
0600:                if (RV.length() >= 7) {
0601:                    if (suffix(RV, "issemos")) {
0602:                        CT = removeSuffix(CT, "issemos");
0603:                        return true;
0604:                    }
0605:                    if (suffix(RV, "essemos")) {
0606:                        CT = removeSuffix(CT, "essemos");
0607:                        return true;
0608:                    }
0609:                    if (suffix(RV, "assemos")) {
0610:                        CT = removeSuffix(CT, "assemos");
0611:                        return true;
0612:                    }
0613:                    if (suffix(RV, "ariamos")) {
0614:                        CT = removeSuffix(CT, "ariamos");
0615:                        return true;
0616:                    }
0617:                    if (suffix(RV, "eriamos")) {
0618:                        CT = removeSuffix(CT, "eriamos");
0619:                        return true;
0620:                    }
0621:                    if (suffix(RV, "iriamos")) {
0622:                        CT = removeSuffix(CT, "iriamos");
0623:                        return true;
0624:                    }
0625:                }
0626:
0627:                // suffix lenght = 6
0628:                if (RV.length() >= 6) {
0629:                    if (suffix(RV, "iremos")) {
0630:                        CT = removeSuffix(CT, "iremos");
0631:                        return true;
0632:                    }
0633:                    if (suffix(RV, "eremos")) {
0634:                        CT = removeSuffix(CT, "eremos");
0635:                        return true;
0636:                    }
0637:                    if (suffix(RV, "aremos")) {
0638:                        CT = removeSuffix(CT, "aremos");
0639:                        return true;
0640:                    }
0641:                    if (suffix(RV, "avamos")) {
0642:                        CT = removeSuffix(CT, "avamos");
0643:                        return true;
0644:                    }
0645:                    if (suffix(RV, "iramos")) {
0646:                        CT = removeSuffix(CT, "iramos");
0647:                        return true;
0648:                    }
0649:                    if (suffix(RV, "eramos")) {
0650:                        CT = removeSuffix(CT, "eramos");
0651:                        return true;
0652:                    }
0653:                    if (suffix(RV, "aramos")) {
0654:                        CT = removeSuffix(CT, "aramos");
0655:                        return true;
0656:                    }
0657:                    if (suffix(RV, "asseis")) {
0658:                        CT = removeSuffix(CT, "asseis");
0659:                        return true;
0660:                    }
0661:                    if (suffix(RV, "esseis")) {
0662:                        CT = removeSuffix(CT, "esseis");
0663:                        return true;
0664:                    }
0665:                    if (suffix(RV, "isseis")) {
0666:                        CT = removeSuffix(CT, "isseis");
0667:                        return true;
0668:                    }
0669:                    if (suffix(RV, "arieis")) {
0670:                        CT = removeSuffix(CT, "arieis");
0671:                        return true;
0672:                    }
0673:                    if (suffix(RV, "erieis")) {
0674:                        CT = removeSuffix(CT, "erieis");
0675:                        return true;
0676:                    }
0677:                    if (suffix(RV, "irieis")) {
0678:                        CT = removeSuffix(CT, "irieis");
0679:                        return true;
0680:                    }
0681:                }
0682:
0683:                // suffix lenght = 5
0684:                if (RV.length() >= 5) {
0685:                    if (suffix(RV, "irmos")) {
0686:                        CT = removeSuffix(CT, "irmos");
0687:                        return true;
0688:                    }
0689:                    if (suffix(RV, "iamos")) {
0690:                        CT = removeSuffix(CT, "iamos");
0691:                        return true;
0692:                    }
0693:                    if (suffix(RV, "armos")) {
0694:                        CT = removeSuffix(CT, "armos");
0695:                        return true;
0696:                    }
0697:                    if (suffix(RV, "ermos")) {
0698:                        CT = removeSuffix(CT, "ermos");
0699:                        return true;
0700:                    }
0701:                    if (suffix(RV, "areis")) {
0702:                        CT = removeSuffix(CT, "areis");
0703:                        return true;
0704:                    }
0705:                    if (suffix(RV, "ereis")) {
0706:                        CT = removeSuffix(CT, "ereis");
0707:                        return true;
0708:                    }
0709:                    if (suffix(RV, "ireis")) {
0710:                        CT = removeSuffix(CT, "ireis");
0711:                        return true;
0712:                    }
0713:                    if (suffix(RV, "asses")) {
0714:                        CT = removeSuffix(CT, "asses");
0715:                        return true;
0716:                    }
0717:                    if (suffix(RV, "esses")) {
0718:                        CT = removeSuffix(CT, "esses");
0719:                        return true;
0720:                    }
0721:                    if (suffix(RV, "isses")) {
0722:                        CT = removeSuffix(CT, "isses");
0723:                        return true;
0724:                    }
0725:                    if (suffix(RV, "astes")) {
0726:                        CT = removeSuffix(CT, "astes");
0727:                        return true;
0728:                    }
0729:                    if (suffix(RV, "assem")) {
0730:                        CT = removeSuffix(CT, "assem");
0731:                        return true;
0732:                    }
0733:                    if (suffix(RV, "essem")) {
0734:                        CT = removeSuffix(CT, "essem");
0735:                        return true;
0736:                    }
0737:                    if (suffix(RV, "issem")) {
0738:                        CT = removeSuffix(CT, "issem");
0739:                        return true;
0740:                    }
0741:                    if (suffix(RV, "ardes")) {
0742:                        CT = removeSuffix(CT, "ardes");
0743:                        return true;
0744:                    }
0745:                    if (suffix(RV, "erdes")) {
0746:                        CT = removeSuffix(CT, "erdes");
0747:                        return true;
0748:                    }
0749:                    if (suffix(RV, "irdes")) {
0750:                        CT = removeSuffix(CT, "irdes");
0751:                        return true;
0752:                    }
0753:                    if (suffix(RV, "ariam")) {
0754:                        CT = removeSuffix(CT, "ariam");
0755:                        return true;
0756:                    }
0757:                    if (suffix(RV, "eriam")) {
0758:                        CT = removeSuffix(CT, "eriam");
0759:                        return true;
0760:                    }
0761:                    if (suffix(RV, "iriam")) {
0762:                        CT = removeSuffix(CT, "iriam");
0763:                        return true;
0764:                    }
0765:                    if (suffix(RV, "arias")) {
0766:                        CT = removeSuffix(CT, "arias");
0767:                        return true;
0768:                    }
0769:                    if (suffix(RV, "erias")) {
0770:                        CT = removeSuffix(CT, "erias");
0771:                        return true;
0772:                    }
0773:                    if (suffix(RV, "irias")) {
0774:                        CT = removeSuffix(CT, "irias");
0775:                        return true;
0776:                    }
0777:                    if (suffix(RV, "estes")) {
0778:                        CT = removeSuffix(CT, "estes");
0779:                        return true;
0780:                    }
0781:                    if (suffix(RV, "istes")) {
0782:                        CT = removeSuffix(CT, "istes");
0783:                        return true;
0784:                    }
0785:                    if (suffix(RV, "areis")) {
0786:                        CT = removeSuffix(CT, "areis");
0787:                        return true;
0788:                    }
0789:                    if (suffix(RV, "aveis")) {
0790:                        CT = removeSuffix(CT, "aveis");
0791:                        return true;
0792:                    }
0793:                }
0794:
0795:                // suffix length = 4
0796:                if (RV.length() >= 4) {
0797:                    if (suffix(RV, "aria")) {
0798:                        CT = removeSuffix(CT, "aria");
0799:                        return true;
0800:                    }
0801:                    if (suffix(RV, "eria")) {
0802:                        CT = removeSuffix(CT, "eria");
0803:                        return true;
0804:                    }
0805:                    if (suffix(RV, "iria")) {
0806:                        CT = removeSuffix(CT, "iria");
0807:                        return true;
0808:                    }
0809:                    if (suffix(RV, "asse")) {
0810:                        CT = removeSuffix(CT, "asse");
0811:                        return true;
0812:                    }
0813:                    if (suffix(RV, "esse")) {
0814:                        CT = removeSuffix(CT, "esse");
0815:                        return true;
0816:                    }
0817:                    if (suffix(RV, "isse")) {
0818:                        CT = removeSuffix(CT, "isse");
0819:                        return true;
0820:                    }
0821:                    if (suffix(RV, "aste")) {
0822:                        CT = removeSuffix(CT, "aste");
0823:                        return true;
0824:                    }
0825:                    if (suffix(RV, "este")) {
0826:                        CT = removeSuffix(CT, "este");
0827:                        return true;
0828:                    }
0829:                    if (suffix(RV, "iste")) {
0830:                        CT = removeSuffix(CT, "iste");
0831:                        return true;
0832:                    }
0833:                    if (suffix(RV, "arei")) {
0834:                        CT = removeSuffix(CT, "arei");
0835:                        return true;
0836:                    }
0837:                    if (suffix(RV, "erei")) {
0838:                        CT = removeSuffix(CT, "erei");
0839:                        return true;
0840:                    }
0841:                    if (suffix(RV, "irei")) {
0842:                        CT = removeSuffix(CT, "irei");
0843:                        return true;
0844:                    }
0845:                    if (suffix(RV, "aram")) {
0846:                        CT = removeSuffix(CT, "aram");
0847:                        return true;
0848:                    }
0849:                    if (suffix(RV, "eram")) {
0850:                        CT = removeSuffix(CT, "eram");
0851:                        return true;
0852:                    }
0853:                    if (suffix(RV, "iram")) {
0854:                        CT = removeSuffix(CT, "iram");
0855:                        return true;
0856:                    }
0857:                    if (suffix(RV, "avam")) {
0858:                        CT = removeSuffix(CT, "avam");
0859:                        return true;
0860:                    }
0861:                    if (suffix(RV, "arem")) {
0862:                        CT = removeSuffix(CT, "arem");
0863:                        return true;
0864:                    }
0865:                    if (suffix(RV, "erem")) {
0866:                        CT = removeSuffix(CT, "erem");
0867:                        return true;
0868:                    }
0869:                    if (suffix(RV, "irem")) {
0870:                        CT = removeSuffix(CT, "irem");
0871:                        return true;
0872:                    }
0873:                    if (suffix(RV, "ando")) {
0874:                        CT = removeSuffix(CT, "ando");
0875:                        return true;
0876:                    }
0877:                    if (suffix(RV, "endo")) {
0878:                        CT = removeSuffix(CT, "endo");
0879:                        return true;
0880:                    }
0881:                    if (suffix(RV, "indo")) {
0882:                        CT = removeSuffix(CT, "indo");
0883:                        return true;
0884:                    }
0885:                    if (suffix(RV, "arao")) {
0886:                        CT = removeSuffix(CT, "arao");
0887:                        return true;
0888:                    }
0889:                    if (suffix(RV, "erao")) {
0890:                        CT = removeSuffix(CT, "erao");
0891:                        return true;
0892:                    }
0893:                    if (suffix(RV, "irao")) {
0894:                        CT = removeSuffix(CT, "irao");
0895:                        return true;
0896:                    }
0897:                    if (suffix(RV, "adas")) {
0898:                        CT = removeSuffix(CT, "adas");
0899:                        return true;
0900:                    }
0901:                    if (suffix(RV, "idas")) {
0902:                        CT = removeSuffix(CT, "idas");
0903:                        return true;
0904:                    }
0905:                    if (suffix(RV, "aras")) {
0906:                        CT = removeSuffix(CT, "aras");
0907:                        return true;
0908:                    }
0909:                    if (suffix(RV, "eras")) {
0910:                        CT = removeSuffix(CT, "eras");
0911:                        return true;
0912:                    }
0913:                    if (suffix(RV, "iras")) {
0914:                        CT = removeSuffix(CT, "iras");
0915:                        return true;
0916:                    }
0917:                    if (suffix(RV, "avas")) {
0918:                        CT = removeSuffix(CT, "avas");
0919:                        return true;
0920:                    }
0921:                    if (suffix(RV, "ares")) {
0922:                        CT = removeSuffix(CT, "ares");
0923:                        return true;
0924:                    }
0925:                    if (suffix(RV, "eres")) {
0926:                        CT = removeSuffix(CT, "eres");
0927:                        return true;
0928:                    }
0929:                    if (suffix(RV, "ires")) {
0930:                        CT = removeSuffix(CT, "ires");
0931:                        return true;
0932:                    }
0933:                    if (suffix(RV, "ados")) {
0934:                        CT = removeSuffix(CT, "ados");
0935:                        return true;
0936:                    }
0937:                    if (suffix(RV, "idos")) {
0938:                        CT = removeSuffix(CT, "idos");
0939:                        return true;
0940:                    }
0941:                    if (suffix(RV, "amos")) {
0942:                        CT = removeSuffix(CT, "amos");
0943:                        return true;
0944:                    }
0945:                    if (suffix(RV, "emos")) {
0946:                        CT = removeSuffix(CT, "emos");
0947:                        return true;
0948:                    }
0949:                    if (suffix(RV, "imos")) {
0950:                        CT = removeSuffix(CT, "imos");
0951:                        return true;
0952:                    }
0953:                    if (suffix(RV, "iras")) {
0954:                        CT = removeSuffix(CT, "iras");
0955:                        return true;
0956:                    }
0957:                    if (suffix(RV, "ieis")) {
0958:                        CT = removeSuffix(CT, "ieis");
0959:                        return true;
0960:                    }
0961:                }
0962:
0963:                // suffix length = 3
0964:                if (RV.length() >= 3) {
0965:                    if (suffix(RV, "ada")) {
0966:                        CT = removeSuffix(CT, "ada");
0967:                        return true;
0968:                    }
0969:                    if (suffix(RV, "ida")) {
0970:                        CT = removeSuffix(CT, "ida");
0971:                        return true;
0972:                    }
0973:                    if (suffix(RV, "ara")) {
0974:                        CT = removeSuffix(CT, "ara");
0975:                        return true;
0976:                    }
0977:                    if (suffix(RV, "era")) {
0978:                        CT = removeSuffix(CT, "era");
0979:                        return true;
0980:                    }
0981:                    if (suffix(RV, "ira")) {
0982:                        CT = removeSuffix(CT, "ava");
0983:                        return true;
0984:                    }
0985:                    if (suffix(RV, "iam")) {
0986:                        CT = removeSuffix(CT, "iam");
0987:                        return true;
0988:                    }
0989:                    if (suffix(RV, "ado")) {
0990:                        CT = removeSuffix(CT, "ado");
0991:                        return true;
0992:                    }
0993:                    if (suffix(RV, "ido")) {
0994:                        CT = removeSuffix(CT, "ido");
0995:                        return true;
0996:                    }
0997:                    if (suffix(RV, "ias")) {
0998:                        CT = removeSuffix(CT, "ias");
0999:                        return true;
1000:                    }
1001:                    if (suffix(RV, "ais")) {
1002:                        CT = removeSuffix(CT, "ais");
1003:                        return true;
1004:                    }
1005:                    if (suffix(RV, "eis")) {
1006:                        CT = removeSuffix(CT, "eis");
1007:                        return true;
1008:                    }
1009:                    if (suffix(RV, "ira")) {
1010:                        CT = removeSuffix(CT, "ira");
1011:                        return true;
1012:                    }
1013:                    if (suffix(RV, "ear")) {
1014:                        CT = removeSuffix(CT, "ear");
1015:                        return true;
1016:                    }
1017:                }
1018:
1019:                // suffix length = 2
1020:                if (RV.length() >= 2) {
1021:                    if (suffix(RV, "ia")) {
1022:                        CT = removeSuffix(CT, "ia");
1023:                        return true;
1024:                    }
1025:                    if (suffix(RV, "ei")) {
1026:                        CT = removeSuffix(CT, "ei");
1027:                        return true;
1028:                    }
1029:                    if (suffix(RV, "am")) {
1030:                        CT = removeSuffix(CT, "am");
1031:                        return true;
1032:                    }
1033:                    if (suffix(RV, "em")) {
1034:                        CT = removeSuffix(CT, "em");
1035:                        return true;
1036:                    }
1037:                    if (suffix(RV, "ar")) {
1038:                        CT = removeSuffix(CT, "ar");
1039:                        return true;
1040:                    }
1041:                    if (suffix(RV, "er")) {
1042:                        CT = removeSuffix(CT, "er");
1043:                        return true;
1044:                    }
1045:                    if (suffix(RV, "ir")) {
1046:                        CT = removeSuffix(CT, "ir");
1047:                        return true;
1048:                    }
1049:                    if (suffix(RV, "as")) {
1050:                        CT = removeSuffix(CT, "as");
1051:                        return true;
1052:                    }
1053:                    if (suffix(RV, "es")) {
1054:                        CT = removeSuffix(CT, "es");
1055:                        return true;
1056:                    }
1057:                    if (suffix(RV, "is")) {
1058:                        CT = removeSuffix(CT, "is");
1059:                        return true;
1060:                    }
1061:                    if (suffix(RV, "eu")) {
1062:                        CT = removeSuffix(CT, "eu");
1063:                        return true;
1064:                    }
1065:                    if (suffix(RV, "iu")) {
1066:                        CT = removeSuffix(CT, "iu");
1067:                        return true;
1068:                    }
1069:                    if (suffix(RV, "iu")) {
1070:                        CT = removeSuffix(CT, "iu");
1071:                        return true;
1072:                    }
1073:                    if (suffix(RV, "ou")) {
1074:                        CT = removeSuffix(CT, "ou");
1075:                        return true;
1076:                    }
1077:                }
1078:
1079:                // no ending was removed by step2
1080:                return false;
1081:            }
1082:
1083:            /**
1084:             * Delete suffix 'i' if in RV and preceded by 'c'
1085:             *
1086:             */
1087:            private void step3() {
1088:                if (RV == null)
1089:                    return;
1090:
1091:                if (suffix(RV, "i") && suffixPreceded(RV, "i", "c")) {
1092:                    CT = removeSuffix(CT, "i");
1093:                }
1094:
1095:            }
1096:
1097:            /**
1098:             * Residual suffix
1099:             *
1100:             * If the word ends with one of the suffixes (os a i o á í ó)
1101:             * in RV, delete it
1102:             *
1103:             */
1104:            private void step4() {
1105:                if (RV == null)
1106:                    return;
1107:
1108:                if (suffix(RV, "os")) {
1109:                    CT = removeSuffix(CT, "os");
1110:                    return;
1111:                }
1112:                if (suffix(RV, "a")) {
1113:                    CT = removeSuffix(CT, "a");
1114:                    return;
1115:                }
1116:                if (suffix(RV, "i")) {
1117:                    CT = removeSuffix(CT, "i");
1118:                    return;
1119:                }
1120:                if (suffix(RV, "o")) {
1121:                    CT = removeSuffix(CT, "o");
1122:                    return;
1123:                }
1124:
1125:            }
1126:
1127:            /**
1128:             * If the word ends with one of ( e é ê) in RV,delete it,
1129:             * and if preceded by 'gu' (or 'ci') with the 'u' (or 'i') in RV,
1130:             * delete the 'u' (or 'i')
1131:             *
1132:             * Or if the word ends ç remove the cedilha
1133:             *
1134:             */
1135:            private void step5() {
1136:                if (RV == null)
1137:                    return;
1138:
1139:                if (suffix(RV, "e")) {
1140:                    if (suffixPreceded(RV, "e", "gu")) {
1141:                        CT = removeSuffix(CT, "e");
1142:                        CT = removeSuffix(CT, "u");
1143:                        return;
1144:                    }
1145:
1146:                    if (suffixPreceded(RV, "e", "ci")) {
1147:                        CT = removeSuffix(CT, "e");
1148:                        CT = removeSuffix(CT, "i");
1149:                        return;
1150:                    }
1151:
1152:                    CT = removeSuffix(CT, "e");
1153:                    return;
1154:                }
1155:            }
1156:
1157:            /**
1158:             * For log and debug purpose
1159:             *
1160:             * @return  TERM, CT, RV, R1 and R2
1161:             */
1162:            public String log() {
1163:                return " (TERM = " + TERM + ")" + " (CT = " + CT + ")"
1164:                        + " (RV = " + RV + ")" + " (R1 = " + R1 + ")"
1165:                        + " (R2 = " + R2 + ")";
1166:            }
1167:
1168:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.