Source Code Cross Referenced for PatternMatcher.java in  » Parser » JTopas » de » susebox » jtopas » impl » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Parser » JTopas » de.susebox.jtopas.impl 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         * PatternMatcher.java: java.util.regex based implementation of the PatternHandler interface.
003:         *
004:         * Copyright (C) 2003 Heiko Blau
005:         *
006:         * This file belongs to the JTopas Library.
007:         * JTopas is free software; you can redistribute it and/or modify it 
008:         * under the terms of the GNU Lesser General Public License as published by the 
009:         * Free Software Foundation; either version 2.1 of the License, or (at your 
010:         * option) any later version.
011:         *
012:         * This software is distributed in the hope that it will be useful, but WITHOUT
013:         * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
014:         * FITNESS FOR A PARTICULAR PURPOSE. 
015:         * See the GNU Lesser General Public License for more details.
016:         *
017:         * You should have received a copy of the GNU Lesser General Public License along
018:         * with JTopas. If not, write to the
019:         *
020:         *   Free Software Foundation, Inc.
021:         *   59 Temple Place, Suite 330, 
022:         *   Boston, MA 02111-1307 
023:         *   USA
024:         *
025:         * or check the Internet: http://www.fsf.org
026:         *
027:         * Contact:
028:         *   email: heiko@susebox.de 
029:         */
030:
031:        package de.susebox.jtopas.impl;
032:
033:        //-----------------------------------------------------------------------------
034:        // Imports
035:        //
036:        import java.util.regex.Pattern;
037:        import java.util.regex.Matcher;
038:        import java.util.regex.PatternSyntaxException;
039:
040:        import de.susebox.jtopas.TokenizerProperty;
041:        import de.susebox.jtopas.Flags;
042:        import de.susebox.jtopas.TokenizerException;
043:
044:        import de.susebox.jtopas.spi.PatternHandler;
045:        import de.susebox.jtopas.spi.DataProvider;
046:
047:        //-----------------------------------------------------------------------------
048:        // Class PatternMatcher
049:        //
050:
051:        /**<p>
052:         * Implementation of the {@link PatternHandler} interface using the JDK 1.4 
053:         * package <code>java.util.regex</code>.
054:         *</p>
055:         *
056:         * @author  Heiko Blau
057:         */
058:        public class PatternMatcher implements  PatternHandler {
059:
060:            //---------------------------------------------------------------------------
061:            // Constructors
062:            //
063:
064:            /**
065:             * The constructor takes a pattern and the {@link TokenizerProperty} object
066:             * associated with this instance of <code>PatternMatcher</code>. The global
067:             * flags are passed to control the behaviour for attributes that are not
068:             * specified in the property itself (e.g. case-sensitivity).
069:             *
070:             * @param   prop          the {@link TokenizerProperty} associated with this object
071:             * @param   globalFlags   flags that are to be used if not set explicitely in the property
072:             * @throws  NullPointerException if the given parameter is <code>null</code>
073:             */
074:            public PatternMatcher(TokenizerProperty prop, int globalFlags)
075:                    throws NullPointerException {
076:                _globalFlags = globalFlags;
077:                setProperty(prop);
078:            }
079:
080:            //---------------------------------------------------------------------------
081:            // Methods of the PatternHandler interface
082:            //
083:
084:            /**
085:             * The method is a dummy implementation for the interface {@link PatternHandler}
086:             * and always returns <code>true</code>.
087:             *
088:             * @return  always <code>true</code>
089:             */
090:            public boolean hasPattern() {
091:                return true;
092:            }
093:
094:            /**
095:             * This method checks if the start of a character range given through the 
096:             * {@link DataProvider} matches a pattern. See {@link PatternHandler#matches}
097:             * for details.
098:             *
099:             * @param   dataProvider    the source to get the data from
100:             * @param   freePatternOnly if <code>true</code> only unbounded pattern should be
101:             *                          checked (pattern not enclosed in whitespaces, separators etc.)
102:             * @return  a {@link PatternHandler.Result} object or <code>null</code> if no
103:             *          match was found
104:             * @throws  TokenizerException    generic exception
105:             * @throws  NullPointerException  if no {@link DataProvider} is given
106:             */
107:            public PatternHandler.Result matches(DataProvider dataProvider)
108:                    throws TokenizerException, NullPointerException {
109:                // invoke JDK 1.4 or jakarta regexp API
110:                try {
111:                    String[] groups;
112:
113:                    _matcher.reset(new DataProviderCharSequence(dataProvider));
114:                    if (_matcher.lookingAt()) {
115:                        if (_property
116:                                .isFlagSet(
117:                                        Flags.F_RETURN_IMAGE_PARTS,
118:                                        (_globalFlags & Flags.F_RETURN_IMAGE_PARTS) != 0)) {
119:                            // get the capturing groups
120:                            groups = new String[_matcher.groupCount() + 1];
121:                            for (int index = 0; index < groups.length; ++index) {
122:                                groups[index] = _matcher.group(index);
123:                            }
124:                        } else {
125:                            groups = new String[] {};
126:                        }
127:                        return new LocalResult(_property, _matcher.end(),
128:                                groups);
129:                    } else {
130:                        return null;
131:                    }
132:                } catch (Exception ex) {
133:                    throw new TokenizerException(ex);
134:                }
135:            }
136:
137:            //---------------------------------------------------------------------------
138:            // Methods
139:            //
140:
141:            /**
142:             * Setting the {@link TokenizerProperty} for this <code>PatternMatcher</code>.
143:             * This method will recompile the regular expression pattern. 
144:             *
145:             * @param   prop    the {@link TokenizerProperty} associated with this object
146:             * @throws  NullPointerException if the given parameter is <code>null</code>
147:             */
148:            public void setProperty(TokenizerProperty prop)
149:                    throws NullPointerException {
150:                // no pattern given
151:                if (prop == null) {
152:                    throw new NullPointerException("No property given.");
153:                } else if (prop.getImages() == null
154:                        || prop.getImages().length < 1
155:                        || prop.getImages()[0] == null) {
156:                    throw new NullPointerException(
157:                            "Property contains no pattern image.");
158:                }
159:
160:                // compile the pattern
161:                int flags = Pattern.MULTILINE | Pattern.DOTALL;
162:
163:                if (prop.isFlagSet(Flags.F_NO_CASE,
164:                        (_globalFlags & Flags.F_NO_CASE) != 0)) {
165:                    flags |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
166:                }
167:                _matcher = Pattern.compile(prop.getImages()[0], flags).matcher(
168:                        "");
169:
170:                // set property
171:                _property = prop;
172:            }
173:
174:            /**
175:             * Retrieving the {@link TokenizerProperty} of this <code>PatternMatcher</code>.
176:             *
177:             * @return  the {@link TokenizerProperty} associated with this object
178:             */
179:            public TokenizerProperty getProperty() {
180:                return _property;
181:            }
182:
183:            //---------------------------------------------------------------------------
184:            // Inner Classes
185:            //
186:
187:            /**
188:             * The result of a match operation.
189:             */
190:            private final class LocalResult implements  PatternHandler.Result {
191:
192:                /**
193:                 * The constructor gets all the nessecary parameters.
194:                 *
195:                 * @param prop          the pattern property
196:                 * @param lengthOfMatch the detected number of characters that match the pattern
197:                 * @param groups        array with the capturing groups
198:                 */
199:                protected LocalResult(TokenizerProperty prop,
200:                        int lengthOfMatch, String[] groups) {
201:                    _property = prop;
202:                    _lengthOfMatch = lengthOfMatch;
203:                    _groups = groups;
204:                }
205:
206:                /**
207:                 * Returns the capturing groups of a match. 
208:                 *
209:                 * @return  the capturing groups of the last pattern match in {@link #matches}.
210:                 */
211:                public String[] getGroups() throws TokenizerException {
212:                    return _groups;
213:                }
214:
215:                /**
216:                 * Returns the number of characters that are part of a match.
217:                 *
218:                 * @return length of match
219:                 */
220:                public int getLengthOfMatch() {
221:                    return _lengthOfMatch;
222:                }
223:
224:                /**
225:                 * Returns the {@link TokenizerProperty} that describes the pattern that 
226:                 * matches data passed to {@link PatternHandler#matches}.
227:                 *
228:                 * @return the pattern property of a successful match
229:                 */
230:                public TokenizerProperty getProperty() {
231:                    return _property;
232:                }
233:
234:                // member
235:                private TokenizerProperty _property;
236:                private int _lengthOfMatch;
237:                private String[] _groups;
238:            }
239:
240:            /**
241:             * An implementation of the JDK 1.4 {@link java.lang.CharSequence} interface
242:             * backed by a {@link DataProvider}.
243:             */
244:            private final class DataProviderCharSequence implements 
245:                    CharSequence {
246:
247:                /**
248:                 * The constructor takes the reference to the {@link DataProvider}.
249:                 *
250:                 * @param dataProvider  the backing <code>DataProvider</code>
251:                 */
252:                public DataProviderCharSequence(DataProvider dataProvider) {
253:                    this (dataProvider, dataProvider.getStartPosition(),
254:                            dataProvider.getLength());
255:                }
256:
257:                /**
258:                 * The constructor takes the reference to the {@link DataProvider}, the
259:                 * start position and length. It is nessecary for the {@link #subSequence}
260:                 * method
261:                 *
262:                 * @param dataProvider  the backing <code>DataProvider</code>
263:                 */
264:                private DataProviderCharSequence(DataProvider dataProvider,
265:                        int start, int length) {
266:                    _dataProvider = dataProvider;
267:                    _start = start;
268:                    _length = length;
269:                }
270:
271:                /** 
272:                 * Returns the character at the specified index.  An index ranges from zero
273:                 * to <code>length() - 1</code>.  The first character of the sequence is at
274:                 * index zero, the next at index one, and so on, as for array
275:                 * indexing. </p>
276:                 *
277:                 * @param   index   the index of the character to be returned
278:                 * @return  the specified character
279:                 * @throws  ArrayIndexOutOfBoundsException
280:                 *          if the <code>index</code> argument is negative or not less than
281:                 *          <code>length()</code>
282:                 */
283:                public char charAt(int index)
284:                        throws ArrayIndexOutOfBoundsException {
285:                    return _dataProvider.getCharAt(_start + index
286:                            - _dataProvider.getStartPosition());
287:                }
288:
289:                /** Returns the length of this character sequence.  The length is the number
290:                 * of 16-bit Unicode characters in the sequence. </p>
291:                 *
292:                 * @return  the number of characters in this sequence
293:                 *
294:                 */
295:                public int length() {
296:                    return _length;
297:                }
298:
299:                /** 
300:                 * Returns a new character sequence that is a subsequence of this sequence.
301:                 * See {@link java.lang.CharSequence#subSequence} for details.
302:                 *
303:                 * @param   start   the start index, inclusive
304:                 * @param   end     the end index, exclusive
305:                 * @return  the specified subsequence
306:                 * @throws  IndexOutOfBoundsException
307:                 *          if <code>start</code> or <code>end</code> are negative,
308:                 *          if <code>end</code> is greater than <code>length()</code>,
309:                 *          or if <code>start</code> is greater than <code>end</code>
310:                 */
311:                public CharSequence subSequence(int start, int end) {
312:                    if (start < 0 || end < 0 || end > length() || start > end) {
313:                        throw new IndexOutOfBoundsException();
314:                    }
315:                    return new DataProviderCharSequence(_dataProvider, _start
316:                            + start, end - start);
317:                }
318:
319:                /**
320:                 * Returns the string representation for the <code>DataProvider</code>.
321:                 *
322:                 * @return the string consisting of all available data in the DataProvider.
323:                 */
324:                public String toString() {
325:                    int realStart = _start - _dataProvider.getStartPosition();
326:
327:                    return _dataProvider.toString().substring(realStart,
328:                            realStart + _length);
329:                }
330:
331:                // members
332:                private DataProvider _dataProvider = null;
333:                private int _start = 0;
334:                private int _length = 0;
335:            }
336:
337:            //---------------------------------------------------------------------------
338:            // Members
339:            //
340:            private TokenizerProperty _property = null;
341:            private Matcher _matcher = null;
342:            private int _globalFlags = 0;
343:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.