01: /*
02: * Lexer.java February 2006
03: *
04: * Copyright (C) 2006, Niall Gallagher <niallg@users.sf.net>
05: *
06: * This library is free software; you can redistribute it and/or
07: * modify it under the terms of the GNU Lesser General Public
08: * License as published by the Free Software Foundation.
09: *
10: * This library is distributed in the hope that it will be useful,
11: * but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13: * GNU Lesser General Public License for more details.
14: *
15: * You should have received a copy of the GNU Lesser General
16: * Public License along with this library; if not, write to the
17: * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
18: * Boston, MA 02111-1307 USA
19: */
20:
21: package simple.page.translate;
22:
23: /**
24: * The <code>Lexer</code> interface represents an object that can be
25: * initialized with token matching patterns. This is used so that
26: * a parser can be handed an arbitrary lexer implementation and be
27: * able to tell that lexer the token types it would like to receive.
28: * Typically this will be initialized with tokens such as those
29: * used by PHP, JSP, and Ruby on Rails. For example, tokens such as
30: * the JSP <code><%</code> and <code>%></code> could be used.
31: * Such tokens would ensure the lexer emits JSP directives.
32: *
33: * @author Niall Gallagher
34: *
35: * @see simple.page.translate.Tokenizer
36: */
37: interface Lexer {
38:
39: /**
40: * This method tells the lexer how to extract the tokens
41: * from the source document. This is given the opening and
42: * closing tokens used to identify a segment. Typically
43: * with languages such as JSP and PHP code segments are
44: * opened with a delimeter like <code><%</code> for JSP
45: * and <code><?php</code> for PHP. This method allows
46: * the lexer to be configured to process such delimeters.
47: *
48: * @param start this is the opening token for a segment
49: * @param finish this is the closing token for a segment
50: */
51: public void match(String start, String finish);
52:
53: /**
54: * This method tells the lexer how to extract the tokens
55: * from the source document. This is given the opening and
56: * closing tokens used to identify a segment. Typically
57: * with languages such as JSP and PHP code segments are
58: * opened with a delimeter like <code><%</code> for JSP
59: * and <code><?php</code> for PHP. This method allows
60: * the lexer to be configured to process such delimeters.
61: * <p>
62: * With this <code>match</code> method a collection of
63: * special characters can be specified. These characters
64: * tell the lexer what it should allow whitespace to
65: * surround for example take the HTML expressions below.
66: * <pre>
67: *
68: * < script language ='groovy' >
69: * <script language='groovy'>
70: *
71: * </pre>
72: * The above two HTML expressions should be considered
73: * equals using the special characters <code><</code>,
74: * <code>></code>, and <code>=</code>.
75: *
76: * @param start this is the opening token for a segment
77: * @param finish this is the closing token for a segment
78: */
79: public void match(String start, String finish, String special);
80: }
|