001: package fri.patterns.interpreter.parsergenerator.builder;
002:
003: import java.util.List;
004: import fri.util.TimeStopper;
005: import fri.patterns.interpreter.parsergenerator.Lexer;
006: import fri.patterns.interpreter.parsergenerator.syntax.*;
007: import fri.patterns.interpreter.parsergenerator.syntax.builder.SyntaxSeparation;
008: import fri.patterns.interpreter.parsergenerator.lexer.*;
009:
010: /**
011: Buffering Lexers. SerializedLexer will build the Lexer from scratch
012: only the first time. Every following call will load the serialized Lexer
013: from filesystem.
014: <p>
015: The time to build a lexer from scratch is equal to deserializing it
016: in most cases. So a standalone lexer can be built without this class.
017: When needed for a Parser, use the SerializedParser factory!
018: <p>
019: This factory will separate the passed syntax into parser and lexer syntax if
020: token and ignored symbol Lists are null. So take care to use "token" and "ignored"
021: rules within syntax to achieve the desired result!
022: <p>
023: Example (syntax input from a file):
024: <pre>
025: File ebnfFile = ...;
026: Lexer lexer = new SerializedLexer().get(ebnfFile);
027: </pre>
028: or (syntax input from a Reader, must pass a base name for the serialization file):
029: <pre>
030: Reader ebnfReader = ...;
031: Lexer lexer = new SerializedLexer().get(ebnfReader, "My");	// "Lexer.ser" gets appended: serialized as "MyLexer.ser"
032: </pre>
033:
034: @author (c) 2002, Fritz Ritzberger
035: */
036:
037: public class SerializedLexer extends SerializedObject {
038: private SyntaxSeparation separation;
039: protected boolean PRODUCTION; // setting this to false in constructor will prevent the Lexer from being serialized
040:
041: /** Create a Lexer factory that caches built Lexers. */
042: public SerializedLexer() {
043: this (true);
044: }
045:
046: /** Create a Lexer factory that caches built Lexers. @param production when false the Lexer will not be serialized. */
047: public SerializedLexer(boolean production) {
048: this .PRODUCTION = production;
049: }
050:
051: /**
052: Builds the Lexer from scratch if not found in filesystem, else loads the serialized Lexer.
053: @param syntaxInput the Lexer syntax as File, InputStream, List of Lists, String [][] or Syntax.
054: @return deserialized Lexer, or one built from scratch that gets written to filesystem.
055: */
056: public Lexer get(Object syntaxInput) throws Exception {
057: return get(syntaxInput, null);
058: }
059:
060: /**
061: Builds the Lexer from scratch if not found in filesystem, else loads the serialized Lexer.
062: @param syntaxInput the Lexer syntax as File, InputStream, List of Lists, String [][] or Syntax.
063: @param baseName name of serialization file, can be null when syntaxInput is a File
064: @return deserialized Lexer, or one built from scratch that gets written to filesystem.
065: */
066: public Lexer get(Object syntaxInput, String baseName)
067: throws Exception {
068: return get(syntaxInput, baseName, null, null);
069: }
070:
071: /**
072: Builds the Lexer from scratch if not found in filesystem, else loads the serialized Lexer.
073: @param syntaxInput the Lexer syntax as File, InputStream, List of Lists, String [][] or Syntax.
074: @param baseName name of serialization file, can be null when syntaxInput is a File
075: @param tokenSymbols the token symbols when used by a prebuilt Parser
076: @param ignoredSymbols the ignored symbols when used by a prebuilt Parser
077: @return deserialized Lexer, or one built from scratch that gets written to filesystem.
078: */
079: public Lexer get(Object syntaxInput, String baseName,
080: List tokenSymbols, List ignoredSymbols) throws Exception {
081: Lexer lexer = readLexer(syntaxInput, baseName);
082: if (lexer == null)
083: lexer = buildAndStoreLexer(syntaxInput, baseName,
084: tokenSymbols, ignoredSymbols);
085: return lexer;
086: }
087:
088: private String ensureFileName(Object syntaxInput, String baseName) {
089: if (baseName == null)
090: baseName = baseNameFromSyntax(syntaxInput);
091: return baseName + "Lexer.ser";
092: }
093:
094: /**
095: * Tries to read the lexer from a serialized file. One of the two arguments must be non-null.
096: * @param syntaxInput the lexer syntax input to retrieve a default name when it is a File
097: * @param baseName if baseName is "Xml", the file "XmlLexer.ser" will be read, can be null
098: */
099: public Lexer readLexer(Object syntaxInput, String baseName) {
100: if (PRODUCTION)
101: return (Lexer) read(ensureFileName(syntaxInput, baseName));
102: return null;
103: }
104:
105: /**
106: * Builds a lexer from passed syntax and stores it to a File (when PRODUCTION is true, this is default).
107: * @param syntaxInput the lexer syntax input
108: * @param baseName a file basename, if "Xml", the file "XmlLexer.ser" will be written
109: */
110: public Lexer buildAndStoreLexer(Object syntaxInput,
111: String baseName, List tokenSymbols, List ignoredSymbols)
112: throws Exception {
113: Syntax syntax = toSyntax(syntaxInput);
114:
115: if (tokenSymbols == null || ignoredSymbols == null) {
116: this .separation = newSyntaxSeparation(syntax);
117: syntax = separation.getLexerSyntax();
118:
119: if (tokenSymbols == null)
120: tokenSymbols = separation.getTokenSymbols();
121: if (ignoredSymbols == null)
122: ignoredSymbols = separation.getIgnoredSymbols();
123: }
124: // else: assume that syntaxInput is a prebuilt lexer Syntax and a list of ignored tokens
125:
126: TimeStopper ts = new TimeStopper();
127: LexerBuilder builder = newLexerBuilder(syntax, ignoredSymbols);
128: Lexer lexer = builder.getLexer();
129: lexer.setTerminals(tokenSymbols);
130: System.err.println("Lexer scratch construction took "
131: + ts.getTimeMillis() + " millis");
132:
133: if (PRODUCTION)
134: write(ensureFileName(syntaxInput, baseName), lexer);
135:
136: return lexer;
137: }
138:
139: /** To be overridden when a modified SyntaxSeparation is needed. */
140: protected SyntaxSeparation newSyntaxSeparation(Syntax syntax)
141: throws SyntaxException {
142: return new SyntaxSeparation(syntax);
143: }
144:
145: /** To be overridden when a modified LexerBuilder is needed. */
146: protected LexerBuilder newLexerBuilder(Syntax syntax,
147: List ignoredSymbols) throws LexerException, SyntaxException {
148: return new LexerBuilder(syntax, ignoredSymbols);
149: }
150:
151: /**
152: If the lexer was built from scratch, the SyntaxSeparation object returned
153: will not be null and can be used to retrieve the parser syntax, else
154: null is returned, as the separation is not available in serialized Lexer.
155: */
156: public SyntaxSeparation getSyntaxSeparation() {
157: return separation;
158: }
159:
160: /** Test main. Building serialized Lexer takes 330, building from scratch takes 130 millis. */
161: public static void main(String[] args) {
162: try {
163: TimeStopper ts = new TimeStopper();
164: Lexer lexer = new SerializedLexer().get(
165: StandardLexerRules.lexerSyntax, "SyntaxBuilder");
166: System.err.println("Lexer was built in "
167: + ts.getTimeMillis() + " millis");
168: } catch (Exception e) {
169: e.printStackTrace();
170: }
171: }
172:
173: }
|