001: package fri.patterns.interpreter.parsergenerator.examples;
002:
003: import java.util.*;
004: import java.io.*;
005: import fri.util.TimeStopper;
006: import fri.util.io.UnicodeReader;
007: import fri.patterns.interpreter.parsergenerator.builder.SerializedLexer;
008: import fri.patterns.interpreter.parsergenerator.lexer.LexerImpl;
009: import fri.patterns.interpreter.parsergenerator.lexer.LexerSemantic;
010: import fri.patterns.interpreter.parsergenerator.lexer.ResultTree;
011: import fri.patterns.interpreter.parsergenerator.syntax.Rule;
012:
013: /**
014: Example XML lexer. Not event-driven like SAX, but good for DOM-building.
015:
016: @author Fritz Ritzberger, 2003
017: */
018:
019: public class XmlLexer {
020: public static void main(String[] args) throws Exception {
021: if (args.length <= 0) {
022: System.err.println("SYNTAX: java "
023: + XmlLexer.class.getName()
024: + " file.xml [file.xml ...]");
025: System.err.println(" Example XML Parser");
026: System.exit(1);
027: }
028:
029: // Standalone lexer as top-down parser.
030: TimeStopper timer = new TimeStopper();
031: // Building lexer from scratch takes 840 millis. Parsing takes 60 millis for a 70 line XML file.
032:
033: // read the syntax from EBNF file
034: Reader syntaxInput = new InputStreamReader(XmlLexer.class
035: .getResourceAsStream("Xml.syntax"));
036: boolean PRODUCTION = false; // always build from scratch at development time
037: LexerImpl lexer = (LexerImpl) new SerializedLexer(PRODUCTION)
038: .get(syntaxInput, "Xml");
039:
040: System.err.println("time to build XML file parser was "
041: + timer.getInterval());
042:
043: for (int i = 0; i < args.length; i++) {
044: String parseFile = args[i];
045: Reader parseInput = new UnicodeReader(new FileInputStream(
046: parseFile));
047: lexer.setInput(parseInput);
048:
049: System.err.println("======================== Parsing: "
050: + parseFile + " ========================");
051: boolean result = lexer.lex(new PrintXmlLexerSemantic());
052: System.err
053: .println("========================================================");
054:
055: System.err.println("Lexing took " + timer.getInterval()
056: + " millis.");
057: System.err.println("Result was: " + result);
058: }
059: }
060:
061: static class PrintXmlLexerSemantic implements LexerSemantic {
062: /**
063: * Receives evaluated lexer ruels and their result.
064: */
065: public void ruleEvaluated(Rule rule, ResultTree resultTree) {
066: System.out.println("Nonterminal=" + rule.getNonterminal()
067: + ", range(" + resultTree.getRange()
068: + "), Input=\"" + resultTree.toString() + "\"");
069: }
070:
071: /**
072: * Returns a Set of nonterminal Strings whose rule evaluations the Lexer should
073: * report to this semantic. Could return null to receive all rules.
074: * For XML only a subset of all tokens in the EBNF is needed. There is no other
075: * way than to hardcode those nonterminal names here. When using the SourceGenerator
076: * on the XML EBNF, the Strings could be imported from generated source to be consistent.
077: */
078: public Set getWantedNonterminals() {
079: Set considered = new HashSet();
080: considered.add("Name");
081: considered.add("Nmtoken");
082: considered.add("EntityValue");
083: considered.add("AttValue");
084: considered.add("SystemLiteral");
085: considered.add("PubidLiteral");
086: considered.add("CharData");
087: considered.add("Comment");
088: considered.add("VersionNum");
089: considered.add("PITargetContent");
090: considered.add("PITarget");
091: considered.add("CData");
092: considered.add("doctypedecl");
093: considered.add("SDDecl");
094: considered.add("STag");
095: considered.add("Attribute");
096: considered.add("ETag");
097: considered.add("EmptyElemTag");
098: considered.add("elementdecl");
099: considered.add("contentspec");
100: considered.add("cp");
101: considered.add("ChoiceList");
102: considered.add("SeqListOpt");
103: considered.add("Mixed");
104: considered.add("AttDef");
105: considered.add("StringType");
106: considered.add("TokenizedType");
107: considered.add("NotationType");
108: considered.add("Enumeration");
109: considered.add("DefaultDecl");
110: considered.add("CharRef");
111: considered.add("EntityRef");
112: considered.add("PEReference");
113: considered.add("GEDecl");
114: considered.add("PEDecl");
115: considered.add("EntityDef");
116: considered.add("PEDef");
117: considered.add("ExternalID");
118: considered.add("NDataDecl");
119: considered.add("EncName");
120: considered.add("NotationDecl");
121: return considered;
122: }
123:
124: public Set getIgnoredNonterminals() {
125: return null;
126: }
127:
128: }
129:
130: }
|