001: /*
002: * Copyright (C) Chaperon. All rights reserved.
003: * -------------------------------------------------------------------------
004: * This software is published under the terms of the Apache Software License
005: * version 1.1, a copy of which has been included with this distribution in
006: * the LICENSE file.
007: */
008:
009: package net.sourceforge.chaperon.common;
010:
011: import net.sourceforge.chaperon.build.LexicalAutomatonBuilder;
012: import net.sourceforge.chaperon.build.ParserAutomatonBuilder;
013: import net.sourceforge.chaperon.model.grammar.Grammar;
014: import net.sourceforge.chaperon.model.grammar.GrammarFactory;
015: import net.sourceforge.chaperon.model.lexicon.Lexicon;
016: import net.sourceforge.chaperon.model.lexicon.LexiconFactory;
017: import net.sourceforge.chaperon.process.LexicalAutomaton;
018: import net.sourceforge.chaperon.process.LexicalProcessor;
019: import net.sourceforge.chaperon.process.ParserAutomaton;
020: import net.sourceforge.chaperon.process.ParserProcessor;
021:
022: import org.apache.commons.logging.Log;
023: import org.apache.commons.logging.impl.SimpleLog;
024:
025: import org.xml.sax.XMLReader;
026: import org.xml.sax.helpers.AttributesImpl;
027: import org.xml.sax.helpers.LocatorImpl;
028:
029: import java.io.File;
030: import java.io.FileInputStream;
031: import java.io.InputStreamReader;
032: import java.io.LineNumberReader;
033:
034: import javax.xml.parsers.SAXParserFactory;
035: import javax.xml.transform.sax.SAXTransformerFactory;
036: import javax.xml.transform.sax.TransformerHandler;
037: import javax.xml.transform.stream.StreamResult;
038:
039: /**
 * Simple example of how to use the Chaperon parser.
041: *
042: * @author <a href="mailto:stephan@apache.org">Stephan Michels </a>
043: * @version CVS $Id: SimpleParser.java,v 1.7 2003/12/09 19:55:52 benedikta Exp $
044: */
045: public class SimpleParser {
046: public static void process(File lexiconFile, File grammarFile,
047: File inFile, File outFile) throws Exception {
048: // Create log
049: Log log = new SimpleLog("log");
050:
051: // Create factory for SAX parser
052: SAXParserFactory parserFactoryImpl = SAXParserFactory
053: .newInstance();
054: parserFactoryImpl.setNamespaceAware(true);
055:
056: // Get a SAX parser
057: XMLReader xmlparser = parserFactoryImpl.newSAXParser()
058: .getXMLReader();
059:
060: // Create a lexicon model for a given lexicon file
061: LexiconFactory lexiconfactory = new LexiconFactory();
062: xmlparser.setContentHandler(lexiconfactory);
063: xmlparser.parse(lexiconFile.toString());
064:
065: Lexicon lexicon = lexiconfactory.getLexicon();
066:
067: // Build a automaton from the lexicon model
068: LexicalAutomaton lexicalautomaton = (new LexicalAutomatonBuilder(
069: lexicon, log)).getLexicalAutomaton();
070:
071: // Create a processor for the lexicon
072: LexicalProcessor lexer = new LexicalProcessor();
073: lexer.setLog(log);
074: lexer.setLexicalAutomaton(lexicalautomaton);
075:
076: // Get a SAX parser
077: xmlparser = parserFactoryImpl.newSAXParser().getXMLReader();
078:
079: // Create a grammar model for a given grammar file
080: GrammarFactory grammarfactory = new GrammarFactory();
081: xmlparser.setContentHandler(grammarfactory);
082: xmlparser.parse(grammarFile.toString());
083:
084: Grammar grammar = grammarfactory.getGrammar();
085:
086: // Build a automaton from the grammar model
087: ParserAutomaton parserautomaton = (new ParserAutomatonBuilder(
088: grammar, log)).getParserAutomaton();
089:
090: // Create a processor for the grammar
091: ParserProcessor parser = new ParserProcessor();
092: parser.setLog(log);
093: parser.setParserAutomaton(parserautomaton);
094:
095: // Create factory for SAX transformer
096: SAXTransformerFactory transformerFactoryImpl = (SAXTransformerFactory) SAXTransformerFactory
097: .newInstance();
098:
099: // Create serializer to write the SAX stream into a file
100: TransformerHandler serializer = transformerFactoryImpl
101: .newTransformerHandler();
102: serializer.setResult(new StreamResult(outFile));
103:
104: // Connect components into a pipeline
105: lexer.setContentHandler(parser);
106: parser.setContentHandler(serializer);
107:
108: // Push text into this pipeline
109: // Create locator, which help to find possible syntax errors
110: LocatorImpl locator = new LocatorImpl();
111: locator.setSystemId(inFile.toURL().toString());
112: locator.setLineNumber(1);
113: locator.setColumnNumber(1);
114: lexer.setDocumentLocator(locator);
115:
116: // Start document
117: lexer.startDocument();
118:
119: // Start 'text' element, which the parser dispatch
120: lexer.startElement(
121: "http://chaperon.sourceforge.net/schema/text/1.0",
122: "text", "text", new AttributesImpl());
123:
124: LineNumberReader reader = new LineNumberReader(
125: new InputStreamReader(new FileInputStream(inFile)));
126:
127: String line;
128: String newline = null;
129: String separator = System.getProperty("line.separator");
130:
131: // Push text
132: while (true) {
133: if (newline == null)
134: line = reader.readLine();
135: else
136: line = newline;
137:
138: if (line == null)
139: break;
140:
141: newline = reader.readLine();
142:
143: line = (newline != null) ? (line + separator) : line;
144:
145: locator.setLineNumber(reader.getLineNumber());
146: locator.setColumnNumber(1);
147: lexer.characters(line.toCharArray(), 0, line.length());
148:
149: if (newline == null)
150: break;
151: }
152:
153: reader.close();
154:
155: // End 'text' element
156: lexer.endElement(
157: "http://chaperon.sourceforge.net/schema/text/1.0",
158: "text", "text");
159:
160: // End document
161: lexer.endDocument();
162: }
163:
164: public static void main(String[] args) {
165: File lexiconFile = new File(args[0]);
166: File grammarFile = new File(args[1]);
167: File inFile = new File(args[2]);
168: File outFile = new File(args[3]);
169:
170: try {
171: process(lexiconFile, grammarFile, inFile, outFile);
172: } catch (Exception e) {
173: e.printStackTrace();
174: }
175: }
176: }
|