001: /*
002: * Copyright (C) Chaperon. All rights reserved.
003: * -------------------------------------------------------------------------
004: * This software is published under the terms of the Apache Software License
005: * version 1.1, a copy of which has been included with this distribution in
006: * the LICENSE file.
007: */
008:
009: package net.sourceforge.chaperon.process.extended;
010:
011: import net.sourceforge.chaperon.common.Decoder;
012: import net.sourceforge.chaperon.model.extended.ExtendedGrammar;
013:
014: import org.apache.commons.logging.Log;
015:
016: import org.xml.sax.Attributes;
017: import org.xml.sax.ContentHandler;
018: import org.xml.sax.Locator;
019: import org.xml.sax.SAXException;
020: import org.xml.sax.ext.LexicalHandler;
021: import org.xml.sax.helpers.AttributesImpl;
022: import org.xml.sax.helpers.LocatorImpl;
023:
024: /**
025: * This class represents a simulation of a pushdown automata using the parser automaton class.
026: *
027: * @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
028: * @version CVS $Id: ExtendedBacktrackingParserProcessor.java,v 1.1 2004/01/04 16:49:12 benedikta Exp $
029: */
030: public class ExtendedBacktrackingParserProcessor implements
031: ContentHandler, LexicalHandler {
032: /** Namespace and element names for the consumed SAX events. */
033: public static final String NS = "http://chaperon.sourceforge.net/schema/text/1.0";
034: public static final String TEXT = "text";
035:
036: /** Namespace and element names for the generated SAX events. */
037: public static final String NS_OUTPUT = "http://chaperon.sourceforge.net/schema/syntaxtree/2.0";
038: public static final String OUTPUT = "output";
039: public static final String ERROR = "error";
040:
041: /** Content handler and locator facilities */
042: private ContentHandler contentHandler = null;
043: private LexicalHandler lexicalHandler = null;
044: private Locator locator = null;
045: private LocatorImpl locatorImpl = null;
046:
047: /** State of consumed input */
048: private static final int STATE_OUTER = 0;
049: private static final int STATE_INNER = 1;
050: private int state = STATE_OUTER;
051:
052: /** Internals */
053: private ExtendedParserAutomaton automaton;
054: private ExtendedGrammar grammar;
055: private boolean flatten = false;
056: private StackNode stackNode = null;
057: private Log log;
058: private CharBuffer buffer = new CharBuffer();
059: private StackNode topmost = null;
060:
061: /**
062: * Create a new parser processor.
063: */
064: public ExtendedBacktrackingParserProcessor() {
065: }
066:
067: /**
068: * Create a new parser processor.
069: *
070: * @param automaton Parser automaton, which the processor should ues.
071: * @param handler Handler, which should receives the parser events.
072: * @param log Log, which should used.
073: */
074: public ExtendedBacktrackingParserProcessor(
075: ExtendedParserAutomaton automaton, Log log) {
076: this .automaton = automaton;
077: this .log = log;
078: }
079:
080: /**
081: * Set the parser automaton for the processor.
082: *
083: * @param automaton Parser automaton.
084: */
085: public void setExtendedParserAutomaton(
086: ExtendedParserAutomaton automaton) {
087: this .automaton = automaton;
088: this .grammar = automaton.getExtendedGrammar();
089: }
090:
091: /**
092: * Set the <code>ContentHandler</code> that will receive XML data.
093: */
094: public void setContentHandler(ContentHandler handler) {
095: this .contentHandler = handler;
096: }
097:
098: /**
099: * Set the <code>LexicalHandler</code> that will receive XML data.
100: */
101: public void setLexicalHandler(LexicalHandler handler) {
102: this .lexicalHandler = handler;
103: }
104:
105: /**
106: * Provide processor with a log.
107: *
108: * @param log The log.
109: */
110: public void setLog(Log log) {
111: this .log = log;
112: }
113:
114: /**
115: * If the adapter should produce a more flatten XML hirachy, which means elements which the same
116: * name will be collapsed
117: *
118: * @param flatten True, if a more flatten hirachy should be produced.
119: */
120: public void setFlatten(boolean flatten) {
121: this .flatten = flatten;
122: }
123:
124: /**
125: * Receive an object for locating the origin of SAX document events.
126: */
127: public void setDocumentLocator(Locator locator) {
128: this .locator = locator;
129:
130: if (locator != null) {
131: this .locatorImpl = new LocatorImpl(locator);
132: contentHandler.setDocumentLocator(locatorImpl);
133: }
134: }
135:
136: /**
137: * Receive notification of the beginning of a document.
138: */
139: public void startDocument() throws SAXException {
140: locatorImpl.setLineNumber(locator.getLineNumber());
141: locatorImpl.setColumnNumber(locator.getColumnNumber());
142: contentHandler.startDocument();
143: state = STATE_OUTER;
144: }
145:
146: /**
147: * Receive notification of the beginning of an element.
148: */
149: public void startElement(String namespaceURI, String localName,
150: String qName, Attributes atts) throws SAXException {
151: locatorImpl.setLineNumber(locator.getLineNumber());
152: locatorImpl.setColumnNumber(locator.getColumnNumber());
153:
154: if (state == STATE_INNER)
155: throw new SAXException("Unexpected element " + qName);
156:
157: if (state == STATE_OUTER) {
158: if ((namespaceURI != null) && (namespaceURI.equals(NS))) {
159: if (!localName.equals(TEXT))
160: throw new SAXException("Unknown element " + qName);
161: } else {
162: contentHandler.startElement(namespaceURI, localName,
163: qName, atts);
164: return;
165: }
166: }
167:
168: state = STATE_INNER;
169:
170: // ======================= Start Text Document =======================
171: buffer.clear();
172: stackNode = new TerminalStackNode('\u0000', automaton.first,
173: null);
174: topmost = stackNode;
175: }
176:
177: /**
178: * Receive notification of character data.
179: */
180: public void characters(char[] text, int textstart, int textlength)
181: throws SAXException {
182: locatorImpl.setLineNumber(locator.getLineNumber());
183: locatorImpl.setColumnNumber(locator.getColumnNumber());
184:
185: if (state == STATE_OUTER) {
186: contentHandler.characters(text, textstart, textlength);
187:
188: return;
189: }
190:
191: if ((log != null) && (log.isDebugEnabled()))
192: log.debug("getting text "
193: + Decoder.toString(new String(text, textstart,
194: textlength)));
195:
196: buffer.push(text, textstart, textlength);
197: }
198:
199: /**
200: * Receive notification of the end of an element.
201: */
202: public void endElement(String namespaceURI, String localName,
203: String qName) throws SAXException {
204: locatorImpl.setLineNumber(locator.getLineNumber());
205: locatorImpl.setColumnNumber(locator.getColumnNumber());
206:
207: if (state == STATE_OUTER)
208: contentHandler.endElement(namespaceURI, localName, qName);
209:
210: if (state == STATE_INNER) {
211: if ((namespaceURI != null) && (namespaceURI.equals(NS))) {
212: if (!localName.equals(TEXT))
213: throw new SAXException("Unknown element " + qName);
214: } else
215: throw new SAXException("Unexpected element " + qName);
216: }
217:
218: // ======================= End Text Document =======================
219: while (stackNode != null) {
220: if ((log != null) && (log.isDebugEnabled())
221: && (buffer.available()))
222: log.debug("process " + Decoder.toChar(buffer.peek()));
223:
224: //log.debug("State "+automaton.indexOf(stackNode.state)+" "+stackNode.toCanonicalString(automaton));
225:
226: /* ============================ Reduce =================================== */
227: ShiftAction shiftAction = null;
228: ReduceAction[] reduceActions;
229:
230: if (buffer.available()) {
231: outer: while (((shiftAction = stackNode.state
232: .getShiftAction(buffer.peek())) == null)
233: && ((reduceActions = stackNode.state
234: .getReduceActions()).length > 0)) {
235: reduceActions = stackNode.state.getReduceActions();
236:
237: for (int index = 0; index <= reduceActions.length; index++) {
238: if (index == reduceActions.length)
239: break outer;
240:
241: ReduceAction reduceAction = reduceActions[index];
242:
243: StackNode second = (reduceAction.length == 2) ? stackNode
244: : null;
245: StackNode first = (reduceAction.length == 2) ? second.ancestor
246: : null;
247: StackNode previousStackNode = (reduceAction.length == 2) ? first.ancestor
248: : stackNode;
249:
250: GotoAction gotoAction = (reduceAction.symbol != null) ? previousStackNode.state
251: .getGotoAction(reduceAction.symbol)
252: : previousStackNode.state
253: .getGotoAction(reduceAction.pattern);
254:
255: if (gotoAction != null) {
256: if ((log != null) && (log.isDebugEnabled()))
257: log
258: .debug("State "
259: + automaton
260: .indexOf(stackNode.state)
261: + " " + reduceAction);
262:
263: stackNode = new DefinitionStackNode(
264: reduceAction, index, first, second,
265: gotoAction.state, previousStackNode);
266:
267: topmost = topmost.getTopMost(stackNode);
268:
269: break;
270: }
271: }
272: }
273: } else {
274: while ((reduceActions = stackNode.state
275: .getReduceActions()).length > 0) {
276: ReduceAction reduceAction = reduceActions[0];
277:
278: StackNode second = (reduceAction.length == 2) ? stackNode
279: : null;
280: StackNode first = (reduceAction.length == 2) ? second.ancestor
281: : null;
282: StackNode previousStackNode = (reduceAction.length == 2) ? first.ancestor
283: : stackNode;
284:
285: GotoAction gotoAction = (reduceAction.symbol != null) ? previousStackNode.state
286: .getGotoAction(reduceAction.symbol)
287: : previousStackNode.state
288: .getGotoAction(reduceAction.pattern);
289:
290: if ((automaton.first == previousStackNode.state)
291: && (grammar.getStartSymbol()
292: .equals(reduceAction.symbol))) {
293: if ((log != null) && (log.isDebugEnabled()))
294: log.debug("State "
295: + automaton
296: .indexOf(stackNode.state)
297: + " accept");
298:
299: stackNode = new DefinitionStackNode(
300: reduceAction, 0, first, second, null,
301: previousStackNode);
302: fireEvents();
303:
304: state = STATE_OUTER;
305:
306: return;
307: } else {
308: if ((log != null) && (log.isDebugEnabled()))
309: log.debug("State "
310: + automaton
311: .indexOf(stackNode.state)
312: + " " + reduceAction);
313:
314: stackNode = new DefinitionStackNode(
315: reduceAction, 0, first, second,
316: gotoAction.state, previousStackNode);
317: }
318: }
319: }
320:
321: /* ==================================== Shift =================================== */
322: if (shiftAction != null) {
323: if ((log != null) && (log.isDebugEnabled()))
324: log.debug("State "
325: + automaton.indexOf(stackNode.state) + " "
326: + shiftAction);
327:
328: stackNode = new TerminalStackNode(buffer.read(),
329: shiftAction.state, stackNode);
330:
331: topmost = topmost.getTopMost(stackNode);
332: } else {
333: if ((log != null) && (log.isDebugEnabled()))
334: if (buffer.available())
335: log.debug("State "
336: + automaton.indexOf(stackNode.state)
337: + " error "
338: + Decoder.toChar(buffer.peek()));
339: else
340: log.debug("State "
341: + automaton.indexOf(stackNode.state)
342: + " error EOF");
343:
344: //if (buffer.available())
345: // buffer.back(); // push character back into buffer
346: backtrack();
347: }
348: }
349:
350: if (buffer.available())
351: throw new IllegalArgumentException("Character "
352: + Decoder.toChar(buffer.peek())
353: + " is not expected");
354: else
355: throw new IllegalArgumentException(
356: "Eon of file is not expected");
357: }
358:
359: private void backtrack() throws SAXException {
360: while (automaton.first != stackNode.state) {
361: if ((log != null) && (log.isDebugEnabled()))
362: log.debug("State " + automaton.indexOf(stackNode.state)
363: + " backtracking");
364:
365: if (stackNode instanceof DefinitionStackNode) {
366: DefinitionStackNode definitionStackNode = (DefinitionStackNode) stackNode;
367: stackNode = (definitionStackNode.action.length == 0) ? stackNode.ancestor
368: : definitionStackNode.second;
369:
370: ReduceAction[] reduceActions = stackNode.state
371: .getReduceActions();
372:
373: if (reduceActions.length > (definitionStackNode.index + 1)) {
374: // another reduction is possible
375: ReduceAction reduceAction = reduceActions[definitionStackNode.index + 1];
376:
377: StackNode second = (reduceAction.length == 2) ? stackNode
378: : null;
379: StackNode first = (reduceAction.length == 2) ? second.ancestor
380: : null;
381: StackNode previousStackNode = (reduceAction.length == 2) ? first.ancestor
382: : stackNode;
383:
384: GotoAction gotoAction = (reduceAction.symbol != null) ? previousStackNode.state
385: .getGotoAction(reduceAction.symbol)
386: : previousStackNode.state
387: .getGotoAction(reduceAction.pattern);
388:
389: if ((log != null) && (log.isDebugEnabled()))
390: log.debug("State "
391: + automaton.indexOf(stackNode.state)
392: + " " + reduceAction);
393:
394: stackNode = new DefinitionStackNode(reduceAction,
395: definitionStackNode.index + 1, first,
396: second, gotoAction.state, previousStackNode);
397:
398: // reparse text
399: return;
400: }
401:
402: // else no other action is possible, going deeper
403: } else {
404: TerminalStackNode terminalStackNode = (TerminalStackNode) stackNode;
405: stackNode = stackNode.ancestor;
406:
407: ReduceAction[] reduceActions = stackNode.state
408: .getReduceActions();
409:
410: buffer.back();
411:
412: if (reduceActions.length > 0) {
413: // reduction is possible instead of shift action
414: ReduceAction reduceAction = reduceActions[0];
415:
416: StackNode second = (reduceAction.length == 2) ? stackNode
417: : null;
418: StackNode first = (reduceAction.length == 2) ? second.ancestor
419: : null;
420: StackNode previousStackNode = (reduceAction.length == 2) ? first.ancestor
421: : stackNode;
422:
423: GotoAction gotoAction = (reduceAction.symbol != null) ? previousStackNode.state
424: .getGotoAction(reduceAction.symbol)
425: : previousStackNode.state
426: .getGotoAction(reduceAction.pattern);
427:
428: if ((log != null) && (log.isDebugEnabled()))
429: log.debug("State "
430: + automaton.indexOf(stackNode.state)
431: + " " + reduceAction);
432:
433: stackNode = new DefinitionStackNode(reduceAction,
434: 0, first, second, gotoAction.state,
435: previousStackNode);
436:
437: return;
438: }
439:
440: // else no other action is possible, going deeper
441: }
442: }
443:
444: throw new SAXException("Could not recognize text at ["
445: + topmost.lineNumber + ":" + topmost.columnNumber + "]");
446: }
447:
448: private void fireEvents() throws SAXException {
449: contentHandler.startPrefixMapping("", NS_OUTPUT);
450: contentHandler.startElement(NS_OUTPUT, OUTPUT, OUTPUT,
451: new AttributesImpl());
452:
453: stackNode.toXML(contentHandler);
454:
455: contentHandler.endElement(NS_OUTPUT, OUTPUT, OUTPUT);
456: contentHandler.endPrefixMapping("");
457: }
458:
459: /**
460: * Receive notification of ignorable whitespace in element content.
461: */
462: public void ignorableWhitespace(char[] ch, int start, int length)
463: throws SAXException {
464: locatorImpl.setLineNumber(locator.getLineNumber());
465: locatorImpl.setColumnNumber(locator.getColumnNumber());
466:
467: if (state == STATE_OUTER)
468: contentHandler.ignorableWhitespace(ch, start, length);
469: }
470:
471: /**
472: * Begin the scope of a prefix-URI Namespace mapping.
473: */
474: public void startPrefixMapping(String prefix, String uri)
475: throws SAXException {
476: locatorImpl.setLineNumber(locator.getLineNumber());
477: locatorImpl.setColumnNumber(locator.getColumnNumber());
478:
479: contentHandler.startPrefixMapping(prefix, uri);
480: }
481:
482: /**
483: * End the scope of a prefix-URI mapping.
484: */
485: public void endPrefixMapping(String prefix) throws SAXException {
486: locatorImpl.setLineNumber(locator.getLineNumber());
487: locatorImpl.setColumnNumber(locator.getColumnNumber());
488:
489: contentHandler.endPrefixMapping(prefix);
490: }
491:
492: /**
493: * Receive notification of a processing instruction.
494: */
495: public void processingInstruction(String target, String data)
496: throws SAXException {
497: locatorImpl.setLineNumber(locator.getLineNumber());
498: locatorImpl.setColumnNumber(locator.getColumnNumber());
499:
500: if (state == STATE_OUTER)
501: contentHandler.processingInstruction(target, data);
502: }
503:
504: /**
505: * Receive notification of a skipped entity.
506: */
507: public void skippedEntity(String name) throws SAXException {
508: locatorImpl.setLineNumber(locator.getLineNumber());
509: locatorImpl.setColumnNumber(locator.getColumnNumber());
510:
511: if (state == STATE_OUTER)
512: contentHandler.skippedEntity(name);
513: }
514:
515: /**
516: * Receive notification of the end of a document.
517: */
518: public void endDocument() throws SAXException {
519: locatorImpl.setLineNumber(locator.getLineNumber());
520: locatorImpl.setColumnNumber(locator.getColumnNumber());
521:
522: contentHandler.endDocument();
523: }
524:
525: /**
526: * Report the start of DTD declarations, if any.
527: */
528: public void startDTD(String name, String publicId, String systemId)
529: throws SAXException {
530: if (lexicalHandler != null)
531: lexicalHandler.startDTD(name, publicId, systemId);
532: }
533:
534: /**
535: * Report the end of DTD declarations.
536: */
537: public void endDTD() throws SAXException {
538: if (lexicalHandler != null)
539: lexicalHandler.endDTD();
540: }
541:
542: /**
543: * Report the beginning of an entity.
544: */
545: public void startEntity(String name) throws SAXException {
546: if (lexicalHandler != null)
547: lexicalHandler.startEntity(name);
548: }
549:
550: /**
551: * Report the end of an entity.
552: */
553: public void endEntity(String name) throws SAXException {
554: if (lexicalHandler != null)
555: lexicalHandler.endEntity(name);
556: }
557:
558: /**
559: * Report the start of a CDATA section.
560: */
561: public void startCDATA() throws SAXException {
562: if (lexicalHandler != null)
563: lexicalHandler.startCDATA();
564: }
565:
566: /**
567: * Report the end of a CDATA section.
568: */
569: public void endCDATA() throws SAXException {
570: if (lexicalHandler != null)
571: lexicalHandler.endCDATA();
572: }
573:
574: /**
575: * Report an XML comment anywhere in the document.
576: */
577: public void comment(char[] ch, int start, int len)
578: throws SAXException {
579: if (lexicalHandler != null)
580: lexicalHandler.comment(ch, start, len);
581: }
582: }
|