001: /*
002: * Copyright (C) Chaperon. All rights reserved.
003: * -------------------------------------------------------------------------
004: * This software is published under the terms of the Apache Software License
005: * version 1.1, a copy of which has been included with this distribution in
006: * the LICENSE file.
007: */
008:
009: package net.sourceforge.chaperon.process.extended;
010:
011: import net.sourceforge.chaperon.model.extended.ExtendedGrammar;
012:
013: import org.apache.commons.logging.Log;
014:
015: import org.xml.sax.Attributes;
016: import org.xml.sax.ContentHandler;
017: import org.xml.sax.Locator;
018: import org.xml.sax.SAXException;
019: import org.xml.sax.ext.LexicalHandler;
020: import org.xml.sax.helpers.AttributesImpl;
021: import org.xml.sax.helpers.LocatorImpl;
022:
023: /**
* This class simulates a pushdown automaton using the parser automaton class.
025: *
026: * @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
027: * @version CVS $Id: ExtendedGeneralParserProcessor.java,v 1.1 2004/01/04 16:49:12 benedikta Exp $
028: */
029: public class ExtendedGeneralParserProcessor implements ContentHandler,
030: LexicalHandler {
031: public static final String NS = "http://chaperon.sourceforge.net/schema/text/1.0";
032: public static final String TEXT = "text";
033:
034: /** Namespace for the generated SAX events. */
035: public static final String NS_OUTPUT = "http://chaperon.sourceforge.net/schema/syntaxtree/2.0";
036: public static final String OUTPUT = "output";
037: public static final String ERROR = "error";
038: private ContentHandler contentHandler = null;
039: private LexicalHandler lexicalHandler = null;
040: private Locator locator = null;
041: private LocatorImpl locatorImpl = null;
042: private static final int STATE_OUTER = 0;
043: private static final int STATE_INNER = 1;
044: private int state = STATE_OUTER;
045: private ExtendedParserAutomaton automaton;
046: private ExtendedGrammar grammar;
047: private boolean flatten = false;
048: private StackNodeSet current = new StackNodeSet();
049: private StackNodeSet next = new StackNodeSet();
050: private Log log;
051: private int maxActiveStates = 50;
052:
/**
 * Creates an unconfigured parser processor. The automaton, the handlers and the log
 * have to be supplied through the corresponding setters.
 */
public ExtendedGeneralParserProcessor() {
    super();
}
058:
/**
 * Create a new parser processor.
 *
 * <p>The automaton is wired up exactly as {@link #setExtendedParserAutomaton} does it:
 * the grammar is taken from the automaton and both stack node sets are bound to it.
 * A <code>null</code> automaton is accepted and leaves the processor unconfigured.</p>
 *
 * @param automaton Parser automaton, which the processor should use.
 * @param log Log, which should be used.
 */
public ExtendedGeneralParserProcessor(ExtendedParserAutomaton automaton, Log log) {
    this.automaton = automaton;
    this.log = log;

    // Without these steps endElement() would dereference a null grammar and the
    // stack node sets would never learn about the automaton.
    if (automaton != null) {
        this.grammar = automaton.getExtendedGrammar();
        current.setExtendedParserAutomaton(automaton);
        next.setExtendedParserAutomaton(automaton);
    }
}
071:
072: /**
073: * Set the parser automaton for the processor.
074: *
075: * @param automaton Parser automaton.
076: */
077: public void setExtendedParserAutomaton(
078: ExtendedParserAutomaton automaton) {
079: this .automaton = automaton;
080: this .grammar = automaton.getExtendedGrammar();
081: current.setExtendedParserAutomaton(automaton);
082: next.setExtendedParserAutomaton(automaton);
083: }
084:
085: /**
086: * Set the <code>ContentHandler</code> that will receive XML data.
087: */
088: public void setContentHandler(ContentHandler handler) {
089: this .contentHandler = handler;
090: }
091:
092: /**
093: * Set the <code>LexicalHandler</code> that will receive XML data.
094: */
095: public void setLexicalHandler(LexicalHandler handler) {
096: this .lexicalHandler = handler;
097: }
098:
099: /**
100: * Provide processor with a log.
101: *
102: * @param log The log.
103: */
104: public void setLog(Log log) {
105: this .log = log;
106: }
107:
108: /**
109: * If the adapter should produce a more flatten XML hirachy, which means elements which the same
110: * name will be collapsed
111: *
112: * @param flatten True, if a more flatten hirachy should be produced.
113: */
114: public void setFlatten(boolean flatten) {
115: this .flatten = flatten;
116: }
117:
118: /**
119: * Receive an object for locating the origin of SAX document events.
120: */
121: public void setDocumentLocator(Locator locator) {
122: this .locator = locator;
123: if (locator != null) {
124: this .locatorImpl = new LocatorImpl(locator);
125: contentHandler.setDocumentLocator(locatorImpl);
126: }
127: }
128:
129: /**
130: * Receive notification of the beginning of a document.
131: */
132: public void startDocument() throws SAXException {
133: locatorImpl.setLineNumber(locator.getLineNumber());
134: locatorImpl.setColumnNumber(locator.getColumnNumber());
135: contentHandler.startDocument();
136: state = STATE_OUTER;
137: }
138:
139: /**
140: * Receive notification of the beginning of an element.
141: */
142: public void startElement(String namespaceURI, String localName,
143: String qName, Attributes atts) throws SAXException {
144: locatorImpl.setLineNumber(locator.getLineNumber());
145: locatorImpl.setColumnNumber(locator.getColumnNumber());
146:
147: if (state == STATE_INNER)
148: throw new SAXException("Unexpected element " + qName);
149:
150: if (state == STATE_OUTER) {
151: if ((namespaceURI != null) && (namespaceURI.equals(NS))) {
152: if (!localName.equals(TEXT))
153: throw new SAXException("Unknown element " + qName);
154: } else {
155: contentHandler.startElement(namespaceURI, localName,
156: qName, atts);
157: return;
158: }
159: }
160:
161: state = STATE_INNER;
162:
163: System.out.println("start processing");
164:
165: // ======================= Start Text Document =======================
166: current.clear();
167: current.push(new TerminalStackNode('\u0000', automaton.first,
168: null));
169: next.clear();
170: }
171:
172: /**
173: * Receive notification of character data.
174: */
175: public void characters(char[] text, int textstart, int textlength)
176: throws SAXException {
177: locatorImpl.setLineNumber(locator.getLineNumber());
178: locatorImpl.setColumnNumber(locator.getColumnNumber());
179:
180: if (state == STATE_OUTER) {
181: contentHandler.characters(text, textstart, textlength);
182: return;
183: }
184:
185: System.out.println("process text \""
186: + (new String(text, textstart, textlength)) + "\"");
187:
188: for (int position = textstart; position < (textstart + textlength); position++) {
189: System.out
190: .println("\n===================================\nProcess "
191: + text[position]);
192:
193: if (current.isEmpty())
194: throw new IllegalStateException(
195: "Parsing process is aborted");
196:
197: printStates();
198:
199: //if (current.size()>maxActiveStates)
200: // throw new IllegalStateException("Processor occupied too many states");
201: /* ============================ Reduce =================================== */
202: System.out.println("Count of states : " + current.size());
203:
204: if ((log != null) && (log.isDebugEnabled()))
205: log.debug("------- check reduce actions ---------");
206:
207: int watchdog = 0;
208: while (!current.isEmpty()) {
209: printStates();
210:
211: //if (watchdog++ > 220)
212: // throw new IllegalStateException("overflow");
213: //printStates();
214: StackNode stackNode = current.pop();
215:
216: if (stackNode.state.getShiftAction(text[position]) != null)
217: ;
218:
219: next.push(stackNode);
220:
221: LookaheadReduceAction[] reduceActions = stackNode.state
222: .getLookaheadReduceActions();
223:
224: for (int i = 0; i < reduceActions.length; i++)
225: if (reduceActions[i].contains(text[position])) {
226: LookaheadReduceAction reduceAction = reduceActions[i];
227:
228: if (reduceAction.length == 0) {
229: GotoAction gotoAction = stackNode.state
230: .getGotoAction(reduceAction);
231:
232: if (gotoAction != null) {
233: if ((log != null)
234: && (log.isDebugEnabled()))
235: log
236: .debug("State "
237: + automaton
238: .indexOf(stackNode.state)
239: + " "
240: + reduceAction);
241:
242: if (gotoAction.state == stackNode.state)
243: System.out
244: .println("node rejected because states are equal");
245: else
246: current
247: .push(new DefinitionStackNode(
248: reduceAction, 0,
249: null, null,
250: gotoAction.state,
251: stackNode));
252: }
253: } else {
254: StackNode second = stackNode;
255: for (int j = 0; j < second.ancestors.length; j++) {
256: StackNode first = second.ancestors[j];
257: for (int k = 0; k < first.ancestors.length; k++) {
258: StackNode previousStackNode = first.ancestors[k];
259:
260: GotoAction gotoAction = previousStackNode.state
261: .getGotoAction(reduceAction);
262:
263: if (gotoAction != null) {
264: if ((log != null)
265: && (log
266: .isDebugEnabled()))
267: log
268: .debug("State "
269: + automaton
270: .indexOf(stackNode.state)
271: + " "
272: + reduceAction);
273:
274: current
275: .push(new DefinitionStackNode(
276: reduceAction,
277: 0,
278: first,
279: second,
280: gotoAction.state,
281: previousStackNode));
282: }
283: }
284: }
285: }
286: }
287: }
288:
289: swapStacks();
290:
291: printStates();
292:
293: /* ==================================== Shift =================================== */
294:
295: //System.out.println("Count of states : "+current.size());
296: if ((log != null) && (log.isDebugEnabled()))
297: log.debug("------- check shift actions ---------");
298:
299: while (!current.isEmpty()) {
300: //printStates();
301: StackNode stackNode = current.pop();
302:
303: ShiftAction shiftAction = stackNode.state
304: .getShiftAction(text[position]);
305:
306: if (shiftAction != null) {
307: if ((log != null) && (log.isDebugEnabled()))
308: log.debug( /*"State "+state+*/
309: " shift character '" + text[position] + "'");
310:
311: next.push(new TerminalStackNode(text[position],
312: shiftAction.state, stackNode));
313: }
314: }
315:
316: if (next.isEmpty())
317: throw new IllegalArgumentException("Character '"
318: + text[position] + "' is not expected");
319:
320: swapStacks();
321:
322: System.out
323: .println("------- finished check actions ---------");
324:
325: printStates();
326: }
327: }
328:
329: /**
330: * Receive notification of the end of an element.
331: */
332: public void endElement(String namespaceURI, String localName,
333: String qName) throws SAXException {
334: locatorImpl.setLineNumber(locator.getLineNumber());
335: locatorImpl.setColumnNumber(locator.getColumnNumber());
336:
337: if (state == STATE_OUTER)
338: contentHandler.endElement(namespaceURI, localName, qName);
339:
340: if (state == STATE_INNER) {
341: if ((namespaceURI != null) && (namespaceURI.equals(NS))) {
342: if (!localName.equals(TEXT))
343: throw new SAXException("Unknown element " + qName);
344: } else
345: throw new SAXException("Unexpected element " + qName);
346: }
347:
348: System.out.println("end processing");
349:
350: // ======================= End Text Document =======================
351: System.out
352: .println("\n===================================\nProcess EOF");
353:
354: while (!current.isEmpty()) {
355: printStates();
356:
357: StackNode stackNode = current.pop();
358:
359: ReduceAction[] reduceActions = stackNode.state
360: .getReduceActions();
361:
362: for (int i = 0; i < reduceActions.length; i++) {
363: ReduceAction reduceAction = reduceActions[i];
364:
365: if (reduceAction.length == 0) {
366: GotoAction gotoAction = stackNode.state
367: .getGotoAction(reduceAction);
368:
369: if ((automaton.first == stackNode.state)
370: && (grammar.getStartSymbol()
371: .equals(reduceAction.symbol))) {
372: if ((log != null) && (log.isDebugEnabled()))
373: log.debug("State "
374: + automaton
375: .indexOf(stackNode.state)
376: + " accept");
377:
378: next.push(new DefinitionStackNode(reduceAction,
379: 0, null, null, null, stackNode));
380: } else {
381: if ((log != null) && (log.isDebugEnabled()))
382: log.debug("State "
383: + automaton
384: .indexOf(stackNode.state)
385: + " " + reduceAction);
386:
387: if (gotoAction.state == stackNode.state)
388: System.out
389: .println("node rejected because states are equal");
390: else
391: current.push(new DefinitionStackNode(
392: reduceAction, 0, null, null,
393: gotoAction.state, stackNode));
394: }
395: } else {
396: StackNode second = stackNode;
397:
398: //System.out.println("second="+automaton.indexOf(second.state));
399: for (int j = 0; j < second.ancestors.length; j++) {
400: StackNode first = second.ancestors[j];
401:
402: //System.out.println("first="+automaton.indexOf(first.state));
403: //if (second.ancestors.length>1)
404: // System.out.println("nodes="+second.toCanonicalString(automaton));
405: for (int k = 0; k < first.ancestors.length; k++) {
406: StackNode previousStackNode = first.ancestors[k];
407:
408: GotoAction gotoAction = previousStackNode.state
409: .getGotoAction(reduceAction);
410:
411: //System.out.println("j="+j+" k="+k);
412: if ((automaton.first == previousStackNode.state)
413: && (grammar.getStartSymbol()
414: .equals(reduceAction.symbol))) {
415: if ((log != null)
416: && (log.isDebugEnabled()))
417: log
418: .debug("State "
419: + automaton
420: .indexOf(stackNode.state)
421: + " accept");
422:
423: next.push(new DefinitionStackNode(
424: reduceAction, 0, first, second,
425: null, previousStackNode));
426: } else {
427: if ((log != null)
428: && (log.isDebugEnabled()))
429: log
430: .debug("State "
431: + automaton
432: .indexOf(stackNode.state)
433: + " "
434: + reduceAction);
435:
436: current.push(new DefinitionStackNode(
437: reduceAction, 0, first, second,
438: gotoAction.state,
439: previousStackNode));
440:
441: //System.out.println("origin="+automaton.indexOf(previousStackNode.state));
442: }
443: }
444: }
445: }
446: }
447: }
448:
449: if (log.isDebugEnabled())
450: log.debug("Parser found " + next.size() + " alternatives");
451:
452: System.out.println();
453:
454: contentHandler.startPrefixMapping("", NS_OUTPUT);
455: contentHandler.startElement(NS_OUTPUT, OUTPUT, OUTPUT,
456: new AttributesImpl());
457:
458: int index = 1;
459: while (!next.isEmpty()) {
460: StackNode node = next.pop();
461:
462: node.toXML(contentHandler);
463: index++;
464: }
465:
466: if (next.size() > 1)
467: log.warn("ExtendedGrammar is ambig, found " + next.size()
468: + " alternative trees");
469:
470: contentHandler.endElement(NS_OUTPUT, OUTPUT, OUTPUT);
471: contentHandler.endPrefixMapping("");
472:
473: state = STATE_OUTER;
474: }
475:
476: /**
477: * Receive notification of ignorable whitespace in element content.
478: */
479: public void ignorableWhitespace(char[] ch, int start, int length)
480: throws SAXException {
481: locatorImpl.setLineNumber(locator.getLineNumber());
482: locatorImpl.setColumnNumber(locator.getColumnNumber());
483:
484: if (state == STATE_OUTER)
485: contentHandler.ignorableWhitespace(ch, start, length);
486: }
487:
488: /**
489: * Begin the scope of a prefix-URI Namespace mapping.
490: */
491: public void startPrefixMapping(String prefix, String uri)
492: throws SAXException {
493: locatorImpl.setLineNumber(locator.getLineNumber());
494: locatorImpl.setColumnNumber(locator.getColumnNumber());
495:
496: contentHandler.startPrefixMapping(prefix, uri);
497: }
498:
499: /**
500: * End the scope of a prefix-URI mapping.
501: */
502: public void endPrefixMapping(String prefix) throws SAXException {
503: locatorImpl.setLineNumber(locator.getLineNumber());
504: locatorImpl.setColumnNumber(locator.getColumnNumber());
505:
506: contentHandler.endPrefixMapping(prefix);
507: }
508:
509: /**
510: * Receive notification of a processing instruction.
511: */
512: public void processingInstruction(String target, String data)
513: throws SAXException {
514: locatorImpl.setLineNumber(locator.getLineNumber());
515: locatorImpl.setColumnNumber(locator.getColumnNumber());
516:
517: if (state == STATE_OUTER)
518: contentHandler.processingInstruction(target, data);
519: }
520:
521: /**
522: * Receive notification of a skipped entity.
523: */
524: public void skippedEntity(String name) throws SAXException {
525: locatorImpl.setLineNumber(locator.getLineNumber());
526: locatorImpl.setColumnNumber(locator.getColumnNumber());
527:
528: if (state == STATE_OUTER)
529: contentHandler.skippedEntity(name);
530: }
531:
532: /**
533: * Receive notification of the end of a document.
534: */
535: public void endDocument() throws SAXException {
536: locatorImpl.setLineNumber(locator.getLineNumber());
537: locatorImpl.setColumnNumber(locator.getColumnNumber());
538:
539: if (state == STATE_OUTER)
540: contentHandler.endDocument();
541: }
542:
543: /**
544: * Report the start of DTD declarations, if any.
545: */
546: public void startDTD(String name, String publicId, String systemId)
547: throws SAXException {
548: if (lexicalHandler != null)
549: lexicalHandler.startDTD(name, publicId, systemId);
550: }
551:
552: /**
553: * Report the end of DTD declarations.
554: */
555: public void endDTD() throws SAXException {
556: if (lexicalHandler != null)
557: lexicalHandler.endDTD();
558: }
559:
560: /**
561: * Report the beginning of an entity.
562: */
563: public void startEntity(String name) throws SAXException {
564: if (lexicalHandler != null)
565: lexicalHandler.startEntity(name);
566: }
567:
568: /**
569: * Report the end of an entity.
570: */
571: public void endEntity(String name) throws SAXException {
572: if (lexicalHandler != null)
573: lexicalHandler.endEntity(name);
574: }
575:
576: /**
577: * Report the start of a CDATA section.
578: */
579: public void startCDATA() throws SAXException {
580: if (lexicalHandler != null)
581: lexicalHandler.startCDATA();
582: }
583:
584: /**
585: * Report the end of a CDATA section.
586: */
587: public void endCDATA() throws SAXException {
588: if (lexicalHandler != null)
589: lexicalHandler.endCDATA();
590: }
591:
592: /**
593: * Report an XML comment anywhere in the document.
594: */
595: public void comment(char[] ch, int start, int len)
596: throws SAXException {
597: if (lexicalHandler != null)
598: lexicalHandler.comment(ch, start, len);
599: }
600:
601: private void printStates() {
602: System.out.println("Current states:");
603: System.out.println(current.toCanonicalString());
604: System.out.println("Next states:");
605: System.out.println(next.toCanonicalString());
606: }
607:
608: private void swapStacks() {
609: StackNodeSet dummy = next;
610: next = current;
611: current = dummy;
612: next.clear();
613: }
614: }
|