001: /*
002: * Copyright (C) Chaperon. All rights reserved.
003: * -------------------------------------------------------------------------
004: * This software is published under the terms of the Apache Software License
005: * version 1.1, a copy of which has been included with this distribution in
006: * the LICENSE file.
007: */
008:
009: package net.sourceforge.chaperon.model.lexicon;
010:
011: import net.sourceforge.chaperon.model.pattern.*;
012: import net.sourceforge.chaperon.model.symbol.Terminal;
013:
014: import org.xml.sax.*;
015: import org.xml.sax.helpers.*;
016:
017: import java.util.Stack;
018:
019: /**
020: * This class should generate a lexicon from a SAX stream
021: *
022: * @author <a href="mailto:stephan@apache.org">Stephan Michels </a>
023: * @version CVS $Id: LexiconFactory.java,v 1.3 2003/12/09 19:55:52 benedikta Exp $
024: */
025: public class LexiconFactory extends DefaultHandler {
026: /** The namspace of the lexicon configuration */
027: public static final String NS = "http://chaperon.sourceforge.net/schema/lexicon/1.0";
028:
029: /** Element name */
030: public static final String LEXEME_ELEMENT = "lexeme";
031:
032: /** Attribute name of the symbol property */
033: public static final String SYMBOL_ATTRIBUTE = "symbol";
034:
035: /** Element name */
036: public static final String LEXICON_ELEMENT = "lexicon";
037:
038: /** Element name */
039: public static final String ALTERNATION_ELEMENT = "alt";
040:
041: /** Element name */
042: public static final String BEGINOFLINE_ELEMENT = "bol";
043:
044: /** Element name */
045: public static final String CHARACTERCLASS_ELEMENT = "cclass";
046:
047: /** Attribute name of the exclusive property */
048: public static final String EXCLUSIVE_ATTRIBUTE = "exclusive";
049:
050: /** Element name */
051: public static final String CHARACTERINTERVAL_ELEMENT = "cinterval";
052:
053: /** Attribute name of the min property */
054: public static final String CHARACTERINTERVAL_MIN_ATTRIBUTE = "min";
055:
056: /** Attribute name of the max property */
057: public static final String CHARACTERINTERVAL_MAX_ATTRIBUTE = "max";
058:
059: /** Element name */
060: public static final String CHARACTERSTRING_ELEMENT = "cstring";
061:
062: /** Attribute name of the sequence property */
063: public static final String CHARACTERSTRING_SEQUENCE_ATTRIBUTE = "content";
064:
065: /** Element name */
066: public static final String CHARACTERSET_ELEMENT = "cset";
067:
068: /** Attribute name of the characters property */
069: public static final String CHARACTERSET_CHARACTERS_ATTRIBUTE = "content";
070:
071: /* public final static String CHARACTERGENERIC_ELEMENT = "cgeneric";
072:
073: public final static String CHARACTERGENERIC_CODE_ATTRIBUTE = "code";*/
074: public static final String CODE_ATTRIBUTE = "code";
075:
076: /** Element name */
077: public static final String CONCATENATION_ELEMENT = "concat";
078:
079: /** Element name */
080: public static final String GROUP_ELEMENT = "group";
081:
082: /** Element name */
083: public static final String UNIVERSALCHARACTER_ELEMENT = "cuniversal";
084:
085: /** Element name */
086: public static final String ENDOFLINE_ELEMENT = "eol";
087:
088: /** Attribute name of the minOccurs property */
089: public static final String MINOCCURS_ATTRIBUTE = "minOccurs";
090:
091: /** Attribute name of the minOccurs property */
092: public static final String MAXOCCURS_ATTRIBUTE = "maxOccurs";
093: private static final int STATE_OUTER = 0;
094: private static final int STATE_LEXICON = 1;
095: private static final int STATE_LEXEME = 2;
096: private static final int STATE_CHARACTERCLASS = 3;
097: private static final int STATE_CHARACTERCLASSELEMENT = 4;
098: private int state = STATE_OUTER;
099: private Lexicon lexicon;
100: private Locator locator = null;
101: private Stack stack;
102:
103: /**
104: * Returns the generated lexicon
105: *
106: * @return Lexicon
107: */
108: public Lexicon getLexicon() {
109: return lexicon;
110: }
111:
112: private String getLocation() {
113: if (locator == null)
114: return "unknown";
115:
116: return locator.getSystemId() + ":" + locator.getLineNumber()
117: + ":" + locator.getColumnNumber();
118: }
119:
120: /**
121: * Receive an object for locating the origin of SAX document events.
122: */
123: public void setDocumentLocator(Locator locator) {
124: this .locator = locator;
125: }
126:
127: /**
128: * Receive notification of the beginning of a document.
129: */
130: public void startDocument() {
131: stack = new Stack();
132: }
133:
134: /**
135: * Return the content of the minOccurs attribute
136: *
137: * @param atts Attributes of an element
138: *
139: * @return minOccurs attribute
140: */
141: private int getMinOccursFromAttributes(Attributes atts) {
142: int minOccurs = 1;
143: String attribute = atts.getValue(MINOCCURS_ATTRIBUTE);
144:
145: if ((attribute != null) && (attribute.length() > 0)) {
146: try {
147: minOccurs = Integer.parseInt(attribute);
148: } catch (NumberFormatException e) {
149: // System.err.println("error: "+attribute+" ist not an integer number");
150: minOccurs = 1;
151: }
152:
153: if (minOccurs < 0)
154: minOccurs = 0;
155: }
156:
157: return minOccurs;
158: }
159:
160: /**
161: * Return the content of the maxOccurs attribute
162: *
163: * @param atts Attributes of an element
164: *
165: * @return maxOccurs attribute
166: */
167: private int getMaxOccursFromAttributes(Attributes atts) {
168: int maxOccurs = 1;
169: String attribute = atts.getValue(MAXOCCURS_ATTRIBUTE);
170:
171: if ((attribute != null) && (attribute.length() > 0)) {
172: if (attribute.equals("*"))
173: maxOccurs = Integer.MAX_VALUE;
174: else {
175: try {
176: maxOccurs = Integer.parseInt(attribute);
177: } catch (NumberFormatException e) {
178: // System.err.println("error: "+attribute+" ist not an integer number");
179: maxOccurs = 1;
180: }
181:
182: if (maxOccurs < 1)
183: maxOccurs = 1;
184: }
185: }
186:
187: return maxOccurs;
188: }
189:
190: /**
191: * @param atts
192: *
193: * @return
194: */
195: private boolean getExclusiveFromAttributes(Attributes atts) {
196: String attribute = atts.getValue(EXCLUSIVE_ATTRIBUTE);
197:
198: if ((attribute != null) && (attribute.length() > 0)) {
199: boolean value = false;
200:
201: try {
202: value = Boolean.valueOf(attribute).booleanValue();
203: return value;
204: } catch (Exception e) {
205: return false;
206: }
207: }
208:
209: return false;
210: }
211:
212: /**
213: * Receive notification of the beginning of an element.
214: *
215: * @param namespaceURI The Namespace URI, or the empty string if the element has no Namespace URI
216: * or if Namespace processing is not being performed.
217: * @param localName The local name (without prefix), or the empty string if Namespace processing
218: * is not being performed.
219: * @param qName The raw XML 1.0 name (with prefix), or the empty string if raw names are not
220: * available.
221: * @param atts The attributes attached to the element. If there are no attributes, it shall be an
222: * empty Attributes object.
223: */
224: public void startElement(String namespaceURI, String localName,
225: String qName, Attributes atts) throws SAXException {
226: if (namespaceURI.equals(NS)) {
227: if ((localName.equals(LEXICON_ELEMENT))
228: && (state == STATE_OUTER)) {
229: Lexicon lexicon = new Lexicon();
230: lexicon.setLocation(getLocation());
231: stack.push(lexicon);
232:
233: state = STATE_LEXICON;
234: } else if ((localName.equals(LEXEME_ELEMENT))
235: && (state == STATE_LEXICON)) {
236: Lexeme lexeme = new Lexeme();
237: lexeme.setLocation(getLocation());
238: if (atts.getValue(SYMBOL_ATTRIBUTE) != null)
239: lexeme.setSymbol(new Terminal(atts
240: .getValue(SYMBOL_ATTRIBUTE)));
241:
242: stack.push(lexeme);
243:
244: state = STATE_LEXEME;
245: } else if ((localName.equals(ALTERNATION_ELEMENT))
246: && (state == STATE_LEXEME)) {
247: Alternation alternation = new Alternation();
248: alternation.setLocation(getLocation());
249:
250: alternation
251: .setMinOccurs(getMinOccursFromAttributes(atts));
252: alternation
253: .setMaxOccurs(getMaxOccursFromAttributes(atts));
254: stack.push(alternation);
255: } else if ((localName.equals(CONCATENATION_ELEMENT))
256: && (state == STATE_LEXEME)) {
257: Concatenation concatenation = new Concatenation();
258: concatenation.setLocation(getLocation());
259:
260: concatenation
261: .setMinOccurs(getMinOccursFromAttributes(atts));
262: concatenation
263: .setMaxOccurs(getMaxOccursFromAttributes(atts));
264: stack.push(concatenation);
265: } else if ((localName.equals(CHARACTERSTRING_ELEMENT))
266: && (state == STATE_LEXEME)) {
267: CharacterString characterstring = new CharacterString();
268: characterstring.setLocation(getLocation());
269:
270: characterstring
271: .setMinOccurs(getMinOccursFromAttributes(atts));
272: characterstring
273: .setMaxOccurs(getMaxOccursFromAttributes(atts));
274:
275: if (atts.getValue(CODE_ATTRIBUTE) != null) {
276: char character = (char) Integer.parseInt(atts
277: .getValue(CODE_ATTRIBUTE));
278: characterstring
279: .setString(String.valueOf(character));
280: } else
281: characterstring
282: .setString(atts
283: .getValue(CHARACTERSTRING_SEQUENCE_ATTRIBUTE));
284:
285: stack.push(characterstring);
286: } else if ((localName.equals(GROUP_ELEMENT))
287: && (state == STATE_LEXEME)) {
288: PatternGroup group = new PatternGroup();
289: group.setLocation(getLocation());
290:
291: group.setMinOccurs(getMinOccursFromAttributes(atts));
292: group.setMaxOccurs(getMaxOccursFromAttributes(atts));
293: stack.push(group);
294: } else if ((localName.equals(UNIVERSALCHARACTER_ELEMENT))
295: && (state == STATE_LEXEME)) {
296: UniversalCharacter uni = new UniversalCharacter();
297: uni.setLocation(getLocation());
298:
299: uni.setMinOccurs(getMinOccursFromAttributes(atts));
300: uni.setMaxOccurs(getMaxOccursFromAttributes(atts));
301:
302: stack.push(uni);
303: } else if ((localName.equals(BEGINOFLINE_ELEMENT))
304: && (state == STATE_LEXEME)) {
305: BeginOfLine bol = new BeginOfLine();
306: bol.setLocation(getLocation());
307:
308: stack.push(bol);
309: } else if ((localName.equals(ENDOFLINE_ELEMENT))
310: && (state == STATE_LEXEME)) {
311: EndOfLine eol = new EndOfLine();
312:
313: stack.push(eol);
314: } else if ((localName.equals(CHARACTERCLASS_ELEMENT))
315: && (state == STATE_LEXEME)) {
316: CharacterClass characterclass = new CharacterClass();
317: characterclass.setLocation(getLocation());
318:
319: characterclass
320: .setExclusive(getExclusiveFromAttributes(atts));
321: characterclass
322: .setMinOccurs(getMinOccursFromAttributes(atts));
323: characterclass
324: .setMaxOccurs(getMaxOccursFromAttributes(atts));
325: stack.push(characterclass);
326:
327: state = STATE_CHARACTERCLASS;
328: } else if ((localName.equals(CHARACTERSET_ELEMENT))
329: && (state == STATE_CHARACTERCLASS)) {
330: CharacterSet characterset = new CharacterSet();
331: characterset.setLocation(getLocation());
332:
333: if (atts.getValue(CODE_ATTRIBUTE) != null) {
334: char character = (char) Integer.decode(
335: atts.getValue(CODE_ATTRIBUTE)).intValue();
336: characterset.setCharacters(String
337: .valueOf(character));
338: } else
339: characterset
340: .setCharacters(atts
341: .getValue(CHARACTERSET_CHARACTERS_ATTRIBUTE));
342:
343: stack.push(characterset);
344:
345: state = STATE_CHARACTERCLASSELEMENT;
346: } else if ((localName.equals(CHARACTERINTERVAL_ELEMENT))
347: && (state == STATE_CHARACTERCLASS)) {
348: CharacterInterval characterinterval = new CharacterInterval();
349: characterinterval.setLocation(getLocation());
350:
351: characterinterval.setMinimum(atts.getValue(
352: CHARACTERINTERVAL_MIN_ATTRIBUTE).charAt(0));
353: characterinterval.setMaximum(atts.getValue(
354: CHARACTERINTERVAL_MAX_ATTRIBUTE).charAt(0));
355: stack.push(characterinterval);
356:
357: state = STATE_CHARACTERCLASSELEMENT;
358: } else
359: throw new SAXException("Unexpected element " + qName
360: + " at " + getLocation());
361: } else
362: throw new SAXException("Unexpected element " + qName
363: + " at " + getLocation());
364: }
365:
366: /**
367: * Receive notification of the end of an element.
368: *
369: * @param namespaceURI The Namespace URI, or the empty string if the element has no Namespace URI
370: * or if Namespace processing is not being performed.
371: * @param localName The local name (without prefix), or the empty string if Namespace processing
372: * is not being performed.
373: * @param qName The raw XML 1.0 name (with prefix), or the empty string if raw names are not
374: * available.
375: *
376: * @throws SAXException
377: */
378: public void endElement(String namespaceURI, String localName,
379: String qName) throws SAXException {
380: if (namespaceURI.equals(NS)) {
381: if ((localName.equals(LEXICON_ELEMENT))
382: && (state == STATE_LEXICON)) {
383: lexicon = (Lexicon) stack.pop();
384: state = STATE_OUTER;
385: } else if ((localName.equals(LEXEME_ELEMENT))
386: && (state == STATE_LEXEME)) {
387: Lexeme lexeme = (Lexeme) stack.pop();
388: Lexicon lexicon = (Lexicon) stack.peek();
389:
390: lexicon.addLexeme(lexeme);
391: state = STATE_LEXICON;
392: } else if (((localName.equals(ALTERNATION_ELEMENT))
393: || (localName.equals(CONCATENATION_ELEMENT))
394: || (localName.equals(CHARACTERSTRING_ELEMENT))
395: || (localName.equals(GROUP_ELEMENT))
396: || (localName.equals(UNIVERSALCHARACTER_ELEMENT))
397: || (localName.equals(BEGINOFLINE_ELEMENT)) || (localName
398: .equals(ENDOFLINE_ELEMENT)))
399: && (state == STATE_LEXEME)) {
400: Pattern patternelement = (Pattern) stack.pop();
401:
402: if (stack.peek() instanceof Alternation) {
403: Alternation alternation = (Alternation) stack
404: .peek();
405:
406: alternation.addPattern(patternelement);
407: } else if (stack.peek() instanceof Concatenation) {
408: Concatenation concatenation = (Concatenation) stack
409: .peek();
410:
411: concatenation.addPattern(patternelement);
412: } else if (stack.peek() instanceof PatternGroup) {
413: PatternGroup group = (PatternGroup) stack.peek();
414:
415: group.addPattern(patternelement);
416: } else if (stack.peek() instanceof Lexeme) {
417: Lexeme lexeme = (Lexeme) stack.peek();
418:
419: lexeme.setDefinition(patternelement);
420: }
421: } else if ((localName.equals(CHARACTERCLASS_ELEMENT))
422: && (state == STATE_CHARACTERCLASS)) {
423: Pattern patternelement = (Pattern) stack.pop();
424:
425: if (stack.peek() instanceof Alternation) {
426: Alternation alternation = (Alternation) stack
427: .peek();
428:
429: alternation.addPattern(patternelement);
430: } else if (stack.peek() instanceof Concatenation) {
431: Concatenation concatenation = (Concatenation) stack
432: .peek();
433:
434: concatenation.addPattern(patternelement);
435: } else if (stack.peek() instanceof PatternGroup) {
436: PatternGroup group = (PatternGroup) stack.peek();
437:
438: group.addPattern(patternelement);
439: } else if (stack.peek() instanceof Lexeme) {
440: Lexeme lexeme = (Lexeme) stack.peek();
441:
442: lexeme.setDefinition(patternelement);
443: }
444:
445: state = STATE_LEXEME;
446: } else if (((localName.equals(CHARACTERSET_ELEMENT)) || (localName
447: .equals(CHARACTERINTERVAL_ELEMENT)))
448: && (state == STATE_CHARACTERCLASSELEMENT)) {
449: CharacterClassElement characterclasselement = (CharacterClassElement) stack
450: .pop();
451: CharacterClass characterclass = (CharacterClass) stack
452: .peek();
453:
454: characterclass
455: .addCharacterClassElement(characterclasselement);
456:
457: state = STATE_CHARACTERCLASS;
458: } else
459: throw new SAXException("Unexpected element " + qName
460: + " at " + getLocation());
461: } else
462: throw new SAXException("Unexpected element " + qName
463: + " at " + getLocation());
464: }
465: }
|