001: /*
002: * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved
003: *
004: * This file is part of Resin(R) Open Source
005: *
006: * Each copy or derived work must preserve the copyright notice and this
007: * notice unmodified.
008: *
009: * Resin Open Source is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU General Public License as published by
011: * the Free Software Foundation; either version 2 of the License, or
012: * (at your option) any later version.
013: *
014: * Resin Open Source is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
017: * of NON-INFRINGEMENT. See the GNU General Public License for more
018: * details.
019: *
020: * You should have received a copy of the GNU General Public License
021: * along with Resin Open Source; if not, write to the
022: *
023: * Free Software Foundation, Inc.
024: * 59 Temple Place, Suite 330
025: * Boston, MA 02111-1307 USA
026: *
027: * @author Scott Ferguson
028: */
029:
030: package com.caucho.relaxng;
031:
032: import com.caucho.relaxng.pattern.*;
033: import com.caucho.util.CharBuffer;
034: import com.caucho.util.IntMap;
035: import com.caucho.util.L10N;
036: import com.caucho.vfs.Path;
037: import com.caucho.vfs.ReadStream;
038: import com.caucho.vfs.Vfs;
039: import com.caucho.xml.QName;
040: import com.caucho.xml.XmlChar;
041:
042: import org.xml.sax.InputSource;
043: import org.xml.sax.SAXException;
044:
045: import java.io.IOException;
046: import java.io.InputStream;
047: import java.util.HashMap;
048: import java.util.logging.Level;
049: import java.util.logging.Logger;
050:
051: /**
052: * Builder for the relax.
053: */
054: public class CompactParser {
055: private static final L10N L = new L10N(CompactParser.class);
056: private static final Logger log = Logger
057: .getLogger(CompactParser.class.getName());
058:
059: private static final int IDENTIFIER = 256;
060:
061: private static final int NAMESPACE = IDENTIFIER + 1;
062: private static final int DEFAULT = NAMESPACE + 1;
063:
064: private static final int START = DEFAULT + 1;
065: private static final int DIV = START + 1;
066: private static final int INCLUDE = DIV + 1;
067:
068: private static final int ELEMENT = INCLUDE + 1;
069: private static final int ATTRIBUTE = ELEMENT + 1;
070:
071: private static final int TEXT = ATTRIBUTE + 1;
072: private static final int STRING = TEXT + 1;
073: private static final int TOKEN = STRING + 1;
074: private static final int LITERAL = TOKEN + 1;
075:
076: private static final int EMPTY = LITERAL + 1;
077:
078: private static final int COMMENT = EMPTY + 1;
079:
080: private static final IntMap _tokenMap = new IntMap();
081:
082: private GrammarPattern _grammar;
083: private Pattern _pattern;
084:
085: private String _ns = "";
086: private HashMap<String, String> _nsMap;
087:
088: private Path _pwd;
089: private ReadStream _is;
090: private String _filename;
091: private int _line;
092:
093: private int _peek = -1;
094: private int _peekToken = -1;
095:
096: private CharBuffer _cb = new CharBuffer();
097: private String _lexeme;
098:
099: private int _generatedId;
100:
101: CompactParser() {
102: }
103:
104: /**
105: * Gets the root pattern.
106: */
107: public GrammarPattern getGrammar() {
108: return _grammar;
109: }
110:
111: public void setGeneratedId(int id) {
112: _generatedId = id;
113: }
114:
115: public String generateId() {
116: return "__caucho_" + _generatedId++;
117: }
118:
119: /**
120: * Parses the relax file.
121: */
122: public void parse(InputSource source) throws SAXException,
123: IOException, RelaxException {
124: InputStream is = source.getByteStream();
125:
126: _pwd = null;
127:
128: if (is instanceof ReadStream) {
129: _is = (ReadStream) is;
130: _filename = _is.getUserPath();
131: _pwd = _is.getPath().getParent();
132: }
133: if (is != null)
134: _is = Vfs.openRead(is);
135: else
136: _is = Vfs.openRead(source.getSystemId());
137:
138: if (_filename == null)
139: _filename = source.getSystemId();
140: _line = 1;
141:
142: if (_pwd == null)
143: _pwd = Vfs.lookup(_filename).getParent();
144:
145: try {
146: parse();
147: } catch (RelaxException e) {
148: log.log(Level.FINER, e.toString(), e);
149:
150: // xml/1196
151: //throw new SAXException(_filename + ":" + _line + ": " + e.getMessage(), e);
152: throw new SAXException(_filename + ":" + _line + ": "
153: + e.getMessage());
154: } finally {
155: _is.close();
156: }
157: }
158:
159: /**
160: * Internal parser.
161: */
162: private void parse() throws SAXException, IOException,
163: RelaxException {
164: _grammar = new GrammarPattern();
165: _nsMap = new HashMap<String, String>();
166:
167: parseDeclarations();
168:
169: int token = parseToken();
170: _peekToken = token;
171:
172: switch (token) {
173: case START:
174: case IDENTIFIER:
175: case INCLUDE:
176: parseGrammar(_grammar);
177: break;
178:
179: case COMMENT:
180: break;
181:
182: default:
183: _grammar.setStart(parsePattern(_grammar));
184: break;
185: }
186: }
187:
188: /**
189: * Parses declarations.
190: */
191: private void parseDeclarations() throws SAXException, IOException,
192: RelaxException {
193: while (true) {
194: int token = parseToken();
195:
196: _peekToken = token;
197:
198: switch (token) {
199: case DEFAULT:
200: case NAMESPACE:
201: parseNamespace();
202: break;
203:
204: case COMMENT:
205: break;
206:
207: default:
208: return;
209: }
210: }
211: }
212:
213: /**
214: * Parses the namespace declaration
215: */
216: private void parseNamespace() throws SAXException, IOException,
217: RelaxException {
218: boolean isDefault = false;
219: int token = parseToken();
220:
221: if (token == DEFAULT) {
222: isDefault = true;
223: token = parseToken();
224: }
225:
226: if (token != NAMESPACE)
227: throw error(L.l("expected 'namespace' at {0}", _cb));
228:
229: token = parseToken();
230:
231: if (token != IDENTIFIER)
232: throw error(L.l("expected identifier at {0}", _cb));
233:
234: String prefix = _lexeme;
235:
236: token = parseToken();
237:
238: if (token != '=')
239: throw error(L.l("expected '=' at {0}", _cb));
240:
241: String value = parseLiteral();
242:
243: if (isDefault)
244: _ns = value;
245:
246: _nsMap.put(prefix, value);
247: }
248:
249: /**
250: * Parses top-level grammar stuff.
251: */
252: private void parseGrammar(GrammarPattern grammar)
253: throws IOException, SAXException, RelaxException,
254: RelaxException {
255: while (true) {
256: int token = parseToken();
257: Pattern pattern;
258:
259: switch (token) {
260: case -1:
261: return;
262:
263: case COMMENT:
264: break;
265:
266: case START:
267: int next = parseToken();
268: if (next == '=')
269: grammar.setStart(parsePattern(grammar));
270: else
271: throw error(L.l("expected '=' at {0}", _cb));
272: break;
273:
274: case IDENTIFIER:
275: String name = _lexeme;
276: Pattern oldPattern = grammar.getDefinition(name);
277: pattern = new GroupPattern();
278: next = parseToken();
279: if (next == '=') {
280: grammar.setDefinition(name, parsePattern(grammar));
281: } else
282: throw error(L.l("expected '=' at {0}", _cb));
283: break;
284:
285: case INCLUDE:
286: parseInclude(grammar);
287: break;
288:
289: default:
290: throw error(L.l("unexpected token {0}", _cb));
291: }
292: }
293: }
294:
295: private void parseInclude(GrammarPattern grammar)
296: throws IOException, SAXException, RelaxException {
297: String uri = parseLiteral();
298:
299: Path sub = _pwd.lookup(uri);
300:
301: ReadStream is = null;
302:
303: try {
304: is = sub.openRead();
305:
306: InputSource source = new InputSource(is);
307: source.setSystemId(uri);
308:
309: CompactParser parser = new CompactParser();
310: parser.setGeneratedId(_generatedId);
311: parser.parse(source);
312:
313: GrammarPattern subGrammar = parser.getGrammar();
314:
315: _generatedId = parser._generatedId;
316:
317: grammar.mergeInclude(subGrammar);
318: } finally {
319: if (is != null)
320: is.close();
321: }
322: }
323:
324: /**
325: * Parses a pattern.
326: */
327: private Pattern parsePattern(GrammarPattern grammar)
328: throws IOException, SAXException, RelaxException {
329: Pattern pattern = parseTerm(grammar);
330:
331: int token = parseToken();
332:
333: switch (token) {
334: case '|':
335: return parseChoicePattern(grammar, pattern);
336: case '&':
337: return parseInterleavePattern(grammar, pattern);
338: case ',':
339: return parseGroupPattern(grammar, pattern);
340:
341: default:
342: _peekToken = token;
343: return pattern;
344: }
345: }
346:
347: /**
348: * Parses a interleave pattern.
349: */
350: private Pattern parseInterleavePattern(GrammarPattern grammar,
351: Pattern pattern) throws IOException, SAXException,
352: RelaxException {
353: int token;
354:
355: do {
356: if (!(pattern instanceof InterleavePattern)) {
357: Pattern child = pattern;
358: pattern = new InterleavePattern();
359: pattern.addChild(child);
360: }
361:
362: pattern.addChild(parseTerm(grammar));
363: } while ((token = parseToken()) == '&');
364:
365: _peekToken = token;
366:
367: return pattern;
368: }
369:
370: /**
371: * Parses a group pattern.
372: */
373: private Pattern parseGroupPattern(GrammarPattern grammar,
374: Pattern pattern) throws IOException, SAXException,
375: RelaxException {
376: int token;
377:
378: do {
379: if (!(pattern instanceof GroupPattern)) {
380: Pattern child = pattern;
381: pattern = new GroupPattern();
382: pattern.addChild(child);
383: }
384:
385: pattern.addChild(parseTerm(grammar));
386: } while ((token = parseToken()) == ',');
387:
388: _peekToken = token;
389:
390: return pattern;
391: }
392:
393: /**
394: * Parses a choice pattern.
395: */
396: private Pattern parseChoicePattern(GrammarPattern grammar,
397: Pattern pattern) throws IOException, SAXException,
398: RelaxException {
399: int token;
400:
401: do {
402: if (!(pattern instanceof ChoicePattern)) {
403: Pattern child = pattern;
404: pattern = new ChoicePattern();
405: pattern.addChild(child);
406: }
407:
408: pattern.addChild(parseTerm(grammar));
409: } while ((token = parseToken()) == '|');
410:
411: _peekToken = token;
412:
413: return pattern;
414: }
415:
416: /**
417: * Parses a term
418: */
419: private Pattern parseTerm(GrammarPattern grammar)
420: throws IOException, SAXException, RelaxException {
421: int token = parseToken();
422:
423: while (token == COMMENT) {
424: token = parseToken();
425: }
426:
427: Pattern pattern;
428: switch (token) {
429: case EMPTY:
430: return new EmptyPattern();
431:
432: case TEXT:
433: return new TextPattern();
434:
435: case STRING:
436: case LITERAL:
437: return new DataPattern("string");
438:
439: case TOKEN:
440: return new DataPattern("token");
441:
442: case ELEMENT:
443: pattern = parseElement(grammar);
444: break;
445:
446: case ATTRIBUTE:
447: pattern = parseAttribute(grammar);
448: break;
449:
450: case '(':
451: pattern = parsePattern(grammar);
452:
453: token = parseToken();
454: if (token != ')')
455: throw error(L.l("expected ')' at {0}", _cb));
456: break;
457:
458: case IDENTIFIER:
459: pattern = new RefPattern(_grammar, _lexeme);
460: pattern.setFilename(_filename);
461: pattern.setLine(_line);
462: break;
463:
464: default:
465: throw error(L.l("unknown token {0}", _cb));
466: }
467:
468: token = parseToken();
469:
470: if (token == '*')
471: pattern = new ZeroOrMorePattern(pattern);
472: else if (token == '?') {
473: ChoicePattern choice = new ChoicePattern();
474: choice.addChild(new EmptyPattern());
475: choice.addChild(pattern);
476: return choice;
477: } else if (token == '+') {
478: GroupPattern group = new GroupPattern();
479: group.addChild(pattern);
480: group.addChild(new ZeroOrMorePattern(pattern));
481: return group;
482: } else {
483: _peekToken = token;
484: }
485:
486: return pattern;
487: }
488:
489: /**
490: * Parses an element.
491: */
492: private Pattern parseElement(GrammarPattern grammar)
493: throws IOException, SAXException, RelaxException {
494: String id = generateId();
495: ElementPattern elt = new ElementPattern(id);
496: grammar.setDefinition(id, elt);
497:
498: elt.addNameChild(parseNameClass(grammar, true));
499:
500: int token = parseToken();
501: if (token == '{') {
502: elt.addChild(parsePattern(grammar));
503:
504: token = parseToken();
505: if (token != '}')
506: throw error(L.l("expected '}' at {0}", _cb));
507: }
508:
509: return elt;
510: }
511:
512: /**
513: * Parses an element.
514: */
515: private Pattern parseAttribute(GrammarPattern grammar)
516: throws IOException, SAXException, RelaxException {
517: AttributePattern elt = new AttributePattern();
518: elt.addNameChild(parseNameClass(grammar, false));
519:
520: int token = parseToken();
521: if (token == '{') {
522: token = parseToken();
523:
524: if (token == '}')
525: return elt;
526:
527: _peekToken = token;
528:
529: elt.addChild(parsePattern(grammar));
530:
531: token = parseToken();
532: if (token != '}')
533: throw error(L.l("expected '}' at {0}", _cb));
534: }
535:
536: return elt;
537: }
538:
539: /**
540: * Parses a name class.
541: */
542: private NameClassPattern parseNameClass(GrammarPattern grammar,
543: boolean isElement) throws IOException, SAXException,
544: RelaxException {
545: NameClassPattern left = parseName(grammar, isElement);
546: ChoiceNamePattern choice = null;
547:
548: int ch;
549: while ((ch = skipWhitespace()) == '|') {
550: NameClassPattern right = parseName(grammar, isElement);
551:
552: if (choice == null) {
553: choice = new ChoiceNamePattern();
554: choice.addNameChild(left);
555: }
556:
557: choice.addNameChild(right);
558: }
559:
560: _peek = ch;
561:
562: if (choice != null)
563: return choice;
564: else
565: return left;
566: }
567:
568: /**
569: * Parses a name class.
570: */
571: private NameClassPattern parseName(GrammarPattern grammar,
572: boolean isElement) throws IOException, SAXException,
573: RelaxException {
574: _cb.clear();
575:
576: int ch = skipWhitespace();
577: if (ch == '(') {
578: NameClassPattern name = parseNameClass(grammar, isElement);
579: ch = skipWhitespace();
580: if (ch != ')')
581: throw error(L.l("expected ')' at '{0}'", String
582: .valueOf((char) ch)));
583: return name;
584: }
585:
586: for (; XmlChar.isNameChar(ch); ch = read())
587: _cb.append((char) ch);
588:
589: if (ch == '*')
590: _cb.append('*');
591: else
592: _peek = ch;
593:
594: if (_cb.length() == 0)
595: throw error(L.l("expected name at '{0}'", String
596: .valueOf((char) ch)));
597:
598: NameClassPattern pattern;
599:
600: String lexeme = _cb.toString();
601:
602: int p = lexeme.lastIndexOf(':');
603: String ns = _ns;
604: String localName;
605:
606: if (p < 0) {
607: localName = lexeme;
608:
609: if (!isElement)
610: ns = null;
611: } else {
612: String prefix = lexeme.substring(0, p);
613: localName = lexeme.substring(p + 1);
614: ns = _nsMap.get(prefix);
615:
616: if (ns == null && localName.equals("*"))
617: throw error(L.l(
618: "'{0}' does not match a defined namespace.",
619: lexeme));
620:
621: if (ns == null) {// && isElement) {
622: pattern = createNamePattern(lexeme, "");
623:
624: return pattern;
625: }
626: }
627:
628: if (lexeme.equals("*")) {
629: AnyNamePattern namePattern = new AnyNamePattern();
630:
631: namePattern.setExcept(parseExcept(grammar, isElement));
632:
633: return namePattern;
634: } else if (localName.equals("*")) {
635: NsNamePattern namePattern = new NsNamePattern(lexeme, ns);
636:
637: namePattern.setExcept(parseExcept(grammar, isElement));
638:
639: return namePattern;
640: } else if ("".equals(ns) || ns == null) {
641: pattern = createNamePattern(localName, "");
642:
643: return pattern;
644: } else {
645: pattern = createNamePattern(lexeme, ns);
646:
647: return pattern;
648: }
649: }
650:
651: private NamePattern createNamePattern(String localName,
652: String namespace) {
653: return new NamePattern(new QName(localName, namespace));
654: }
655:
656: /**
657: * Parses a name class.
658: */
659: private NameClassPattern parseExcept(GrammarPattern grammar,
660: boolean isElement) throws IOException, SAXException,
661: RelaxException {
662: int ch = skipWhitespace();
663:
664: if (ch != '-') {
665: _peek = ch;
666: return null;
667: }
668:
669: return parseName(grammar, isElement);
670: }
671:
672: /**
673: * Parses a token.
674: */
675: private int parseToken() throws IOException, SAXException,
676: RelaxException {
677: int ch = _peekToken;
678:
679: if (ch >= 0) {
680: _peekToken = -1;
681: return ch;
682: }
683:
684: ch = skipWhitespace();
685:
686: _cb.clear();
687:
688: if (ch < 0) {
689: _cb.append("end of file");
690: return -1;
691: }
692:
693: switch (ch) {
694: case '?':
695: case '*':
696: case '+':
697: case ',':
698: case '|':
699: case '&':
700: case '{':
701: case '}':
702: case '(':
703: case ')':
704: case '=':
705: _cb.append((char) ch);
706: return ch;
707:
708: case '\"':
709: case '\'':
710: _peek = ch;
711: _lexeme = parseLiteral();
712: return LITERAL;
713:
714: case '#':
715: do {
716: ch = read();
717: if (ch != '#')
718: throw error(L.l("expeced '#' at '{0}'", String
719: .valueOf((char) ch)));
720:
721: if (_cb.length() > 0)
722: _cb.append('\n');
723:
724: for (ch = read(); ch > 0 && ch != '\n' && ch != '\r'; ch = read())
725: _cb.append((char) ch);
726:
727: if (ch == '\r') {
728: ch = read();
729: if (ch != '\n')
730: _peek = ch;
731: }
732:
733: ch = read();
734: } while (ch == '#');
735:
736: _peek = ch;
737: return COMMENT;
738:
739: default:
740: if (XmlChar.isNameStart(ch)) {
741: for (; XmlChar.isNameChar(ch); ch = read()) {
742: _cb.append((char) ch);
743: }
744: _peek = ch;
745:
746: int token = _tokenMap.get(_cb);
747:
748: if (token > 0) {
749: _lexeme = null;
750: return token;
751: } else {
752: _lexeme = _cb.toString().intern();
753: return IDENTIFIER;
754: }
755: } else {
756: throw error(L.l("Unknown character '{0}'", String
757: .valueOf((char) ch)));
758: }
759: }
760: }
761:
762: private String parseLiteral() throws IOException, SAXException,
763: RelaxException {
764: int end = skipWhitespace();
765:
766: if (end != '"' && end != '\'')
767: throw error(L.l("expected '\"' at '{0}'", String
768: .valueOf((char) end)));
769:
770: _cb.clear();
771: int ch = read();
772: for (; ch >= 0 && ch != end; ch = read()) {
773: _cb.append((char) ch);
774: }
775:
776: if (ch != end)
777: throw error(L.l("expected '\"' at '{0}'", String
778: .valueOf((char) ch)));
779:
780: return _cb.toString();
781: }
782:
783: private String parseIdentifier() throws IOException, SAXException,
784: RelaxException {
785: int ch = skipWhitespace();
786:
787: if (!XmlChar.isNameChar(ch))
788: throw error(L.l("expected identifier character at '{0}'",
789: String.valueOf((char) ch)));
790:
791: _cb.clear();
792: for (; XmlChar.isNameChar(ch); ch = read()) {
793: _cb.append((char) ch);
794: }
795:
796: return _cb.toString();
797: }
798:
799: /**
800: * Parses whitespace.
801: */
802: private int skipWhitespace() throws IOException, SAXException {
803: int ch;
804:
805: for (ch = read(); XmlChar.isWhitespace(ch); ch = read()) {
806: }
807:
808: return ch;
809: }
810:
811: /**
812: * Creates an error.
813: */
814: private SAXException error(String msg) {
815: return new SAXException(_filename + ":" + _line + ": " + msg);
816: }
817:
818: /**
819: * Returns the current location string.
820: */
821: /*
822: public String getLocation()
823: {
824: return _filename + ":" + _line;
825: }
826: */
827:
828: /**
829: * Reads a character.
830: */
831: private int read() throws IOException {
832: int ch = _peek;
833:
834: if (ch >= 0) {
835: _peek = -1;
836: return ch;
837: }
838:
839: ch = _is.read();
840:
841: if (ch == '\n')
842: _line++;
843: else if (ch == '\r') {
844: _line++;
845: ch = _is.read();
846:
847: if (ch != '\n') {
848: _peek = ch;
849: ch = '\n';
850: }
851: }
852:
853: return ch;
854: }
855:
856: static {
857: _tokenMap.put(new CharBuffer("namespace"), NAMESPACE);
858: _tokenMap.put(new CharBuffer("default"), DEFAULT);
859:
860: _tokenMap.put(new CharBuffer("start"), START);
861: _tokenMap.put(new CharBuffer("div"), DIV);
862:
863: _tokenMap.put(new CharBuffer("element"), ELEMENT);
864: _tokenMap.put(new CharBuffer("attribute"), ATTRIBUTE);
865:
866: _tokenMap.put(new CharBuffer("text"), TEXT);
867: _tokenMap.put(new CharBuffer("string"), STRING);
868: _tokenMap.put(new CharBuffer("token"), TOKEN);
869:
870: _tokenMap.put(new CharBuffer("empty"), EMPTY);
871:
872: _tokenMap.put(new CharBuffer("include"), INCLUDE);
873: }
874: }
|