001: /*
002: * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved
003: *
004: * This file is part of Resin(R) Open Source
005: *
006: * Each copy or derived work must preserve the copyright notice and this
007: * notice unmodified.
008: *
009: * Resin Open Source is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU General Public License as published by
011: * the Free Software Foundation; either version 2 of the License, or
012: * (at your option) any later version.
013: *
014: * Resin Open Source is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
017: * of NON-INFRINGEMENT. See the GNU General Public License for more
018: * details.
019: *
020: * You should have received a copy of the GNU General Public License
021: * along with Resin Open Source; if not, write to the
022: * Free SoftwareFoundation, Inc.
023: * 59 Temple Place, Suite 330
024: * Boston, MA 02111-1307 USA
025: *
026: * @author Scott Ferguson
027: */
028:
029: package com.caucho.xml;
030:
031: import com.caucho.util.CharBuffer;
032: import com.caucho.util.L10N;
033: import com.caucho.vfs.ReadStream;
034:
035: import java.util.HashMap;
036:
037: /**
038: * The Policy class defines the parsing policy. It configures the parser
039: * between HTML, XML, and loose versions of HTML and XML.
040: */
041: class Policy {
042: static L10N L = new L10N(Policy.class);
043:
044: final static int ERROR = 0;
045: final static int IGNORE = ERROR + 1;
046: final static int PUSH = IGNORE + 1;
047: final static int PUSH_EMPTY = PUSH + 1;
048: final static int PUSH_OPT = PUSH_EMPTY + 1;
049: final static int PUSH_VERBATIM = PUSH_OPT + 1;
050: final static int POP = PUSH_VERBATIM + 1;
051: final static int POP_AND_LOOP = POP + 1;
052:
053: private NamespaceMap namespaces;
054: private HashMap nameCache = new HashMap();
055: private HashMap _attrCache = new HashMap();
056: protected QName opt;
057: protected ReadStream is;
058:
059: boolean expandReferences = true;
060: boolean optionalTags = true;
061: boolean skipWhitespace;
062: boolean skipComments;
063: boolean strictComments;
064: boolean strictAttributes;
065: boolean entitiesAsText = false;
066: boolean expandEntities = true;
067: boolean strictCharacters;
068: boolean strictXml;
069: boolean singleTopElement;
070: boolean normalizeWhitespace = false;
071: boolean forgiving;
072: boolean _isNamespaceAware = false;
073:
074: /**
075: * Initialize the policy.
076: */
077: void init() {
078: namespaces = null;
079: nameCache.clear();
080: _attrCache.clear();
081: opt = null;
082: is = null;
083:
084: expandReferences = true;
085: optionalTags = true;
086: skipWhitespace = false;
087: skipComments = false;
088: strictComments = false;
089: strictAttributes = false;
090: entitiesAsText = false;
091: expandEntities = true;
092: strictCharacters = false;
093: strictXml = false;
094: singleTopElement = false;
095: normalizeWhitespace = false;
096: forgiving = false;
097: _isNamespaceAware = false;
098: }
099:
100: /**
101: * Sets the current read stream.
102: */
103: void setStream(ReadStream is) {
104: this .is = is;
105: }
106:
107: QName getOpt() {
108: return opt;
109: }
110:
111: /**
112: * Sets the new namespace binding.
113: *
114: * @param ns the namespace
115: */
116: void setNamespace(NamespaceMap ns) {
117: if (namespaces != ns) {
118: nameCache.clear();
119: _attrCache.clear();
120: }
121:
122: namespaces = ns;
123: }
124:
125: /**
126: * Set true for namespace aware.
127: */
128: void setNamespaceAware(boolean isNamespaceAware) {
129: _isNamespaceAware = isNamespaceAware;
130: }
131:
132: /**
133: * Clears the namespace cache when the namespace changes.
134: */
135: void clearNamespaceCache() {
136: namespaces = null;
137: nameCache.clear();
138: _attrCache.clear();
139: }
140:
141: QName getAttributeName(CharBuffer eltName, CharBuffer source) {
142: return getAttributeName(eltName, source, false);
143: }
144:
145: /**
146: * Returns the qname for the named attribute.
147: *
148: * @param eltName the current node
149: * @param source the name of the attribute
150: *
151: * @param the QName including namespace for the attribute name.
152: */
153: QName getAttributeName(CharBuffer eltName, CharBuffer source,
154: boolean nsNull) {
155: QName qname = (QName) _attrCache.get(source);
156: if (qname != null)
157: return qname;
158:
159: int i = source.lastIndexOf(':');
160: String fullName = source.toString();
161: String prefix = null;
162: String localName = null;
163: String ns = null;
164: ;
165:
166: if (!_isNamespaceAware) {
167: } else if (i < 0) {
168: localName = fullName;
169: } else {
170: prefix = source.substring(0, i);
171:
172: ns = NamespaceMap.get(namespaces, prefix);
173:
174: if (ns != null) {
175: localName = source.substring(i + 1);
176: } else if ("xml".equals(prefix)) {
177: ns = XmlParser.XML;
178: localName = source.substring(i + 1);
179: } else {
180: prefix = null;
181: localName = source.toString();
182: }
183: }
184:
185: qname = new QName(fullName, prefix, localName, ns);
186:
187: _attrCache.put(source.clone(), qname);
188:
189: return qname;
190: }
191:
192: /**
193: * Returns the fully qualified name, including namespaces, for the
194: * new qname.
195: *
196: * @param node the current parent node
197: * @param source the qname string needing resolving.
198: *
199: * @return the QName including namespace for the source.
200: */
201: QName getName(CharBuffer source) {
202: QName qname = (QName) nameCache.get(source);
203: if (qname != null)
204: return qname;
205:
206: int i = source.lastIndexOf(':');
207: String fullName = source.toString();
208: String prefix = null;
209: String localName = null;
210: String ns = null;
211: ;
212:
213: if (!_isNamespaceAware) {
214: } else if (i < 0) {
215: ns = NamespaceMap.get(namespaces, "");
216: localName = source.toString();
217: } else {
218: prefix = source.substring(0, i);
219:
220: ns = NamespaceMap.get(namespaces, prefix);
221:
222: if (ns != null) {
223: localName = source.substring(i + 1);
224: } else {
225: prefix = null;
226: localName = source.toString();
227: }
228: }
229:
230: qname = new QName(fullName, prefix, localName, ns);
231:
232: nameCache.put(source.clone(), qname);
233:
234: return qname;
235: }
236:
237: /**
238: * Returns the fully qualified name, including namespaces, for the
239: * new qname.
240: *
241: * @param source the qname string needing resolving.
242: *
243: * @return the QName including namespace for the source.
244: */
245: QName getNamespaceName(CharBuffer source) {
246: QName qname = (QName) nameCache.get(source);
247: if (qname != null)
248: return qname;
249:
250: int i = source.lastIndexOf(':');
251: String prefix;
252: String localName;
253:
254: // xml/01ek
255: if (true) {
256: prefix = null;
257: localName = source.toString();
258: } else if (i < 0) {
259: prefix = null;
260: localName = source.toString();
261: } else {
262: prefix = source.substring(0, i);
263: localName = source.substring(i + 1);
264: }
265:
266: // xml/01ek vs xml/01eg
267: qname = new QName(prefix, localName, null); // XmlParser.XMLNS
268:
269: nameCache.put(source.clone(), qname);
270:
271: return qname;
272: }
273:
274: /**
275: * Returns true if the string contains only whitespace.
276: *
277: * @param s string to test
278: * @return true if the string is completely whitespace
279: */
280: boolean isWhitespaceOnly(String s) {
281: for (int i = s.length() - 1; i >= 0; i--)
282: if (!XmlChar.isWhitespace(s.charAt(i)))
283: return false;
284:
285: return true;
286: }
287:
288: /**
289: * Returns the action to be performed with the next node on an open
290: * tag. In general, for XML, the next node is just pushed into the tree.
291: *
292: * @param parser the current XML parser
293: * @param node the current node
294: * @param next the node that needs an action
295: *
296: * @return the action code for the next node
297: */
298: int openAction(XmlParser parser, QName node, QName next)
299: throws XmlParseException {
300: String nodeName = node.getName();
301: /*
302: if (nodeName.equals("#document")) {
303: QDocument document = (QDocument) node;
304:
305: switch (next.getNodeType()) {
306: case Node.TEXT_NODE:
307: if (isWhitespaceOnly(next.getNodeValue()))
308: return PUSH; // XXX: ignore
309: break;
310:
311: case Node.COMMENT_NODE:
312: case Node.PROCESSING_INSTRUCTION_NODE:
313: return PUSH;
314: }
315:
316: if (document.getDocumentElement() == null &&
317: next.getNodeType() == Node.ELEMENT_NODE) {
318: document.setDocumentElement((Element) next);
319: return PUSH;
320: }
321:
322: Element elt = document.getDocumentElement();
323: return PUSH;
324: } else
325: return PUSH;
326: */
327: return PUSH;
328: }
329:
330: /**
331: * Returns the action to be performed with the next node on a close
332: * tag. In general, for XML, the current node is changed to its parent
333: *
334: * @param parser the current XML parser
335: * @param node the current node
336: * @param tagEnd the name of the close tag
337: *
338: * @return the action code for the next node
339: */
340: int elementCloseAction(XmlParser parser, QName node, String tagEnd)
341: throws XmlParseException {
342: String nodeName = node.getName();
343:
344: if (nodeName.equals("#document") && tagEnd.equals(""))
345: return POP;
346: else if (nodeName.equals(tagEnd))
347: return POP;
348: else {
349: String expect = nodeName;
350: if (expect.equals("#document"))
351: expect = L.l("end of document");
352: else
353: expect = "`</" + expect + ">'";
354: if (tagEnd.equals(""))
355: tagEnd = L.l("end of file");
356: else
357: tagEnd = "`</" + tagEnd + ">'";
358:
359: throw parser.error(L.l("expected {0} at {1}", expect,
360: tagEnd));
361: }
362: }
363: }
|