001: /*
002: * Copyright 1999,2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.apache.jasper.compiler;
018:
019: import java.io.FileNotFoundException;
020: import java.io.IOException;
021: import java.io.InputStream;
022: import java.io.InputStreamReader;
023: import java.net.JarURLConnection;
024: import java.net.URL;
025: import java.util.Stack;
026: import java.util.jar.JarFile;
027:
028: import org.apache.jasper.JasperException;
029: import org.apache.jasper.JspCompilationContext;
030: import org.apache.jasper.xmlparser.XMLEncodingDetector;
031: import org.xml.sax.Attributes;
032:
033: /**
034: * Controller for the parsing of a JSP page.
035: * <p>
036: * The same ParserController instance is used for a JSP page and any JSP
037: * segments included by it (via an include directive), where each segment may
038: * be provided in standard or XML syntax. This class selects and invokes the
039: * appropriate parser for the JSP page and its included segments.
040: *
041: * @author Pierre Delisle
042: * @author Jan Luehe
043: */
044: class ParserController implements TagConstants {
045:
046: private static final String CHARSET = "charset=";
047:
048: private JspCompilationContext ctxt;
049: private Compiler compiler;
050: private ErrorDispatcher err;
051:
052: /*
053: * Indicates the syntax (XML or standard) of the file being processed
054: */
055: private boolean isXml;
056:
057: /*
058: * A stack to keep track of the 'current base directory'
059: * for include directives that refer to relative paths.
060: */
061: private Stack baseDirStack = new Stack();
062:
063: private boolean isEncodingSpecifiedInProlog;
064:
065: private String sourceEnc;
066:
067: private boolean isDefaultPageEncoding;
068: private boolean isTagFile;
069: private boolean directiveOnly;
070:
071: /*
072: * Constructor
073: */
074: public ParserController(JspCompilationContext ctxt,
075: Compiler compiler) {
076: this .ctxt = ctxt;
077: this .compiler = compiler;
078: this .err = compiler.getErrorDispatcher();
079: }
080:
081: public JspCompilationContext getJspCompilationContext() {
082: return ctxt;
083: }
084:
085: public Compiler getCompiler() {
086: return compiler;
087: }
088:
089: /**
090: * Parses a JSP page or tag file. This is invoked by the compiler.
091: *
092: * @param inFileName The path to the JSP page or tag file to be parsed.
093: */
094: public Node.Nodes parse(String inFileName)
095: throws FileNotFoundException, JasperException, IOException {
096: // If we're parsing a packaged tag file or a resource included by it
097: // (using an include directive), ctxt.getTagFileJar() returns the
098: // JAR file from which to read the tag file or included resource,
099: // respectively.
100: isTagFile = ctxt.isTagFile();
101: directiveOnly = false;
102: return doParse(inFileName, null, ctxt.getTagFileJarUrl());
103: }
104:
105: /**
106: * Processes an include directive with the given path.
107: *
108: * @param inFileName The path to the resource to be included.
109: * @param parent The parent node of the include directive.
110: * @param jarFile The JAR file from which to read the included resource,
111: * or null of the included resource is to be read from the filesystem
112: */
113: public Node.Nodes parse(String inFileName, Node parent,
114: URL jarFileUrl) throws FileNotFoundException,
115: JasperException, IOException {
116: // For files that are statically included, isTagfile and directiveOnly
117: // remain unchanged.
118: return doParse(inFileName, parent, jarFileUrl);
119: }
120:
121: /**
122: * Extracts tag file directive information from the tag file with the
123: * given name.
124: *
125: * This is invoked by the compiler
126: *
127: * @param inFileName The name of the tag file to be parsed.
128: */
129: public Node.Nodes parseTagFileDirectives(String inFileName)
130: throws FileNotFoundException, JasperException, IOException {
131: boolean isTagFileSave = isTagFile;
132: boolean directiveOnlySave = directiveOnly;
133: isTagFile = true;
134: directiveOnly = true;
135: Node.Nodes page = doParse(inFileName, null, (URL) ctxt
136: .getTagFileJarUrls().get(inFileName));
137: directiveOnly = directiveOnlySave;
138: isTagFile = isTagFileSave;
139: return page;
140: }
141:
142: /**
143: * Parses the JSP page or tag file with the given path name.
144: *
145: * @param inFileName The name of the JSP page or tag file to be parsed.
146: * @param parent The parent node (non-null when processing an include
147: * directive)
148: * @param isTagFile true if file to be parsed is tag file, and false if it
149: * is a regular JSP page
150: * @param directivesOnly true if the file to be parsed is a tag file and
151: * we are only interested in the directives needed for constructing a
152: * TagFileInfo.
153: * @param jarFile The JAR file from which to read the JSP page or tag file,
154: * or null if the JSP page or tag file is to be read from the filesystem
155: */
156: private Node.Nodes doParse(String inFileName, Node parent,
157: URL jarFileUrl) throws FileNotFoundException,
158: JasperException, IOException {
159:
160: Node.Nodes parsedPage = null;
161: isEncodingSpecifiedInProlog = false;
162: isDefaultPageEncoding = false;
163:
164: JarFile jarFile = getJarFile(jarFileUrl);
165: String absFileName = resolveFileName(inFileName);
166: String jspConfigPageEnc = getJspConfigPageEncoding(absFileName);
167:
168: // Figure out what type of JSP document and encoding type we are
169: // dealing with
170: determineSyntaxAndEncoding(absFileName, jarFile,
171: jspConfigPageEnc);
172:
173: if (parent != null) {
174: // Included resource, add to dependent list
175: compiler.getPageInfo().addDependant(absFileName);
176: }
177:
178: if (isXml && isEncodingSpecifiedInProlog) {
179: /*
180: * Make sure the encoding explicitly specified in the XML
181: * prolog (if any) matches that in the JSP config element
182: * (if any), treating "UTF-16", "UTF-16BE", and "UTF-16LE" as
183: * identical.
184: */
185: if (jspConfigPageEnc != null
186: && !jspConfigPageEnc.equals(sourceEnc)
187: && (!jspConfigPageEnc.startsWith("UTF-16") || !sourceEnc
188: .startsWith("UTF-16"))) {
189: err.jspError(
190: "jsp.error.prolog_config_encoding_mismatch",
191: sourceEnc, jspConfigPageEnc);
192: }
193: }
194:
195: // Dispatch to the appropriate parser
196: if (isXml) {
197: // JSP document (XML syntax)
198: InputStream inStream = null;
199: try {
200: parsedPage = JspDocumentParser.parse(this , absFileName,
201: jarFile, parent, isTagFile, directiveOnly,
202: sourceEnc, jspConfigPageEnc,
203: isEncodingSpecifiedInProlog);
204: } finally {
205: if (inStream != null) {
206: try {
207: inStream.close();
208: } catch (Exception any) {
209: }
210: }
211: }
212: } else {
213: // Standard syntax
214: InputStreamReader inStreamReader = null;
215: try {
216: inStreamReader = JspUtil.getReader(absFileName,
217: sourceEnc, jarFile, ctxt, err);
218: JspReader jspReader = new JspReader(ctxt, absFileName,
219: sourceEnc, inStreamReader, err);
220: parsedPage = Parser.parse(this , jspReader, parent,
221: isTagFile, directiveOnly, jarFileUrl,
222: sourceEnc, jspConfigPageEnc,
223: isDefaultPageEncoding);
224: } finally {
225: if (inStreamReader != null) {
226: try {
227: inStreamReader.close();
228: } catch (Exception any) {
229: }
230: }
231: }
232: }
233:
234: if (jarFile != null) {
235: try {
236: jarFile.close();
237: } catch (Throwable t) {
238: }
239: }
240:
241: baseDirStack.pop();
242:
243: return parsedPage;
244: }
245:
246: /*
247: * Checks to see if the given URI is matched by a URL pattern specified in
248: * a jsp-property-group in web.xml, and if so, returns the value of the
249: * <page-encoding> element.
250: *
251: * @param absFileName The URI to match
252: *
253: * @return The value of the <page-encoding> attribute of the
254: * jsp-property-group with matching URL pattern
255: */
256: private String getJspConfigPageEncoding(String absFileName)
257: throws JasperException {
258:
259: JspConfig jspConfig = ctxt.getOptions().getJspConfig();
260: JspConfig.JspProperty jspProperty = jspConfig
261: .findJspProperty(absFileName);
262: return jspProperty.getPageEncoding();
263: }
264:
265: /**
266: * Determines the syntax (standard or XML) and page encoding properties
267: * for the given file, and stores them in the 'isXml' and 'sourceEnc'
268: * instance variables, respectively.
269: */
270: private void determineSyntaxAndEncoding(String absFileName,
271: JarFile jarFile, String jspConfigPageEnc)
272: throws JasperException, IOException {
273:
274: isXml = false;
275:
276: /*
277: * 'true' if the syntax (XML or standard) of the file is given
278: * from external information: either via a JSP configuration element,
279: * the ".jspx" suffix, or the enclosing file (for included resources)
280: */
281: boolean isExternal = false;
282:
283: /*
284: * Indicates whether we need to revert from temporary usage of
285: * "ISO-8859-1" back to "UTF-8"
286: */
287: boolean revert = false;
288:
289: JspConfig jspConfig = ctxt.getOptions().getJspConfig();
290: JspConfig.JspProperty jspProperty = jspConfig
291: .findJspProperty(absFileName);
292: if (jspProperty.isXml() != null) {
293: // If <is-xml> is specified in a <jsp-property-group>, it is used.
294: isXml = JspUtil.booleanValue(jspProperty.isXml());
295: isExternal = true;
296: } else if (absFileName.endsWith(".jspx")
297: || absFileName.endsWith(".tagx")) {
298: isXml = true;
299: isExternal = true;
300: }
301:
302: if (isExternal && !isXml) {
303: // JSP (standard) syntax. Use encoding specified in jsp-config
304: // if provided.
305: sourceEnc = jspConfigPageEnc;
306: if (sourceEnc != null) {
307: return;
308: }
309: // We don't know the encoding
310: sourceEnc = "ISO-8859-1";
311: } else {
312: // XML syntax or unknown, (auto)detect encoding ...
313: Object[] ret = XMLEncodingDetector.getEncoding(absFileName,
314: jarFile, ctxt, err);
315: sourceEnc = (String) ret[0];
316: if (((Boolean) ret[1]).booleanValue()) {
317: isEncodingSpecifiedInProlog = true;
318: }
319:
320: if (!isXml && sourceEnc.equals("UTF-8")) {
321: /*
322: * We don't know if we're dealing with XML or standard syntax.
323: * Therefore, we need to check to see if the page contains
324: * a <jsp:root> element.
325: *
326: * We need to be careful, because the page may be encoded in
327: * ISO-8859-1 (or something entirely different), and may
328: * contain byte sequences that will cause a UTF-8 converter to
329: * throw exceptions.
330: *
331: * It is safe to use a source encoding of ISO-8859-1 in this
332: * case, as there are no invalid byte sequences in ISO-8859-1,
333: * and the byte/character sequences we're looking for (i.e.,
334: * <jsp:root>) are identical in either encoding (both UTF-8
335: * and ISO-8859-1 are extensions of ASCII).
336: */
337: sourceEnc = "ISO-8859-1";
338: revert = true;
339: }
340: }
341:
342: if (isXml) {
343: // (This implies 'isExternal' is TRUE.)
344: // We know we're dealing with a JSP document (via JSP config or
345: // ".jspx" suffix), so we're done.
346: return;
347: }
348:
349: /*
350: * At this point, 'isExternal' or 'isXml' is FALSE.
351: * Search for jsp:root action, in order to determine if we're dealing
352: * with XML or standard syntax (unless we already know what we're
353: * dealing with, i.e., when 'isExternal' is TRUE and 'isXml' is FALSE).
354: * No check for XML prolog, since nothing prevents a page from
355: * outputting XML and still using JSP syntax (in this case, the
356: * XML prolog is treated as template text).
357: */
358: JspReader jspReader = null;
359: try {
360: jspReader = new JspReader(ctxt, absFileName, sourceEnc,
361: jarFile, err);
362: } catch (FileNotFoundException ex) {
363: throw new JasperException(ex);
364: }
365: jspReader.setSingleFile(true);
366: Mark startMark = jspReader.mark();
367: if (!isExternal) {
368: jspReader.reset(startMark);
369: if (hasJspRoot(jspReader)) {
370: isXml = true;
371: if (revert)
372: sourceEnc = "UTF-8";
373: return;
374: } else {
375: isXml = false;
376: }
377: }
378:
379: /*
380: * At this point, we know we're dealing with JSP syntax.
381: * If an XML prolog is provided, it's treated as template text.
382: * Determine the page encoding from the page directive, unless it's
383: * specified via JSP config.
384: */
385: sourceEnc = jspConfigPageEnc;
386: if (sourceEnc == null) {
387: sourceEnc = getPageEncodingForJspSyntax(jspReader,
388: startMark);
389: if (sourceEnc == null) {
390: // Default to "ISO-8859-1" per JSP spec
391: sourceEnc = "ISO-8859-1";
392: isDefaultPageEncoding = true;
393: }
394: }
395: }
396:
397: /*
398: * Determines page source encoding for page or tag file in JSP syntax,
399: * by reading (in this order) the value of the 'pageEncoding' page
400: * directive attribute, or the charset value of the 'contentType' page
401: * directive attribute.
402: *
403: * @return The page encoding, or null if not found
404: */
405: private String getPageEncodingForJspSyntax(JspReader jspReader,
406: Mark startMark) throws JasperException {
407:
408: String encoding = null;
409: String saveEncoding = null;
410:
411: jspReader.reset(startMark);
412:
413: /*
414: * Determine page encoding from directive of the form <%@ page %> or
415: * <%@ tag %>
416: */
417: while (true) {
418: Mark current = jspReader.mark();
419:
420: Mark beginDirective = jspReader.skipUntil("<%@");
421: if (beginDirective == null) {
422: break;
423: }
424: // Move past the '<%@' delimiter
425: Mark beginDirectiveBody = jspReader.mark();
426:
427: // Check to see if directive is nested inside comment
428: jspReader.reset(current);
429: Mark beginComment = jspReader.skipUntil("<%--");
430: if (beginComment != null) {
431: Mark endComment = jspReader.skipUntil("--%>");
432: if (endComment == null) {
433: err.jspError(beginComment,
434: "jsp.error.unterminated", "<%--");
435: }
436:
437: if (beginDirective.isGreater(beginComment)
438: && endComment.isGreater(beginDirective)) {
439: // Directive is nested inside comment, skip until end of
440: // comment
441: jspReader.reset(endComment);
442: continue;
443: }
444: }
445:
446: jspReader.reset(beginDirectiveBody);
447: jspReader.skipSpaces();
448:
449: // compare for "tag ", so we don't match "taglib"
450: if (jspReader.matches("tag ") || jspReader.matches("page")) {
451:
452: jspReader.skipSpaces();
453: Attributes attrs = Parser.parseAttributes(this ,
454: jspReader);
455: encoding = getPageEncodingFromDirective(attrs,
456: "pageEncoding");
457: if (encoding != null) {
458: break;
459: }
460: encoding = getPageEncodingFromDirective(attrs,
461: "contentType");
462: if (encoding != null) {
463: saveEncoding = encoding;
464: }
465: }
466: }
467:
468: if (encoding == null) {
469: encoding = saveEncoding;
470: }
471:
472: if (encoding == null) {
473: /*
474: * Determine page encoding from page directive of the form
475: * <jsp:directive.page>
476: */
477: jspReader.reset(startMark);
478: while (jspReader.skipUntil("<jsp:directive.page") != null) {
479: jspReader.skipSpaces();
480: Attributes attrs = Parser.parseAttributes(this ,
481: jspReader);
482:
483: encoding = getPageEncodingFromDirective(attrs,
484: "pageEncoding");
485: if (encoding != null) {
486: break;
487: }
488: encoding = getPageEncodingFromDirective(attrs,
489: "contentType");
490: if (encoding != null) {
491: saveEncoding = encoding;
492: }
493: }
494: if (encoding == null) {
495: encoding = saveEncoding;
496: }
497: }
498:
499: return encoding;
500: }
501:
502: /*
503: * Scans the given attributes for the attribute with the given name,
504: * which is either 'pageEncoding' or 'contentType', and returns the
505: * specified page encoding.
506: *
507: * In the case of 'contentType', the page encoding is taken from the
508: * content type's 'charset' component.
509: *
510: * @param attrs The page directive attributes
511: * @param attrName The name of the attribute to search for (either
512: * 'pageEncoding' or 'contentType')
513: *
514: * @return The page encoding, or null
515: */
516: private String getPageEncodingFromDirective(Attributes attrs,
517: String attrName) {
518: String value = attrs.getValue(attrName);
519: if (attrName.equals("pageEncoding")) {
520: return value;
521: }
522:
523: // attrName = contentType
524: String contentType = value;
525: String encoding = null;
526: if (contentType != null) {
527: int loc = contentType.indexOf(CHARSET);
528: if (loc != -1) {
529: encoding = contentType
530: .substring(loc + CHARSET.length());
531: }
532: }
533:
534: return encoding;
535: }
536:
537: /*
538: * Resolve the name of the file and update baseDirStack() to keep track of
539: * the current base directory for each included file.
540: * The 'root' file is always an 'absolute' path, so no need to put an
541: * initial value in the baseDirStack.
542: */
543: private String resolveFileName(String inFileName) {
544: String fileName = inFileName.replace('\\', '/');
545: boolean isAbsolute = fileName.startsWith("/");
546: fileName = isAbsolute ? fileName : (String) baseDirStack.peek()
547: + fileName;
548: String baseDir = fileName.substring(0, fileName
549: .lastIndexOf("/") + 1);
550: baseDirStack.push(baseDir);
551: return fileName;
552: }
553:
554: /*
555: * Checks to see if the given page contains, as its first element, a <root>
556: * element whose prefix is bound to the JSP namespace, as in:
557: *
558: * <wombat:root xmlns:wombat="http://java.sun.com/JSP/Page" version="1.2">
559: * ...
560: * </wombat:root>
561: *
562: * @param reader The reader for this page
563: *
564: * @return true if this page contains a root element whose prefix is bound
565: * to the JSP namespace, and false otherwise
566: */
567: private boolean hasJspRoot(JspReader reader) throws JasperException {
568:
569: // <prefix>:root must be the first element
570: Mark start = null;
571: while ((start = reader.skipUntil("<")) != null) {
572: int c = reader.nextChar();
573: if (c != '!' && c != '?')
574: break;
575: }
576: if (start == null) {
577: return false;
578: }
579: Mark stop = reader.skipUntil(":root");
580: if (stop == null) {
581: return false;
582: }
583: // call substring to get rid of leading '<'
584: String prefix = reader.getText(start, stop).substring(1);
585:
586: start = stop;
587: stop = reader.skipUntil(">");
588: if (stop == null) {
589: return false;
590: }
591:
592: // Determine namespace associated with <root> element's prefix
593: String root = reader.getText(start, stop);
594: String xmlnsDecl = "xmlns:" + prefix;
595: int index = root.indexOf(xmlnsDecl);
596: if (index == -1) {
597: return false;
598: }
599: index += xmlnsDecl.length();
600: while (index < root.length()
601: && Character.isWhitespace(root.charAt(index))) {
602: index++;
603: }
604: if (index < root.length() && root.charAt(index) == '=') {
605: index++;
606: while (index < root.length()
607: && Character.isWhitespace(root.charAt(index))) {
608: index++;
609: }
610: if (index < root.length()
611: && root.charAt(index++) == '"'
612: && root.regionMatches(index, JSP_URI, 0, JSP_URI
613: .length())) {
614: return true;
615: }
616: }
617:
618: return false;
619: }
620:
621: private JarFile getJarFile(URL jarFileUrl) throws IOException {
622: JarFile jarFile = null;
623:
624: if (jarFileUrl != null) {
625: JarURLConnection conn = (JarURLConnection) jarFileUrl
626: .openConnection();
627: conn.setUseCaches(false);
628: conn.connect();
629: jarFile = conn.getJarFile();
630: }
631:
632: return jarFile;
633: }
634:
635: }
|