001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.jasper.compiler;
019:
020: import java.io.FileNotFoundException;
021: import java.io.IOException;
022: import java.io.InputStreamReader;
023: import java.net.JarURLConnection;
024: import java.net.URL;
025: import java.util.Stack;
026: import java.util.jar.JarFile;
027:
028: import org.apache.jasper.JasperException;
029: import org.apache.jasper.JspCompilationContext;
030: import org.apache.jasper.xmlparser.XMLEncodingDetector;
031: import org.xml.sax.Attributes;
032:
033: /**
034: * Controller for the parsing of a JSP page.
035: * <p>
036: * The same ParserController instance is used for a JSP page and any JSP
037: * segments included by it (via an include directive), where each segment may
038: * be provided in standard or XML syntax. This class selects and invokes the
039: * appropriate parser for the JSP page and its included segments.
040: *
041: * @author Pierre Delisle
042: * @author Jan Luehe
043: */
044: class ParserController implements TagConstants {
045:
// Marker that introduces the charset component inside the value of a
// 'contentType' directive attribute.
private static final String CHARSET = "charset=";

// Compilation context handed to the constructor; queried for the JSP
// configuration, the tag-file flag and the tag-file JAR URL.
private JspCompilationContext ctxt;
// Compiler that owns this controller; source of the error dispatcher and
// of the PageInfo to which included resources are added as dependants.
private Compiler compiler;
// Error dispatcher obtained from the compiler in the constructor.
private ErrorDispatcher err;

/*
 * Indicates the syntax (XML or standard) of the file being processed
 */
private boolean isXml;

/*
 * A stack to keep track of the 'current base directory'
 * for include directives that refer to relative paths.
 * Entries are pushed by resolveFileName() and popped by doParse().
 */
private Stack baseDirStack = new Stack();

// Per-file detection results. The two flags are reset at the start of
// every doParse() call and set from the XMLEncodingDetector result.
private boolean isEncodingSpecifiedInProlog;
private boolean isBomPresent;
// Fourth element of the XMLEncodingDetector result, handed to
// JspUtil.getReader(); presumably the number of leading (BOM) bytes to
// skip -- TODO confirm against XMLEncodingDetector.
private int skip;

// Encoding used to read the file currently being processed.
private String sourceEnc;

// Set to true when no encoding could be determined and "ISO-8859-1" is
// used as the fallback.
private boolean isDefaultPageEncoding;
// Parsing mode flags passed on to the parsers; set by parse(String) and
// temporarily overridden by parseTagFileDirectives().
private boolean isTagFile;
private boolean directiveOnly;
072:
/**
 * Creates a controller bound to the given compilation context and
 * compiler. The error dispatcher is taken from the compiler.
 *
 * @param ctxt The compilation context of the page to be parsed
 * @param compiler The compiler driving the parse
 */
public ParserController(JspCompilationContext ctxt, Compiler compiler) {
    this.err = compiler.getErrorDispatcher();
    this.compiler = compiler;
    this.ctxt = ctxt;
}
082:
083: public JspCompilationContext getJspCompilationContext() {
084: return ctxt;
085: }
086:
087: public Compiler getCompiler() {
088: return compiler;
089: }
090:
091: /**
092: * Parses a JSP page or tag file. This is invoked by the compiler.
093: *
094: * @param inFileName The path to the JSP page or tag file to be parsed.
095: */
096: public Node.Nodes parse(String inFileName)
097: throws FileNotFoundException, JasperException, IOException {
098: // If we're parsing a packaged tag file or a resource included by it
099: // (using an include directive), ctxt.getTagFileJar() returns the
100: // JAR file from which to read the tag file or included resource,
101: // respectively.
102: isTagFile = ctxt.isTagFile();
103: directiveOnly = false;
104: return doParse(inFileName, null, ctxt.getTagFileJarUrl());
105: }
106:
107: /**
108: * Processes an include directive with the given path.
109: *
110: * @param inFileName The path to the resource to be included.
111: * @param parent The parent node of the include directive.
112: * @param jarFile The JAR file from which to read the included resource,
113: * or null of the included resource is to be read from the filesystem
114: */
115: public Node.Nodes parse(String inFileName, Node parent,
116: URL jarFileUrl) throws FileNotFoundException,
117: JasperException, IOException {
118: // For files that are statically included, isTagfile and directiveOnly
119: // remain unchanged.
120: return doParse(inFileName, parent, jarFileUrl);
121: }
122:
123: /**
124: * Extracts tag file directive information from the tag file with the
125: * given name.
126: *
127: * This is invoked by the compiler
128: *
129: * @param inFileName The name of the tag file to be parsed.
130: */
131: public Node.Nodes parseTagFileDirectives(String inFileName)
132: throws FileNotFoundException, JasperException, IOException {
133: boolean isTagFileSave = isTagFile;
134: boolean directiveOnlySave = directiveOnly;
135: isTagFile = true;
136: directiveOnly = true;
137: Node.Nodes page = doParse(inFileName, null, ctxt
138: .getTagFileJarUrl(inFileName));
139: directiveOnly = directiveOnlySave;
140: isTagFile = isTagFileSave;
141: return page;
142: }
143:
144: /**
145: * Parses the JSP page or tag file with the given path name.
146: *
147: * @param inFileName The name of the JSP page or tag file to be parsed.
148: * @param parent The parent node (non-null when processing an include
149: * directive)
150: * @param isTagFile true if file to be parsed is tag file, and false if it
151: * is a regular JSP page
152: * @param directivesOnly true if the file to be parsed is a tag file and
153: * we are only interested in the directives needed for constructing a
154: * TagFileInfo.
155: * @param jarFile The JAR file from which to read the JSP page or tag file,
156: * or null if the JSP page or tag file is to be read from the filesystem
157: */
158: private Node.Nodes doParse(String inFileName, Node parent,
159: URL jarFileUrl) throws FileNotFoundException,
160: JasperException, IOException {
161:
162: Node.Nodes parsedPage = null;
163: isEncodingSpecifiedInProlog = false;
164: isBomPresent = false;
165: isDefaultPageEncoding = false;
166:
167: JarFile jarFile = getJarFile(jarFileUrl);
168: String absFileName = resolveFileName(inFileName);
169: String jspConfigPageEnc = getJspConfigPageEncoding(absFileName);
170:
171: // Figure out what type of JSP document and encoding type we are
172: // dealing with
173: determineSyntaxAndEncoding(absFileName, jarFile,
174: jspConfigPageEnc);
175:
176: if (parent != null) {
177: // Included resource, add to dependent list
178: compiler.getPageInfo().addDependant(absFileName);
179: }
180:
181: if ((isXml && isEncodingSpecifiedInProlog) || isBomPresent) {
182: /*
183: * Make sure the encoding explicitly specified in the XML
184: * prolog (if any) matches that in the JSP config element
185: * (if any), treating "UTF-16", "UTF-16BE", and "UTF-16LE" as
186: * identical.
187: */
188: if (jspConfigPageEnc != null
189: && !jspConfigPageEnc.equals(sourceEnc)
190: && (!jspConfigPageEnc.startsWith("UTF-16") || !sourceEnc
191: .startsWith("UTF-16"))) {
192: err.jspError(
193: "jsp.error.prolog_config_encoding_mismatch",
194: sourceEnc, jspConfigPageEnc);
195: }
196: }
197:
198: // Dispatch to the appropriate parser
199: if (isXml) {
200: // JSP document (XML syntax)
201: // InputStream for jspx page is created and properly closed in
202: // JspDocumentParser.
203: parsedPage = JspDocumentParser.parse(this , absFileName,
204: jarFile, parent, isTagFile, directiveOnly,
205: sourceEnc, jspConfigPageEnc,
206: isEncodingSpecifiedInProlog, isBomPresent);
207: } else {
208: // Standard syntax
209: InputStreamReader inStreamReader = null;
210: try {
211: inStreamReader = JspUtil.getReader(absFileName,
212: sourceEnc, jarFile, ctxt, err, skip);
213: JspReader jspReader = new JspReader(ctxt, absFileName,
214: sourceEnc, inStreamReader, err);
215: parsedPage = Parser.parse(this , jspReader, parent,
216: isTagFile, directiveOnly, jarFileUrl,
217: sourceEnc, jspConfigPageEnc,
218: isDefaultPageEncoding, isBomPresent);
219: } finally {
220: if (inStreamReader != null) {
221: try {
222: inStreamReader.close();
223: } catch (Exception any) {
224: }
225: }
226: }
227: }
228:
229: if (jarFile != null) {
230: try {
231: jarFile.close();
232: } catch (Throwable t) {
233: }
234: }
235:
236: baseDirStack.pop();
237:
238: return parsedPage;
239: }
240:
241: /*
242: * Checks to see if the given URI is matched by a URL pattern specified in
243: * a jsp-property-group in web.xml, and if so, returns the value of the
244: * <page-encoding> element.
245: *
246: * @param absFileName The URI to match
247: *
248: * @return The value of the <page-encoding> attribute of the
249: * jsp-property-group with matching URL pattern
250: */
251: private String getJspConfigPageEncoding(String absFileName)
252: throws JasperException {
253:
254: JspConfig jspConfig = ctxt.getOptions().getJspConfig();
255: JspConfig.JspProperty jspProperty = jspConfig
256: .findJspProperty(absFileName);
257: return jspProperty.getPageEncoding();
258: }
259:
260: /**
261: * Determines the syntax (standard or XML) and page encoding properties
262: * for the given file, and stores them in the 'isXml' and 'sourceEnc'
263: * instance variables, respectively.
264: */
265: private void determineSyntaxAndEncoding(String absFileName,
266: JarFile jarFile, String jspConfigPageEnc)
267: throws JasperException, IOException {
268:
269: isXml = false;
270:
271: /*
272: * 'true' if the syntax (XML or standard) of the file is given
273: * from external information: either via a JSP configuration element,
274: * the ".jspx" suffix, or the enclosing file (for included resources)
275: */
276: boolean isExternal = false;
277:
278: /*
279: * Indicates whether we need to revert from temporary usage of
280: * "ISO-8859-1" back to "UTF-8"
281: */
282: boolean revert = false;
283:
284: JspConfig jspConfig = ctxt.getOptions().getJspConfig();
285: JspConfig.JspProperty jspProperty = jspConfig
286: .findJspProperty(absFileName);
287: if (jspProperty.isXml() != null) {
288: // If <is-xml> is specified in a <jsp-property-group>, it is used.
289: isXml = JspUtil.booleanValue(jspProperty.isXml());
290: isExternal = true;
291: } else if (absFileName.endsWith(".jspx")
292: || absFileName.endsWith(".tagx")) {
293: isXml = true;
294: isExternal = true;
295: }
296:
297: if (isExternal && !isXml) {
298: // JSP (standard) syntax. Use encoding specified in jsp-config
299: // if provided.
300: sourceEnc = jspConfigPageEnc;
301: if (sourceEnc != null) {
302: return;
303: }
304: // We don't know the encoding, so use BOM to determine it
305: sourceEnc = "ISO-8859-1";
306: } else {
307: // XML syntax or unknown, (auto)detect encoding ...
308: Object[] ret = XMLEncodingDetector.getEncoding(absFileName,
309: jarFile, ctxt, err);
310: sourceEnc = (String) ret[0];
311: if (((Boolean) ret[1]).booleanValue()) {
312: isEncodingSpecifiedInProlog = true;
313: }
314: if (((Boolean) ret[2]).booleanValue()) {
315: isBomPresent = true;
316: }
317: skip = ((Integer) ret[3]).intValue();
318:
319: if (!isXml && sourceEnc.equals("UTF-8")) {
320: /*
321: * We don't know if we're dealing with XML or standard syntax.
322: * Therefore, we need to check to see if the page contains
323: * a <jsp:root> element.
324: *
325: * We need to be careful, because the page may be encoded in
326: * ISO-8859-1 (or something entirely different), and may
327: * contain byte sequences that will cause a UTF-8 converter to
328: * throw exceptions.
329: *
330: * It is safe to use a source encoding of ISO-8859-1 in this
331: * case, as there are no invalid byte sequences in ISO-8859-1,
332: * and the byte/character sequences we're looking for (i.e.,
333: * <jsp:root>) are identical in either encoding (both UTF-8
334: * and ISO-8859-1 are extensions of ASCII).
335: */
336: sourceEnc = "ISO-8859-1";
337: revert = true;
338: }
339: }
340:
341: if (isXml) {
342: // (This implies 'isExternal' is TRUE.)
343: // We know we're dealing with a JSP document (via JSP config or
344: // ".jspx" suffix), so we're done.
345: return;
346: }
347:
348: /*
349: * At this point, 'isExternal' or 'isXml' is FALSE.
350: * Search for jsp:root action, in order to determine if we're dealing
351: * with XML or standard syntax (unless we already know what we're
352: * dealing with, i.e., when 'isExternal' is TRUE and 'isXml' is FALSE).
353: * No check for XML prolog, since nothing prevents a page from
354: * outputting XML and still using JSP syntax (in this case, the
355: * XML prolog is treated as template text).
356: */
357: JspReader jspReader = null;
358: try {
359: jspReader = new JspReader(ctxt, absFileName, sourceEnc,
360: jarFile, err);
361: } catch (FileNotFoundException ex) {
362: throw new JasperException(ex);
363: }
364: jspReader.setSingleFile(true);
365: Mark startMark = jspReader.mark();
366: if (!isExternal) {
367: jspReader.reset(startMark);
368: if (hasJspRoot(jspReader)) {
369: if (revert) {
370: sourceEnc = "UTF-8";
371: }
372: isXml = true;
373: return;
374: } else {
375: if (revert && isBomPresent) {
376: sourceEnc = "UTF-8";
377: }
378: isXml = false;
379: }
380: }
381:
382: /*
383: * At this point, we know we're dealing with JSP syntax.
384: * If an XML prolog is provided, it's treated as template text.
385: * Determine the page encoding from the page directive, unless it's
386: * specified via JSP config.
387: */
388: if (!isBomPresent) {
389: sourceEnc = jspConfigPageEnc;
390: if (sourceEnc == null) {
391: sourceEnc = getPageEncodingForJspSyntax(jspReader,
392: startMark);
393: if (sourceEnc == null) {
394: // Default to "ISO-8859-1" per JSP spec
395: sourceEnc = "ISO-8859-1";
396: isDefaultPageEncoding = true;
397: }
398: }
399: }
400:
401: }
402:
403: /*
404: * Determines page source encoding for page or tag file in JSP syntax,
405: * by reading (in this order) the value of the 'pageEncoding' page
406: * directive attribute, or the charset value of the 'contentType' page
407: * directive attribute.
408: *
409: * @return The page encoding, or null if not found
410: */
411: private String getPageEncodingForJspSyntax(JspReader jspReader,
412: Mark startMark) throws JasperException {
413:
414: String encoding = null;
415: String saveEncoding = null;
416:
417: jspReader.reset(startMark);
418:
419: /*
420: * Determine page encoding from directive of the form <%@ page %>,
421: * <%@ tag %>, <jsp:directive.page > or <jsp:directive.tag >.
422: */
423: while (true) {
424: if (jspReader.skipUntil("<") == null) {
425: break;
426: }
427: // If this is a comment, skip until its end
428: if (jspReader.matches("%--")) {
429: if (jspReader.skipUntil("--%>") == null) {
430: // error will be caught in Parser
431: break;
432: }
433: continue;
434: }
435: boolean isDirective = jspReader.matches("%@");
436: if (isDirective) {
437: jspReader.skipSpaces();
438: } else {
439: isDirective = jspReader.matches("jsp:directive.");
440: }
441: if (!isDirective) {
442: continue;
443: }
444:
445: // compare for "tag ", so we don't match "taglib"
446: if (jspReader.matches("tag ") || jspReader.matches("page")) {
447:
448: jspReader.skipSpaces();
449: Attributes attrs = Parser.parseAttributes(this ,
450: jspReader);
451: encoding = getPageEncodingFromDirective(attrs,
452: "pageEncoding");
453: if (encoding != null) {
454: break;
455: }
456: encoding = getPageEncodingFromDirective(attrs,
457: "contentType");
458: if (encoding != null) {
459: saveEncoding = encoding;
460: }
461: }
462: }
463:
464: if (encoding == null) {
465: encoding = saveEncoding;
466: }
467:
468: return encoding;
469: }
470:
471: /*
472: * Scans the given attributes for the attribute with the given name,
473: * which is either 'pageEncoding' or 'contentType', and returns the
474: * specified page encoding.
475: *
476: * In the case of 'contentType', the page encoding is taken from the
477: * content type's 'charset' component.
478: *
479: * @param attrs The page directive attributes
480: * @param attrName The name of the attribute to search for (either
481: * 'pageEncoding' or 'contentType')
482: *
483: * @return The page encoding, or null
484: */
485: private String getPageEncodingFromDirective(Attributes attrs,
486: String attrName) {
487: String value = attrs.getValue(attrName);
488: if (attrName.equals("pageEncoding")) {
489: return value;
490: }
491:
492: // attrName = contentType
493: String contentType = value;
494: String encoding = null;
495: if (contentType != null) {
496: int loc = contentType.indexOf(CHARSET);
497: if (loc != -1) {
498: encoding = contentType
499: .substring(loc + CHARSET.length());
500: }
501: }
502:
503: return encoding;
504: }
505:
506: /*
507: * Resolve the name of the file and update baseDirStack() to keep track of
508: * the current base directory for each included file.
509: * The 'root' file is always an 'absolute' path, so no need to put an
510: * initial value in the baseDirStack.
511: */
512: private String resolveFileName(String inFileName) {
513: String fileName = inFileName.replace('\\', '/');
514: boolean isAbsolute = fileName.startsWith("/");
515: fileName = isAbsolute ? fileName : (String) baseDirStack.peek()
516: + fileName;
517: String baseDir = fileName.substring(0, fileName
518: .lastIndexOf("/") + 1);
519: baseDirStack.push(baseDir);
520: return fileName;
521: }
522:
523: /*
524: * Checks to see if the given page contains, as its first element, a <root>
525: * element whose prefix is bound to the JSP namespace, as in:
526: *
527: * <wombat:root xmlns:wombat="http://java.sun.com/JSP/Page" version="1.2">
528: * ...
529: * </wombat:root>
530: *
531: * @param reader The reader for this page
532: *
533: * @return true if this page contains a root element whose prefix is bound
534: * to the JSP namespace, and false otherwise
535: */
536: private boolean hasJspRoot(JspReader reader) throws JasperException {
537:
538: // <prefix>:root must be the first element
539: Mark start = null;
540: while ((start = reader.skipUntil("<")) != null) {
541: int c = reader.nextChar();
542: if (c != '!' && c != '?')
543: break;
544: }
545: if (start == null) {
546: return false;
547: }
548: Mark stop = reader.skipUntil(":root");
549: if (stop == null) {
550: return false;
551: }
552: // call substring to get rid of leading '<'
553: String prefix = reader.getText(start, stop).substring(1);
554:
555: start = stop;
556: stop = reader.skipUntil(">");
557: if (stop == null) {
558: return false;
559: }
560:
561: // Determine namespace associated with <root> element's prefix
562: String root = reader.getText(start, stop);
563: String xmlnsDecl = "xmlns:" + prefix;
564: int index = root.indexOf(xmlnsDecl);
565: if (index == -1) {
566: return false;
567: }
568: index += xmlnsDecl.length();
569: while (index < root.length()
570: && Character.isWhitespace(root.charAt(index))) {
571: index++;
572: }
573: if (index < root.length() && root.charAt(index) == '=') {
574: index++;
575: while (index < root.length()
576: && Character.isWhitespace(root.charAt(index))) {
577: index++;
578: }
579: if (index < root.length()
580: && root.charAt(index++) == '"'
581: && root.regionMatches(index, JSP_URI, 0, JSP_URI
582: .length())) {
583: return true;
584: }
585: }
586:
587: return false;
588: }
589:
590: private JarFile getJarFile(URL jarFileUrl) throws IOException {
591: JarFile jarFile = null;
592:
593: if (jarFileUrl != null) {
594: JarURLConnection conn = (JarURLConnection) jarFileUrl
595: .openConnection();
596: conn.setUseCaches(false);
597: conn.connect();
598: jarFile = conn.getJarFile();
599: }
600:
601: return jarFile;
602: }
603:
604: }
|