001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2008 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.modules.web.jspparser;
043:
044: import java.io.ByteArrayInputStream;
045: import java.io.IOException;
046: import java.io.InputStream;
047: import java.io.InputStreamReader;
048: import java.io.StringReader;
049: import java.util.logging.Level;
050: import java.util.logging.Logger;
051: import javax.xml.parsers.ParserConfigurationException;
052: import javax.xml.parsers.SAXParser;
053: import javax.xml.parsers.SAXParserFactory;
054: import org.netbeans.modules.web.api.webmodule.WebModule;
055: import org.netbeans.modules.web.jsps.parserapi.JspParserAPI;
056: import org.openide.filesystems.FileObject;
057: import org.xml.sax.Attributes;
058: import org.xml.sax.InputSource;
059: import org.xml.sax.SAXException;
060: import org.xml.sax.helpers.DefaultHandler;
061:
062: import org.netbeans.modules.xml.api.EncodingUtil;
063:
064: /**
065: * JSP 'open info' parser allowing to fastly determine encoding for JSPs in standart syntax
066: * with DD NOT specifying JSPs encodinf or syntax (at least 95% af all JSPs)
067: *
068: * How the encoding is currently detected:
069: * 1) find deplyment descriptor from given webmodule
070: * 2) if found, parse it and find following elements
071: * <jsp-property-group>
072: * <page-encoding>
073: * ||
074: * <is-xml>
075: * </jsp-property-group>
076: * 3) if any of the nested elements found, give it up and return null (and let jasper parser to determine the encoding)
077: * 4) if the DD is not found or it doesn't contain the elements from #2 test if the file is JSP document (according to the extension)
078: * 5) if the file is a XML document give it up (so far - we can easily implement a simple enc. parser for XMLs as well)
079: * 6) the page is standard syntax - parse first 8kB of text and...
080: * 7) if <%@page encoding="xxx"%> is found return the encoding value
081: * 8) if <%@page encoding="xxx"%> is NOT found find <%@page contentType="mimetype; char-set=xxx"%>
082: * 9) if CT found return encoding from it
083: *
084: * @author Marek Fukala
085: */
086: public class FastOpenInfoParser {
087:
088: private static final Logger LOGGER = Logger
089: .getLogger(FastOpenInfoParser.class.getName());
090:
091: static FastOpenInfoParser get(WebModule wm) {
092: return new FastOpenInfoParser(wm);
093: }
094:
095: private final WebModule wm;
096:
097: /** Creates a new instance of FastOpenInfoParser */
098: private FastOpenInfoParser(WebModule wm) {
099: this .wm = wm;
100: }
101:
102: public JspParserAPI.JspOpenInfo getJspOpenInfo(FileObject fo,
103: boolean useEditor) {
104: long start = 0;
105: if (LOGGER.isLoggable(Level.FINE)) {
106: start = System.currentTimeMillis();
107: }
108: try {
109: if (wm != null && wm.getDocumentBase() != null && useEditor) {
110: return null; //better let the parser do it
111: }
112:
113: //if there isn't a webmodule detect the encoding from the file only
114: if (wm != null) {
115: //find deployment descriptor
116: FileObject documentBase = wm.getDocumentBase();
117: if (documentBase != null) {
118: FileObject dd = wm.getDeploymentDescriptor();
119: //test whether the DD exists, if not parse the JSP file
120: if (dd != null) {
121: //parse the DD and try to find <jsp-property-group> element with <page-encoding> and <is-xml> elements
122: DDParseInfo ddParseInfo = parse(new InputSource(
123: dd.getInputStream())); //parse with default encoding
124: //if the DD defines encoding or marks jsps as xml documents return null
125: if (ddParseInfo.definesEncoding
126: || ddParseInfo.marksXMLDocuments) {
127: return null;
128: }
129: }
130: }
131: }
132:
133: String enc = null;
134:
135: //get encoding from the disk file if webmodule is null and useEditor is true (during file save)
136: //XXX may be fixed better - to get the editor document instance from the fileobject (but I need to add some deps)
137:
138: //#64418 - create a ByteArrayInputStream - we need a an inputstream with marks supported
139: byte[] buffer = new byte[8192 * 4];
140: InputStream _is = fo.getInputStream();
141: int readed = _is.read(buffer);
142: InputStream is = new ByteArrayInputStream(buffer, 0, readed);
143: _is.close();
144:
145: if (isXMLSyntax(fo)) {
146: //XML document - detect encoding acc. to fisrt 4 bytes or xml prolog
147: enc = EncodingUtil.detectEncoding(is);
148: } else {
149: //JSP in standart syntax
150: //find <%@page encoding or contentType attributes
151: enc = parseEncodingFromFile(is);
152: }
153:
154: LOGGER.fine("[fast open parser] detected " + enc
155: + " encoding.");
156: return enc == null ? null : new JspParserAPI.JspOpenInfo(
157: isXMLSyntax(fo), enc);
158:
159: } catch (IOException e) {
160: //do not handle
161: } catch (SAXException se) {
162: //do not handle
163: } catch (ParserConfigurationException pce) {
164: //do not handle
165: } finally {
166: if (LOGGER.isLoggable(Level.FINE)) {
167: LOGGER.fine("[fast open parser] taken "
168: + (System.currentTimeMillis() - start) + "ms.");
169: }
170: }
171: return null;
172: }
173:
174: private static String parseEncodingFromFile(InputStream is)
175: throws IOException {
176: InputStreamReader isr = new InputStreamReader(is); //read with default encoding
177: //read only first 8kB of text
178: char[] buffer = new char[8192];
179: int readed = isr.read(buffer);
180: isr.close();
181:
182: return parseJspText(buffer, readed);
183: }
184:
185: private static boolean isXMLSyntax(FileObject fo) {
186: String ext = fo.getExt();
187: if (ext != null
188: && ("jspx".equalsIgnoreCase(ext) || "tagx"
189: .equalsIgnoreCase(ext))) { // NOI18N
190: return true;
191: }
192: return false;
193: }
194:
195: //JSP encoding parser
196: private static final String PAGE = "page";
197: private static final String ENCODING = "pageEncoding";
198: private static final String CONTENTYPE = "contentType";
199: private static final String CHARSET = "charset=";
200:
201: private static final int P_INIT = 0;
202: private static final int P_LT = 1; //after <
203: private static final int P_LT_PER = 2; //after <%
204: private static final int P_LT_PER_ATS = 3; //after <%@
205: private static final int P_PD = 4; //in page directive
206: private static final int P_APER = 5; //after closing %
207:
208: private static final int P_ENC = 7; //after 'encoding' attribute
209: private static final int P_ENC_EQ = 8; //after encoding=
210: private static final int P_ENC_EQ_VAL = 9; //after encoding="
211:
212: private static final int P_CT = 11; //after 'contentType' attribute
213: private static final int P_CT_EQ = 12; //after contentType=
214: private static final int P_CT_EQ_VAL = 13; //after contentType="
215: private static final int P_CT_VAL_CHS = 14; //after contentType="TYPE; char-set=
216:
217: private static String parseJspText(char[] buffer, int len) {
218: String contentType = null;
219:
220: int state = P_INIT;
221: int i = 0;
222: int pos = -1;
223: while (i < len) {
224: char c = buffer[i];
225:
226: switch (state) {
227: case P_INIT:
228: if (c == '<') { // NOI18N
229: state = P_LT;
230: }
231: i++;
232: break;
233: case P_LT:
234: switch (c) {
235: case '%': // NOI18N
236: state = P_LT_PER;
237: break;
238: default:
239: state = P_INIT;
240: break;
241: }
242: i++;
243: break;
244:
245: case P_LT_PER:
246: switch (c) {
247: case '@': // NOI18N
248: state = P_LT_PER_ATS;
249: break;
250: default:
251: state = P_INIT;
252: break;
253: }
254: i++;
255: break;
256: case P_LT_PER_ATS:
257: if (c == ' ' || c == '\t') { // NOI18N
258: i++;
259: break;
260: } else if (prescanFor(buffer, i, PAGE)) {
261: state = P_PD;
262: i = i + PAGE.length();
263: break;
264: }
265: state = P_INIT;
266: i++;
267: break;
268: case P_PD:
269: if (prescanFor(buffer, i, ENCODING)) {
270: state = P_ENC;
271: i = i + ENCODING.length();
272: break;
273: } else if (prescanFor(buffer, i, CONTENTYPE)) {
274: state = P_CT;
275: i = i + CONTENTYPE.length();
276: break;
277: } else if (c == '%') { // NOI18N
278: state = P_APER;
279: }
280: i++;
281: break;
282: case P_APER:
283: if (c == '>') { // NOI18N
284: state = P_INIT;
285: } else {
286: state = P_PD;
287: }
288: i++;
289: break;
290: case P_ENC:
291: switch (c) {
292: case ' ': // NOI18N
293: case '\t': // NOI18N
294: ;
295: break;
296: case '=': // NOI18N
297: state = P_ENC_EQ;
298: break;
299: case '%': // NOI18N
300: state = P_APER;
301: break;
302: default:
303: state = P_PD;
304: break;
305: }
306: i++;
307: break;
308: case P_ENC_EQ:
309: switch (c) {
310: case ' ': // NOI18N
311: case '\t': // NOI18N
312: break;
313: case '"': // NOI18N
314: state = P_ENC_EQ_VAL;
315: pos = i + 1;
316: break;
317: case '%': // NOI18N
318: state = P_APER;
319: break;
320: default:
321: state = P_PD;
322: break;
323: }
324: i++;
325: break;
326: case P_ENC_EQ_VAL:
327: switch (c) {
328: case '"': // NOI18N
329: return new String(buffer, pos, i - pos); //return the encoding attr value
330: //break;
331: default:
332: }
333: i++;
334: break;
335:
336: case P_CT:
337: switch (c) {
338: case ' ': // NOI18N
339: case '\t': // NOI18N
340: break;
341: case '=': // NOI18N
342: state = P_CT_EQ;
343: break;
344: case '%': // NOI18N
345: state = P_APER;
346: break;
347: default:
348: state = P_PD;
349: break;
350: }
351: i++;
352: break;
353: case P_CT_EQ:
354: switch (c) {
355: case ' ': // NOI18N
356: case '\t': // NOI18N
357: break;
358: case '"': // NOI18N
359: state = P_CT_EQ_VAL;
360: break;
361: case '%': // NOI18N
362: state = P_APER;
363: break;
364: default:
365: state = P_PD;
366: break;
367: }
368: i++;
369: break;
370: case P_CT_EQ_VAL:
371: if (prescanFor(buffer, i, CHARSET)) {
372: state = P_CT_VAL_CHS;
373: i = i + CHARSET.length();
374: pos = i;
375: break;
376: } else if (c == '"') { // NOI18N
377: state = P_PD;
378: break;
379: }
380: i++;
381: break;
382: case P_CT_VAL_CHS:
383: switch (c) {
384: case '"': // NOI18N
385: contentType = new String(buffer, pos, i - pos); //return the encoding attr value
386: state = P_PD;
387: break;
388: default:
389: }
390: i++;
391: break;
392: } //eof state switch
393: }
394:
395: //returns either contentType value or null; encoding is returned directly from the parser (has priority over CT)
396: return contentType;
397: }
398:
399: private static boolean prescanFor(char[] buffer, int position,
400: String text) {
401: if ((buffer.length - position) < text.length()) {
402: return false; //too short buffer - the text cannot be there
403: }
404: for (int i = 0; i < text.length(); i++) {
405: if (buffer[position + i] != text.charAt(i)) {
406: return false;
407: }
408: }
409: return true;
410: }
411:
412: static final String JSP_PROPERTY_GROUP = "jsp-property-group";
413: static final String PAGE_ENCODING = "page-encoding";
414: static final String IS_XML = "is-xml";
415:
416: /** returns an array of booleans - the first states whether the dd contains a <jsp-property-group> element
417: * with defined encoding resp. marks a set of JSPs to be xml documents. */
418: private static DDParseInfo parse(InputSource src)
419: throws IOException, SAXException,
420: ParserConfigurationException {
421: SAXParserFactory factory = SAXParserFactory.newInstance();
422: factory.setValidating(false);
423: SAXParser parser = factory.newSAXParser();
424: final DDParseInfo ddParseInfo = new DDParseInfo();
425:
426: class Handler extends DefaultHandler {
427: private boolean inJspPropertyGroup = false;
428:
429: @Override
430: public void startElement(String uri, String localname,
431: String qname, Attributes attr) throws SAXException {
432: String tagName = qname.toLowerCase();
433: if (JSP_PROPERTY_GROUP.equals(tagName)) {
434: inJspPropertyGroup = true;
435: }
436: if (inJspPropertyGroup) {
437: if (PAGE_ENCODING.equals(tagName)) {
438: ddParseInfo.definesEncoding = true;
439: }
440: if (IS_XML.equals(tagName)) {
441: ddParseInfo.marksXMLDocuments = true;
442: }
443: }
444: }
445:
446: @Override
447: public void endElement(String uri, String localname,
448: String qname) throws SAXException {
449: String tagName = qname.toLowerCase();
450: if (JSP_PROPERTY_GROUP.equals(tagName)) {
451: inJspPropertyGroup = false;
452: }
453: }
454:
455: @Override
456: public InputSource resolveEntity(String publicId,
457: String systemId) {
458: return new InputSource(new StringReader("")); //prevent the parser to use catalog entity resolver // NOI18N
459: }
460: }
461: parser.parse(src, new Handler());
462: return ddParseInfo;
463: }
464:
465: private static final class DDParseInfo {
466: public boolean definesEncoding, marksXMLDocuments;
467:
468: public DDParseInfo() {
469: }
470: }
471: }
|