001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.wicket.markup;
018:
019: import java.io.IOException;
020: import java.text.ParseException;
021: import java.util.regex.Matcher;
022: import java.util.regex.Pattern;
023:
024: import org.apache.wicket.Application;
025: import org.apache.wicket.Page;
026: import org.apache.wicket.WicketRuntimeException;
027: import org.apache.wicket.markup.parser.IMarkupFilter;
028: import org.apache.wicket.markup.parser.IXmlPullParser;
029: import org.apache.wicket.markup.parser.XmlPullParser;
030: import org.apache.wicket.markup.parser.filter.EnclosureHandler;
031: import org.apache.wicket.markup.parser.filter.HeadForceTagIdHandler;
032: import org.apache.wicket.markup.parser.filter.HtmlHandler;
033: import org.apache.wicket.markup.parser.filter.HtmlHeaderSectionHandler;
034: import org.apache.wicket.markup.parser.filter.RelativePathPrefixHandler;
035: import org.apache.wicket.markup.parser.filter.TagTypeHandler;
036: import org.apache.wicket.markup.parser.filter.WicketLinkTagHandler;
037: import org.apache.wicket.markup.parser.filter.WicketMessageTagHandler;
038: import org.apache.wicket.markup.parser.filter.WicketNamespaceHandler;
039: import org.apache.wicket.markup.parser.filter.WicketRemoveTagHandler;
040: import org.apache.wicket.markup.parser.filter.WicketTagIdentifier;
041: import org.apache.wicket.settings.IMarkupSettings;
042: import org.apache.wicket.util.resource.ResourceStreamNotFoundException;
043: import org.apache.wicket.util.resource.StringResourceStream;
044: import org.apache.wicket.util.string.AppendingStringBuffer;
045:
046: /**
047: * This is a Wicket MarkupParser specifically for (X)HTML. It makes use of a
048: * streaming XML parser to read the markup and IMarkupFilters to remove
049: * comments, identify Wicket relevant tags, apply html specific treatments etc.
050: * <p>
051: * The result will be a Markup object, which is basically a list, containing
052: * Wicket relevant tags and RawMarkup.
053: *
054: * @see IMarkupFilter
055: * @see IMarkupParserFactory
056: * @see IMarkupSettings
057: * @see MarkupResourceData
058: *
059: * @author Jonathan Locke
060: * @author Juergen Donnerstag
061: */
062: public class MarkupParser {
063: /** Conditional comment section, which is NOT treated as a comment section */
064: private static final Pattern CONDITIONAL_COMMENT = Pattern
065: .compile("\\[if .+\\]>(.|\n|\r)*<!\\[endif\\]");
066:
067: /** The XML parser to use */
068: private final IXmlPullParser xmlParser;
069:
070: /** The markup handler chain: each filter has a specific task */
071: private IMarkupFilter markupFilterChain;
072:
073: /** The markup created by reading the markup file */
074: private final Markup markup;
075:
076: /** Temporary variable: Application.get().getMarkupSettings() */
077: private final IMarkupSettings markupSettings;
078:
079: /**
080: * Constructor.
081: *
082: * @param resource
083: * The markup resource (file)
084: */
085: public MarkupParser(final MarkupResourceStream resource) {
086: this (new XmlPullParser(), resource);
087: }
088:
089: /**
090: * Constructor. Usually for testing purposes only
091: *
092: * @param markup
093: * The markup resource.
094: */
095: public MarkupParser(final String markup) {
096: this (new XmlPullParser(), new MarkupResourceStream(
097: new StringResourceStream(markup)));
098: }
099:
100: /**
101: * Constructor.
102: *
103: * @param xmlParser
104: * The streaming xml parser to read and parse the markup
105: * @param resource
106: * The markup resource (file)
107: */
108: public MarkupParser(final IXmlPullParser xmlParser,
109: final MarkupResourceStream resource) {
110: this .xmlParser = xmlParser;
111: markupSettings = Application.get().getMarkupSettings();
112:
113: MarkupResourceData markup = new MarkupResourceData();
114: markup.setResource(resource);
115:
116: this .markup = new Markup(markup);
117:
118: // Initialize the markup filter chain
119: initializeMarkupFilters();
120: }
121:
122: /**
123: * In case you want to analyze markup which BY DEFAULT does not use "wicket"
124: * to find relevant tags.
125: *
126: * @param namespace
127: */
128: public final void setWicketNamespace(final String namespace) {
129: markup.getMarkupResourceData().setWicketNamespace(namespace);
130: }
131:
132: /**
133: * Applications which subclass initFilterChain() might also wish to access
134: * the markup resource stream.
135: *
136: * @return The markup resource stream
137: */
138: protected MarkupResourceStream getMarkupResourceStream() {
139: return markup.getMarkupResourceData().getResource();
140: }
141:
142: /**
143: * Create a new markup filter chain and initialize with all default filters
144: * required.
145: */
146: private final void initializeMarkupFilters() {
147: // Chain together all the different markup filters and configure them
148: markupFilterChain = xmlParser;
149:
150: MarkupResourceData markupResourceData = markup
151: .getMarkupResourceData();
152:
153: appendMarkupFilter(new WicketTagIdentifier(markupResourceData));
154: appendMarkupFilter(new TagTypeHandler());
155: appendMarkupFilter(new HtmlHandler());
156: appendMarkupFilter(new WicketRemoveTagHandler());
157: appendMarkupFilter(new WicketLinkTagHandler());
158: appendMarkupFilter(new WicketNamespaceHandler(
159: markupResourceData));
160:
161: // Provided the wicket component requesting the markup is known ...
162: final MarkupResourceStream resource = markupResourceData
163: .getResource();
164: if (resource != null) {
165: final ContainerInfo containerInfo = resource
166: .getContainerInfo();
167: if (containerInfo != null) {
168: appendMarkupFilter(new WicketMessageTagHandler());
169:
170: // Pages require additional handlers
171: if (Page.class.isAssignableFrom(containerInfo
172: .getContainerClass())) {
173: appendMarkupFilter(new HtmlHeaderSectionHandler(
174: markup));
175: }
176:
177: appendMarkupFilter(new HeadForceTagIdHandler(
178: containerInfo.getContainerClass()));
179: }
180: }
181:
182: appendMarkupFilter(new RelativePathPrefixHandler());
183: appendMarkupFilter(new EnclosureHandler());
184: }
185:
186: /**
187: * By default don't do anything. Subclasses may append additional markup
188: * filters if required.
189: *
190: * @see #appendMarkupFilter(IMarkupFilter)
191: * @deprecated since 1.3
192: */
193: protected void initFilterChain() {
194: throw new WicketRuntimeException(
195: "This method is no longer suppoert: since 1.3");
196: }
197:
198: /**
199: * Append a new filter to the list of already pre-configured markup filters.
200: *
201: * @param filter
202: * The filter to be appended
203: */
204: public final void appendMarkupFilter(final IMarkupFilter filter) {
205: appendMarkupFilter(filter, RelativePathPrefixHandler.class);
206: }
207:
208: /**
209: * Append a new filter to the list of already pre-configured markup filters.
210: * Add the new filter before the "beforeFilter" which is identified by its
211: * class.
212: *
213: * @param filter
214: * The filter to be appended
215: * @param beforeFilter
216: * The filter will be added before the beforeFilter. If
217: * beforeFilter == null or beforeFilter not found than append to
218: * the end
219: */
220: public final void appendMarkupFilter(final IMarkupFilter filter,
221: final Class beforeFilter) {
222: if ((beforeFilter == null) || (markupFilterChain == null)) {
223: filter.setParent(markupFilterChain);
224: markupFilterChain = filter;
225: } else {
226: IMarkupFilter current = markupFilterChain;
227: while (current != null) {
228: if (current.getClass() == beforeFilter) {
229: filter.setParent(current.getParent());
230: current.setParent(filter);
231: break;
232: }
233: current = current.getParent();
234: }
235:
236: if (current == null) {
237: filter.setParent(markupFilterChain);
238: markupFilterChain = filter;
239: }
240: }
241: }
242:
243: /**
244: * Reads and parses markup from a file.
245: *
246: * @return The markup
247: * @throws IOException
248: * @throws ResourceStreamNotFoundException
249: */
250: public final Markup parse() throws IOException,
251: ResourceStreamNotFoundException {
252: MarkupResourceData markupResourceData = markup
253: .getMarkupResourceData();
254:
255: // Initialize the xml parser
256: xmlParser.parse(markupResourceData.getResource()
257: .getInputStream(), markupSettings
258: .getDefaultMarkupEncoding());
259:
260: // parse the xml markup and tokenize it into wicket relevant markup
261: // elements
262: parseMarkup();
263:
264: markupResourceData.setEncoding(xmlParser.getEncoding());
265: markupResourceData.setXmlDeclaration(xmlParser
266: .getXmlDeclaration());
267:
268: return markup;
269: }
270:
271: /**
272: * Get the next tag from the markup file
273: *
274: * @return The next tag
275: * @throws ParseException
276: */
277: public ComponentTag getNextTag() throws ParseException {
278: return (ComponentTag) markupFilterChain.nextTag();
279: }
280:
281: /**
282: * Scans the given markup and extracts balancing tags.
283: *
284: */
285: private void parseMarkup() {
286: // Get relevant settings from the Application
287: final boolean stripComments = markupSettings.getStripComments();
288: final boolean compressWhitespace = markupSettings
289: .getCompressWhitespace();
290:
291: try {
292: // always remember the latest index (size)
293: int size = markup.size();
294:
295: // Loop through tags
296: ComponentTag tag;
297: while (null != (tag = getNextTag())) {
298: boolean add = (tag.getId() != null);
299: if (!add && tag.getXmlTag().isClose()) {
300: add = ((tag.getOpenTag() != null) && (tag
301: .getOpenTag().getId() != null));
302: }
303:
304: // Add tag to list?
305: if (add || tag.isModified() || (markup.size() != size)) {
306: // Add text from last position to the current tag position
307: final CharSequence text = xmlParser
308: .getInputFromPositionMarker(tag.getPos());
309: if (text.length() > 0) {
310: String rawMarkup = text.toString();
311:
312: if (stripComments) {
313: rawMarkup = removeComment(rawMarkup);
314: }
315:
316: if (compressWhitespace) {
317: rawMarkup = compressWhitespace(rawMarkup);
318: }
319:
320: // Make sure you add it at the correct location.
321: // IMarkupFilters might have added elements as well.
322: markup.addMarkupElement(size, new RawMarkup(
323: rawMarkup));
324: }
325:
326: xmlParser.setPositionMarker();
327:
328: if (add) {
329: // Add to the markup unless the tag has been flagged as
330: // to be removed from the markup. (e.g. <wicket:remove>
331: if (tag.isIgnore() == false) {
332: markup.addMarkupElement(tag);
333: }
334: } else if (tag.isModified()) {
335: markup.addMarkupElement(new RawMarkup(tag
336: .toCharSequence()));
337: } else {
338: xmlParser.setPositionMarker(tag.getPos());
339: }
340: }
341:
342: // always remember the latest index (size)
343: size = markup.size();
344: }
345: } catch (final ParseException ex) {
346: // Add remaining input string
347: final CharSequence text = xmlParser
348: .getInputFromPositionMarker(-1);
349: if (text.length() > 0) {
350: markup.addMarkupElement(new RawMarkup(text));
351: }
352:
353: markup.getMarkupResourceData().setEncoding(
354: xmlParser.getEncoding());
355: markup.getMarkupResourceData().setXmlDeclaration(
356: xmlParser.getXmlDeclaration());
357:
358: final MarkupStream markupStream = new MarkupStream(markup);
359: markupStream.setCurrentIndex(markup.size() - 1);
360: throw new MarkupException(markupStream, ex.getMessage(), ex);
361: }
362:
363: // Add tail?
364: final CharSequence text = xmlParser
365: .getInputFromPositionMarker(-1);
366: if (text.length() > 0) {
367: String rawMarkup = text.toString();
368:
369: if (stripComments) {
370: rawMarkup = removeComment(rawMarkup);
371: }
372:
373: if (compressWhitespace) {
374: rawMarkup = compressWhitespace(rawMarkup);
375: }
376:
377: // Make sure you add it at the correct location.
378: // IMarkupFilters might have added elements as well.
379: markup.addMarkupElement(new RawMarkup(rawMarkup));
380: }
381:
382: // Make all tags immutable and the list of elements unmodifable
383: markup.makeImmutable();
384: }
385:
386: /**
387: * Remove whitespaces from the raw markup
388: *
389: * @param rawMarkup
390: * @return rawMarkup
391: */
392: protected String compressWhitespace(String rawMarkup) {
393: // We don't want to compress whitespace inside <pre> tags, so we look
394: // for matches and:
395: // - Do whitespace compression on everything before the first match.
396: // - Append the <pre>.*?</pre> match with no compression.
397: // - Loop to find the next match.
398: // - Append with compression everything between the two matches.
399: // - Repeat until no match, then special-case the fragment after the
400: // last <pre>.
401:
402: Pattern preBlock = Pattern.compile("<pre>.*?</pre>",
403: Pattern.DOTALL | Pattern.MULTILINE);
404: Matcher m = preBlock.matcher(rawMarkup);
405: int lastend = 0;
406: StringBuffer sb = null;
407: while (true) {
408: boolean matched = m.find();
409: String nonPre = matched ? rawMarkup.substring(lastend, m
410: .start()) : rawMarkup.substring(lastend);
411: nonPre = nonPre.replaceAll("[ \\t]+", " ");
412: nonPre = nonPre.replaceAll("( ?[\\r\\n] ?)+", "\n");
413:
414: // Don't create a StringBuffer if we don't actually need one.
415: // This optimises the trivial common case where there is no <pre>
416: // tag at all down to just doing the replaceAlls above.
417: if (lastend == 0) {
418: if (matched) {
419: sb = new StringBuffer(rawMarkup.length());
420: } else {
421: return nonPre;
422: }
423: }
424: sb.append(nonPre);
425: if (matched) {
426: sb.append(m.group());
427: lastend = m.end();
428: } else {
429: break;
430: }
431: }
432: return sb.toString();
433: }
434:
435: /**
436: * Remove all comment sections (<!-- .. -->) from the raw markup. For
437: * reasons I don't understand, the following regex
438: * <code>"<!--(.|\n|\r)*?-->"<code>
439: * causes a stack overflow in some circumstances (jdk 1.5)
440: *
441: * @param rawMarkup
442: * @return raw markup
443: */
444: private String removeComment(String rawMarkup) {
445: int pos1 = rawMarkup.indexOf("<!--");
446: while (pos1 >= 0) {
447: final int pos2 = rawMarkup.indexOf("-->", pos1 + 4);
448:
449: final AppendingStringBuffer buf = new AppendingStringBuffer(
450: rawMarkup.length());
451: if ((pos2 >= 0) && (pos1 > 0)) {
452: final String comment = rawMarkup.substring(pos1 + 4,
453: pos2);
454: if (CONDITIONAL_COMMENT.matcher(comment).matches() == false) {
455: buf.append(rawMarkup.substring(0, pos1));
456: if (rawMarkup.length() >= pos2 + 4) {
457: buf.append(rawMarkup.substring(pos2 + 4));
458: }
459: rawMarkup = buf.toString();
460: }
461: }
462: pos1 = rawMarkup.length() <= pos1 + 2 ? -1 : rawMarkup
463: .indexOf("<!--", pos1 + 4);
464: }
465: return rawMarkup;
466: }
467: }
|