001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. The ASF licenses this file to You
004: * under the Apache License, Version 2.0 (the "License"); you may not
005: * use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License. For additional information regarding
015: * copyright in this work, please see the NOTICE file in the top level
016: * directory of this distribution.
017: */
018:
019: package org.apache.roller.ui.rendering.servlets;
020:
021: import java.io.IOException;
022: import java.util.Date;
023: import java.util.HashMap;
024: import java.util.Map;
025: import java.util.regex.Pattern;
026: import javax.servlet.ServletConfig;
027: import javax.servlet.ServletException;
028: import javax.servlet.http.HttpServlet;
029: import javax.servlet.http.HttpServletRequest;
030: import javax.servlet.http.HttpServletResponse;
031: import javax.servlet.jsp.JspFactory;
032: import javax.servlet.jsp.PageContext;
033: import org.apache.commons.logging.Log;
034: import org.apache.commons.logging.LogFactory;
035: import org.apache.roller.RollerException;
036: import org.apache.roller.business.HitCountQueue;
037: import org.apache.roller.business.referrers.IncomingReferrer;
038: import org.apache.roller.business.referrers.ReferrerQueueManager;
039: import org.apache.roller.config.RollerConfig;
040: import org.apache.roller.config.RollerRuntimeConfig;
041: import org.apache.roller.business.RollerFactory;
042: import org.apache.roller.business.WeblogManager;
043: import org.apache.roller.pojos.Template;
044: import org.apache.roller.pojos.WeblogEntryData;
045: import org.apache.roller.pojos.WeblogTemplate;
046: import org.apache.roller.pojos.WebsiteData;
047: import org.apache.roller.ui.core.RollerContext;
048: import org.apache.roller.ui.rendering.util.InvalidRequestException;
049: import org.apache.roller.ui.rendering.util.WeblogPageRequest;
050: import org.apache.roller.util.cache.CachedContent;
051: import org.apache.roller.ui.rendering.Renderer;
052: import org.apache.roller.ui.rendering.RendererManager;
053: import org.apache.roller.ui.rendering.model.ModelLoader;
054: import org.apache.roller.ui.rendering.util.cache.SiteWideCache;
055: import org.apache.roller.ui.rendering.util.WeblogEntryCommentForm;
056: import org.apache.roller.ui.rendering.util.cache.WeblogPageCache;
057: import org.apache.roller.ui.rendering.util.ModDateHeaderUtil;
058: import org.apache.roller.util.SpamChecker;
059:
060: /**
061: * Provides access to weblog pages.
062: *
063: * @web.servlet name="PageServlet" load-on-startup="5"
064: * @web.servlet-mapping url-pattern="/roller-ui/rendering/page/*"
065: */
066: public class PageServlet extends HttpServlet {
067:
068: private static Log log = LogFactory.getLog(PageServlet.class);
069:
070: // for referrer processing
071: private boolean processReferrers = true;
072: private static Pattern robotPattern = null;
073:
074: // for caching
075: private boolean excludeOwnerPages = false;
076: private WeblogPageCache weblogPageCache = null;
077: private SiteWideCache siteWideCache = null;
078:
079: /**
080: * Init method for this servlet
081: */
082: public void init(ServletConfig servletConfig)
083: throws ServletException {
084:
085: super .init(servletConfig);
086:
087: log.info("Initializing PageServlet");
088:
089: this .excludeOwnerPages = RollerConfig
090: .getBooleanProperty("cache.excludeOwnerEditPages");
091:
092: // get a reference to the weblog page cache
093: this .weblogPageCache = WeblogPageCache.getInstance();
094:
095: // get a reference to the site wide cache
096: this .siteWideCache = SiteWideCache.getInstance();
097:
098: // see if built-in referrer processing is enabled
099: this .processReferrers = RollerConfig
100: .getBooleanProperty("referrers.processing.enabled");
101:
102: log.info("Referrer processing enabled = "
103: + this .processReferrers);
104:
105: // check for possible robot pattern
106: String robotPatternStr = RollerConfig
107: .getProperty("referrer.robotCheck.userAgentPattern");
108: if (robotPatternStr != null && robotPatternStr.length() > 0) {
109: // Parse the pattern, and store the compiled form.
110: try {
111: robotPattern = Pattern.compile(robotPatternStr);
112: } catch (Exception e) {
113: // Most likely a PatternSyntaxException; log and continue as if it is not set.
114: log.error(
115: "Error parsing referrer.robotCheck.userAgentPattern value '"
116: + robotPatternStr
117: + "'. Robots will not be filtered. ",
118: e);
119: }
120: }
121: }
122:
123: /**
124: * Handle GET requests for weblog pages.
125: */
126: public void doGet(HttpServletRequest request,
127: HttpServletResponse response) throws ServletException,
128: IOException {
129:
130: log.debug("Entering");
131:
132: // do referrer processing, if it's enabled
133: // NOTE: this *must* be done first because it triggers a hibernate flush
134: // which will close the active session and cause lazy init exceptions otherwise
135: if (this .processReferrers) {
136: boolean spam = this .processReferrer(request);
137: if (spam) {
138: log.debug("spammer, giving 'em a 403");
139: if (!response.isCommitted())
140: response.reset();
141: response.sendError(HttpServletResponse.SC_FORBIDDEN);
142: return;
143: }
144: }
145:
146: WebsiteData weblog = null;
147: boolean isSiteWide = false;
148:
149: WeblogPageRequest pageRequest = null;
150: try {
151: pageRequest = new WeblogPageRequest(request);
152:
153: weblog = pageRequest.getWeblog();
154: if (weblog == null) {
155: throw new RollerException("unable to lookup weblog: "
156: + pageRequest.getWeblogHandle());
157: }
158:
159: // is this the site-wide weblog?
160: isSiteWide = RollerRuntimeConfig
161: .isSiteWideWeblog(pageRequest.getWeblogHandle());
162:
163: } catch (Exception e) {
164: // some kind of error parsing the request or looking up weblog
165: log.debug("error creating page request", e);
166: response.sendError(HttpServletResponse.SC_NOT_FOUND);
167: return;
168: }
169:
170: // determine the lastModified date for this content
171: long lastModified = System.currentTimeMillis();
172: if (isSiteWide) {
173: lastModified = siteWideCache.getLastModified().getTime();
174: } else if (weblog.getLastModified() != null) {
175: lastModified = weblog.getLastModified().getTime();
176: }
177:
178: // 304 Not Modified handling.
179: // We skip this for logged in users to avoid the scenerio where a user
180: // views their weblog, logs in, then gets a 304 without the 'edit' links
181: if (!pageRequest.isLoggedIn()) {
182: if (ModDateHeaderUtil.respondIfNotModified(request,
183: response, lastModified)) {
184: return;
185: } else {
186: // set last-modified date
187: ModDateHeaderUtil.setLastModifiedHeader(response,
188: lastModified);
189: }
190: }
191:
192: // generate cache key
193: String cacheKey = null;
194: if (isSiteWide) {
195: cacheKey = siteWideCache.generateKey(pageRequest);
196: } else {
197: cacheKey = weblogPageCache.generateKey(pageRequest);
198: }
199:
200: // cached content checking
201: if ((!this .excludeOwnerPages || !pageRequest.isLoggedIn())
202: && request.getAttribute("skipCache") == null) {
203:
204: CachedContent cachedContent = null;
205: if (isSiteWide) {
206: cachedContent = (CachedContent) siteWideCache
207: .get(cacheKey);
208: } else {
209: cachedContent = (CachedContent) weblogPageCache.get(
210: cacheKey, lastModified);
211: }
212:
213: if (cachedContent != null) {
214: log.debug("HIT " + cacheKey);
215:
216: // allow for hit counting
217: if (!isSiteWide) {
218: this .processHit(weblog, request.getRequestURL()
219: .toString(), request.getHeader("referer"));
220: }
221:
222: response
223: .setContentLength(cachedContent.getContent().length);
224: response.setContentType(cachedContent.getContentType());
225: response.getOutputStream().write(
226: cachedContent.getContent());
227: return;
228:
229: } else {
230: log.debug("MISS " + cacheKey);
231: }
232: }
233:
234: // figure out what we are going to render
235: Template page = null;
236:
237: // If this is a popup request, then deal with it specially
238: // TODO: do we really need to keep supporting this?
239: if (request.getParameter("popup") != null) {
240: try {
241: // Does user have a popupcomments page?
242: page = weblog.getPageByName("_popupcomments");
243: } catch (Exception e) {
244: // ignored ... considered page not found
245: }
246:
247: // User doesn't have one so return the default
248: if (page == null) {
249: page = new WeblogTemplate(
250: "templates/weblog/popupcomments.vm", weblog,
251: "Comments", "Comments", "dummy_link",
252: "dummy_template", new Date(), "velocity", true,
253: false, null);
254: }
255:
256: // If request specified the page, then go with that
257: } else if (pageRequest.getWeblogPageName() != null) {
258: page = pageRequest.getWeblogPage();
259:
260: // If page not available from request, then use weblog's default
261: } else {
262: try {
263: page = weblog.getDefaultPage();
264: } catch (Exception e) {
265: log.error("Error getting weblogs default page", e);
266: }
267: }
268:
269: // Still no page? Then that is a 404
270: if (page == null) {
271: if (!response.isCommitted())
272: response.reset();
273: response.sendError(HttpServletResponse.SC_NOT_FOUND);
274: return;
275: }
276:
277: log.debug("page found, dealing with it");
278:
279: // validation. make sure that request input makes sense.
280: boolean invalid = false;
281: if (pageRequest.getWeblogPageName() != null && page.isHidden()) {
282: invalid = true;
283: }
284: if (pageRequest.getLocale() != null) {
285:
286: // locale view only allowed if weblog has enabled it
287: if (!pageRequest.getWeblog().isEnableMultiLang()) {
288: invalid = true;
289: }
290:
291: }
292: if (pageRequest.getWeblogAnchor() != null) {
293:
294: // permalink specified.
295: // entry must exist, be published before current time, and locale must match
296: WeblogEntryData entry = pageRequest.getWeblogEntry();
297: if (entry == null) {
298: invalid = true;
299: } else if (pageRequest.getLocale() != null
300: && !entry.getLocale().startsWith(
301: pageRequest.getLocale())) {
302: invalid = true;
303: } else if (!entry.isPublished()) {
304: invalid = true;
305: } else if (new Date().before(entry.getPubTime())) {
306: invalid = true;
307: }
308:
309: } else if (pageRequest.getWeblogCategoryName() != null) {
310:
311: // category specified. category must exist.
312: if (pageRequest.getWeblogCategory() == null) {
313: invalid = true;
314: }
315:
316: } else if (pageRequest.getTags() != null
317: && pageRequest.getTags().size() > 0) {
318:
319: try {
320: // tags specified. make sure they exist.
321: WeblogManager wmgr = RollerFactory.getRoller()
322: .getWeblogManager();
323: invalid = !wmgr.getTagComboExists(
324: pageRequest.getTags(), (isSiteWide) ? null
325: : weblog);
326: } catch (RollerException ex) {
327: invalid = true;
328: }
329: }
330:
331: if (invalid) {
332: if (!response.isCommitted())
333: response.reset();
334: response.sendError(HttpServletResponse.SC_NOT_FOUND);
335: return;
336: }
337:
338: // allow for hit counting
339: if (!isSiteWide) {
340: this .processHit(weblog, request.getRequestURL().toString(),
341: request.getHeader("referer"));
342: }
343:
344: // looks like we need to render content
345:
346: // set the content type
347: String mimeType = RollerContext.getServletContext()
348: .getMimeType(page.getLink());
349: String contentType = "text/html; charset=utf-8";
350: if (mimeType != null) {
351: // we found a match ... set the content type
352: contentType = mimeType + "; charset=utf-8";
353: }
354:
355: HashMap model = new HashMap();
356: try {
357: PageContext pageContext = JspFactory.getDefaultFactory()
358: .getPageContext(this , request, response, "", false,
359: 8192, true);
360:
361: // special hack for menu tag
362: request.setAttribute("pageRequest", pageRequest);
363:
364: // populate the rendering model
365: Map initData = new HashMap();
366: initData.put("request", request);
367: initData
368: .put("requestParameters", request.getParameterMap());
369: initData.put("weblogRequest", pageRequest);
370: initData.put("pageContext", pageContext);
371:
372: // if this was a comment posting, check for comment form
373: WeblogEntryCommentForm commentForm = (WeblogEntryCommentForm) request
374: .getAttribute("commentForm");
375: if (commentForm != null) {
376: initData.put("commentForm", commentForm);
377: }
378:
379: // Load models for pages
380: String pageModels = RollerConfig
381: .getProperty("rendering.pageModels");
382: ModelLoader.loadModels(pageModels, model, initData, true);
383:
384: // Load special models for site-wide blog
385: if (RollerRuntimeConfig
386: .isSiteWideWeblog(weblog.getHandle())) {
387: String siteModels = RollerConfig
388: .getProperty("rendering.siteModels");
389: ModelLoader.loadModels(siteModels, model, initData,
390: true);
391: }
392:
393: // Load weblog custom models
394: ModelLoader.loadCustomModels(weblog, model, initData);
395:
396: // ick, gotta load pre-3.0 model stuff as well :(
397: ModelLoader.loadOldModels(model, request, response,
398: pageContext, pageRequest);
399:
400: } catch (RollerException ex) {
401: log.error("Error loading model objects for page", ex);
402:
403: if (!response.isCommitted())
404: response.reset();
405: response
406: .sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
407: return;
408: }
409:
410: // lookup Renderer we are going to use
411: Renderer renderer = null;
412: try {
413: log.debug("Looking up renderer");
414: renderer = RendererManager.getRenderer(page);
415: } catch (Exception e) {
416: // nobody wants to render my content :(
417: log.error(
418: "Couldn't find renderer for page " + page.getId(),
419: e);
420:
421: if (!response.isCommitted())
422: response.reset();
423: response.sendError(HttpServletResponse.SC_NOT_FOUND);
424: return;
425: }
426:
427: // render content. use size of about 24K for a standard page
428: CachedContent rendererOutput = new CachedContent(24567,
429: contentType);
430: try {
431: log.debug("Doing rendering");
432: renderer.render(model, rendererOutput.getCachedWriter());
433:
434: // flush rendered output and close
435: rendererOutput.flush();
436: rendererOutput.close();
437: } catch (Exception e) {
438: // bummer, error during rendering
439: log.error(
440: "Error during rendering for page " + page.getId(),
441: e);
442:
443: if (!response.isCommitted())
444: response.reset();
445: response.sendError(HttpServletResponse.SC_NOT_FOUND);
446: return;
447: }
448:
449: // post rendering process
450:
451: // flush rendered content to response
452: log.debug("Flushing response output");
453: response.setContentType(contentType);
454: response.setContentLength(rendererOutput.getContent().length);
455: response.getOutputStream().write(rendererOutput.getContent());
456:
457: // cache rendered content. only cache if user is not logged in?
458: if ((!this .excludeOwnerPages || !pageRequest.isLoggedIn())
459: && request.getAttribute("skipCache") == null) {
460: log.debug("PUT " + cacheKey);
461:
462: // put it in the right cache
463: if (isSiteWide) {
464: siteWideCache.put(cacheKey, rendererOutput);
465: } else {
466: weblogPageCache.put(cacheKey, rendererOutput);
467: }
468: } else {
469: log.debug("SKIPPED " + cacheKey);
470: }
471:
472: log.debug("Exiting");
473: }
474:
475: /**
476: * Handle POST requests.
477: *
478: * We have this here because the comment servlet actually forwards some of
479: * its requests on to us to render some pages with cusom messaging. We
480: * may want to revisit this approach in the future and see if we can do
481: * this in a different way, but for now this is the easy way.
482: */
483: public void doPost(HttpServletRequest request,
484: HttpServletResponse response) throws ServletException,
485: IOException {
486:
487: // make sure caching is disabled
488: request.setAttribute("skipCache", "true");
489:
490: // handle just like a GET request
491: this .doGet(request, response);
492: }
493:
494: /**
495: * Notify the hit tracker that it has an incoming page hit.
496: */
497: private void processHit(WebsiteData weblog, String url,
498: String referrer) {
499:
500: HitCountQueue counter = HitCountQueue.getInstance();
501: counter.processHit(weblog, url, referrer);
502: }
503:
504: /**
505: * Process the incoming request to extract referrer info and pass it on
506: * to the referrer processing queue for tracking.
507: *
508: * @returns true if referrer was spam, false otherwise
509: */
510: private boolean processReferrer(HttpServletRequest request) {
511:
512: log.debug("processing referrer for " + request.getRequestURI());
513:
514: // bleh! because ref processing does a flush it will close
515: // our hibernate session and cause lazy init exceptions on
516: // objects we have fetched, so we need to use a separate
517: // page request object for this
518: WeblogPageRequest pageRequest;
519: try {
520: pageRequest = new WeblogPageRequest(request);
521: } catch (InvalidRequestException ex) {
522: return false;
523: }
524:
525: // if this came from site-wide frontpage then skip it
526: if (RollerRuntimeConfig.isSiteWideWeblog(pageRequest
527: .getWeblogHandle())) {
528: return false;
529: }
530:
531: // if this came from a robot then don't process it
532: if (robotPattern != null) {
533: String userAgent = request.getHeader("User-Agent");
534: if (userAgent != null && userAgent.length() > 0
535: && robotPattern.matcher(userAgent).matches()) {
536: log.debug("skipping referrer from robot");
537: return false;
538: }
539: }
540:
541: String referrerUrl = request.getHeader("Referer");
542: StringBuffer reqsb = request.getRequestURL();
543: if (request.getQueryString() != null) {
544: reqsb.append("?");
545: reqsb.append(request.getQueryString());
546: }
547: String requestUrl = reqsb.toString();
548:
549: log.debug("referrer = " + referrerUrl);
550:
551: // if this came from persons own blog then don't process it
552: String selfSiteFragment = "/" + pageRequest.getWeblogHandle();
553: if (referrerUrl != null
554: && referrerUrl.indexOf(selfSiteFragment) != -1) {
555: log.debug("skipping referrer from own blog");
556: return false;
557: }
558:
559: // validate the referrer
560: if (pageRequest != null
561: && pageRequest.getWeblogHandle() != null) {
562:
563: // Base page URLs, with and without www.
564: String basePageUrlWWW = RollerRuntimeConfig
565: .getAbsoluteContextURL()
566: + "/" + pageRequest.getWeblogHandle();
567: String basePageUrl = basePageUrlWWW;
568: if (basePageUrlWWW.startsWith("http://www.")) {
569: // chop off the http://www.
570: basePageUrl = "http://" + basePageUrlWWW.substring(11);
571: }
572:
573: // ignore referrers coming from users own blog
574: if (referrerUrl == null
575: || (!referrerUrl.startsWith(basePageUrl) && !referrerUrl
576: .startsWith(basePageUrlWWW))) {
577:
578: // validate the referrer
579: if (referrerUrl != null) {
580: // treat editor referral as direct
581: int lastSlash = requestUrl.indexOf("/", 8);
582: if (lastSlash == -1)
583: lastSlash = requestUrl.length();
584: String requestSite = requestUrl.substring(0,
585: lastSlash);
586:
587: if (referrerUrl.matches(requestSite + ".*\\.do.*")) {
588: referrerUrl = null;
589: } else if (SpamChecker.checkReferrer(pageRequest
590: .getWeblog(), referrerUrl)) {
591: return true;
592: }
593: }
594:
595: } else {
596: log.debug("Ignoring referer = " + referrerUrl);
597: return false;
598: }
599: }
600:
601: // referrer is valid, lets record it
602: try {
603: IncomingReferrer referrer = new IncomingReferrer();
604: referrer.setReferrerUrl(referrerUrl);
605: referrer.setRequestUrl(requestUrl);
606: referrer.setWeblogHandle(pageRequest.getWeblogHandle());
607: referrer.setWeblogAnchor(pageRequest.getWeblogAnchor());
608: referrer.setWeblogDateString(pageRequest.getWeblogDate());
609:
610: ReferrerQueueManager refQueue = RollerFactory.getRoller()
611: .getReferrerQueueManager();
612: refQueue.processReferrer(referrer);
613: } catch (Exception e) {
614: log.error("Error processing referrer", e);
615: }
616:
617: return false;
618: }
619:
620: }
|