"""This class understands Aquarium, pyscripts, and server contexts."""
__docformat__ = "restructuredtext"
import mimetypes
import posixpath
import re
import rfc822
import shutil
import urllib
import urlparse
from BaseHTTPServer import BaseHTTPRequestHandler
from aquarium.util import HTTPResponses
from aquarium.parse.Host import parseHost
from aquarium.parse.IfModifiedSince import wasModifiedSince
from glass.vfs.Standard import Standard
class HTTPHandler(BaseHTTPRequestHandler):

    """This class understands Aquarium, pyscripts, and server contexts.

    It is based on ``SimpleHTTPServer`` and ``CGIHTTPServer`` from the Python
    standard library.  It subclasses ``BaseHTTPServer.BaseHTTPRequestHandler``,
    also from the Python standard library.  It adds the concept of a server
    context which is used to set things such as the document root.  It also
    adds support for Aquarium.  Last of all, it adds support for pyscripts.  A
    pyscript is a script that is executed via ``exec``.  It is given the
    following globals: ``stdin``, ``stdout``, ``env`` (from
    ``create_cgi_env``), and the bound method ``send_response``.  The support
    for Aquarium is just a slight addition to the support for pyscripts.  When
    using Glass, configure Aquarium to use the ``GlassAdaptor`` Web server
    adaptor.  This code acts in place of the normal Aquarium ``index.py`` entry
    point.

    The following attributes are added in this subclass:

    server_ctx
      This is the ``ServerContext``.

    server_name, server_port
      These are the same as ``server.server_name`` and
      ``server.server_port``, unless a ``Host`` header overrides them.

    url_path
      This is just the path part of ``self.path`` (passed through
      ``urllib.unquote`` and normalized by ``normalize_url_path``).  The
      ``self.path`` set by ``BaseHTTPServer.BaseHTTPRequestHandler`` contains
      additional stuff like GET parameters.

    path_translated, vfs
      If ``url_path`` corresponds to an actual file or directory within one of
      the mounts, these will be set.  Otherwise, they'll be set to None.
      ``path_translated`` is the path translated to the appropriate mount's
      filesystem syntax.  ``vfs`` is a virtual filesystem object (i.e. an
      instance of some class from the ``glass.vfs`` package).  It is the vfs
      corresponding to the mount the ``url_path`` was found in.

    extension
      This is the extension of ``path_translated``.

    query_string
      This is the query part of ``self.path``.

    stat
      For static content, this is a ``stat`` of the file.

    content_type
      For static content, this is the content type.

    aquarium
      For Aquarium requests, this is the instance of the Aquarium class or
      subclass.

    The following class attributes are added in this subclass:

    server_version
      This is the ``server_version`` reported in the HTTP headers.

    error_message_format
      Improve the format of error messages.

    index_files
      This is a list of index files to look for when we receive a request for
      a directory.

    pyscript_extensions
      This is a list of extensions used by pyscripts.  Include the ".".

    """

    server_version = "glass/1.0"
    # Tried in this order by do_ALL when the request maps to a directory.
    index_files = ["index.html", "index.py"]
    # Compared against the result of vfs.splitext, hence the leading ".".
    pyscript_extensions = [".py"]
    # Template with %(code)d, %(explain)s and %(message)s placeholders.
    error_message_format = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html40/loose.dtd">
<html>
<head>
<title>Error Response</title>
</head>
<body>
<h1>Error Response</h1>
<p>
Error code %(code)d: %(explain)s: <i>%(message)s.</i>
</body>
</html>"""
def __init__(self, request, client_address, server, server_ctx):
"""Override, not extend, ``BaseRequestHandler.__init__`` for coro.
Normally, no ``__init__`` method would be needed here, and
``BaseRequestHandler.__init__`` would suffice. However, see the
comment in ``HTTPServer.finish_request``. Hence, we must break this
method up into ``__init__`` and ``__call__``.
"""
self.request = request
self.client_address = client_address
self.server = server
self.server_ctx = server_ctx
def __call__(self):
"""Call ``setup``, ``handle``, and ``finish``.
See ``__init__``.
"""
try:
self.setup()
self.handle()
finally:
self.finish()
def do_ALL(self):
"""Intercept ``do_(GET|HEAD|POST)`` and call the correct handler.
This method forwards the request to the correct handler:
``handle_static``, ``handle_aquarium``, or ``handle_pyscript``. This
code also supports index files. Things that can't be handled here such
as directory listings and 404's are passed to Aquarium so that it can
produce pretty error pages.
"""
class FallThrough(Exception): pass
(ignored_scheme, ignored_host, self.url_path, ignored_params,
self.query_string, ignored_fragment) = urlparse.urlparse(self.path)
self.parse_host_header()
self.normalize_url_path()
self.rewrite_path()
if (self.try_handle_redirect() or
self.try_handle_forbidden()):
return
self.translate_path()
try:
if not self.path_translated:
raise FallThrough()
if self.vfs.isdir(self.path_translated):
for index in self.index_files:
index = self.vfs.join(self.path_translated, index)
if self.vfs.exists(index):
self.path_translated = index
break
else:
raise FallThrough()
(ignored_filename, self.extension) = \
self.vfs.splitext(self.path_translated)
if not self.vfs.isfile(self.path_translated):
raise FallThrough()
if self.extension in self.pyscript_extensions:
self.handle_pyscript()
else:
self.handle_static()
except FallThrough:
self.handle_aquarium()
do_HEAD = do_ALL
do_GET = do_ALL
do_POST = do_ALL
def parse_host_header(self):
    """Set ``server_name`` and ``server_port`` from the ``Host`` header.

    The header is handed to ``parseHost`` together with the server's own
    name and port as the default, and the server's ``is_secure`` flag.

    HACK: For MSIE, ignore the port from the Host header; just use the
    port the server is actually using.  This is to work around a
    `known bug`_ in MSIE.

    .. _known bug: http://publib.boulder.ibm.com/infocenter/tiv2help/index.jsp?topic=/com.ibm.itame.doc_5.1/am51_relnotes111.htm

    """
    server = self.server
    fallback = (server.server_name, server.server_port)
    host_header = self.headers.get("Host")
    (self.server_name, self.server_port) = parseHost(
        host_header, fallback, server.is_secure)
    user_agent = self.headers.get("user-agent", "")
    if "MSIE" in user_agent:
        self.server_port = server.server_port
def normalize_url_path(self):
"""Normalize ``self.url_path``.
This calls ``urllib.unquote`` and ``posixpath.normpath``.
"""
self.url_path = posixpath.normpath(urllib.unquote(self.url_path))
def rewrite_path(self):
    """Do any URL rewriting necessary.

    This hook is called once ``self.url_path`` and ``self.query_string``
    have been set.  Subclasses may update ``self.url_path`` in any way
    they wish; this implementation intentionally does nothing.

    """
def translate_path(self):
    """Look ``self.url_path`` up in each of the server's mounts.

    ``self.server.mounts`` is walked in order.  The first mount whose URL
    prefix matches and whose vfs reports the translated path as existing
    wins: ``self.vfs`` and ``self.path_translated`` are set accordingly.
    If no mount matches, both are left as ``None``.

    Components that mean special things to the vfs (e.g. drive or
    directory names) are stripped here.

    """
    self.vfs = None
    self.path_translated = None
    for (prefix, mounted_vfs) in self.server.mounts:
        assert prefix.endswith("/")
        under_mount = self.url_path.startswith(prefix)
        # The mount point itself, requested without its trailing slash.
        is_mount_root = (self.url_path + "/" == prefix)
        if not (under_mount or is_mount_root):
            continue
        remainder = self.url_path[len(prefix):]
        components = []
        for segment in remainder.split("/"):
            # Keep only the last path element, minus any drive prefix.
            segment = mounted_vfs.splitdrive(segment)[1]
            segment = mounted_vfs.split(segment)[1]
            if segment not in (mounted_vfs.curdir, mounted_vfs.pardir):
                components.append(segment)
        candidate = mounted_vfs.translate_path(
            mounted_vfs.join(*components))
        if mounted_vfs.exists(candidate):
            self.path_translated = candidate
            self.vfs = mounted_vfs
            return
def try_handle_redirect(self):
    """Decide whether this request can be answered with a redirect.

    This hook is called once ``self.url_path`` and ``self.query_string``
    have been set.  An implementation that wants to redirect should call
    ``redirect`` and return ``1``; otherwise it should return ``0``.

    This implementation never redirects and always returns ``0``.  It can
    be overridden by subclasses.

    """
    return 0
def try_handle_forbidden(self):
    """Forbid access to certain places.

    This is called after ``try_handle_redirect`` is called.  If
    appropriate, call ``send_error`` with ``HTTPResponses.FORBIDDEN`` or
    perhaps ``HTTPResponses.NOT_FOUND`` and return ``1``.  Otherwise,
    return ``0``.

    This class forbids access to any path that has a component named
    exactly "CVS", but you may want to override it and do more.

    The check is done per path component rather than by searching for the
    substring "/CVS/".  ``normalize_url_path`` uses ``posixpath.normpath``,
    which strips a trailing slash, so a request for the CVS directory
    itself arrives here as ".../CVS" and would slip past a substring test.

    """
    if "CVS" in self.url_path.split("/"):
        self.send_error(HTTPResponses.FORBIDDEN,
                        "Requests matching /CVS/ are not allowed")
        return 1
    return 0
def redirect(self, url, httpResponse=HTTPResponses.TEMPORARY_REDIRECT):
    """Do an HTTP redirect.

    url
      The URL to redirect the user to.  It is sent as the ``Location``
      header.

    httpResponse
      The HTTP response code.

    The status line uses the short message registered for ``httpResponse``
    in ``self.responses``.  Entries of ``self.responses`` are indexed with
    ``[0]`` to get the short message, so the fallback used for unknown
    codes must be a sequence as well; a plain string would be reduced to
    its first character.

    """
    fallback = ("Redirected", "")
    short_message = self.responses.get(httpResponse, fallback)[0]
    self.send_response(httpResponse, short_message)
    self.send_header("Location", url)
    self.end_headers()
def handle_static(self):
"""Handle a request for static content.
POST is not supported.
"""
if self.command == "POST":
self.send_error(HTTPResponses.NOT_IMPLEMENTED,
"URL requested does not accept POST")
return
try:
self.stat = self.vfs.stat(self.path_translated)
except IOError, e:
self.send_error(HTTPResponses.INTERNAL_ERROR,
"Could not stat file, even after self.vfs.isfile:" + `e`)
return
if self.try_if_modified_since():
return
self.choose_content_type()
if self.content_type.startswith("text/"):
mode = 'r'
else:
mode = 'rb'
try:
f = self.vfs.open(self.path_translated, mode)
except:
self.send_error(HTTPResponses.INTERNAL_ERROR,
"Could not open file")
raise
try:
self.send_response(HTTPResponses.OK)
self.send_header("Content-type", self.content_type)
self.send_header("Last-Modified",
rfc822.formatdate(self.stat.st_mtime))
self.end_headers()
if self.command == "GET":
shutil.copyfileobj(f, self.wfile)
finally:
f.close()
def try_if_modified_since(self):
    """Answer with ``NOT_MODIFIED`` if ``If-Modified-Since`` allows it.

    The ``if-modified-since`` header is passed to ``wasModifiedSince``
    together with the modification time and size from ``self.stat``.  If
    that reports the file as not modified, a ``NOT_MODIFIED`` response is
    sent and ``1`` is returned.  Otherwise nothing is sent and ``0`` is
    returned.

    """
    header = self.headers.get("if-modified-since")
    modified = wasModifiedSince(header, self.stat.st_mtime,
                                self.stat.st_size)
    if not modified:
        self.send_response(HTTPResponses.NOT_MODIFIED, "Not modified")
        self.end_headers()
        return 1
    return 0
def choose_content_type(self):
    """Set ``self.content_type`` based on ``self.extension``.

    ``mimetypes.types_map`` is consulted with the extension as is and, if
    that yields nothing, with the lowercased extension.  The content type
    defaults to "text/plain".

    """
    known_types = mimetypes.types_map
    content_type = known_types.get(self.extension, None)
    if not content_type:
        content_type = known_types.get(self.extension.lower(), None)
    if not content_type:
        content_type = "text/plain"
    self.content_type = content_type
def handle_aquarium(self):
"""Forward a request to Aquarium.
Call ``self.launch_aquarium``. I will try to catch any exceptions that
Aquarium lets through, but it might have already set the response and
generated some output. After catching the exception and sending an
error message, I will reraise it so that it'll end up in the logs. Oh
well, it's better than nothing.
Note that HEAD is not supported.
"""
if self.command == "HEAD":
self.send_error(HTTPResponses.NOT_IMPLEMENTED,
"URL requested does not accept HEAD")
return
try:
self.launch_aquarium()
except Exception, e:
if not self.server.is_disconnect_error(e):
self.send_error(HTTPResponses.INTERNAL_ERROR,
"The application raised an exception")
raise
def launch_aquarium(self):
    """Actually launch Aquarium.

    ``self.server_ctx.aquarium_entry_point`` is a dotted module name.  The
    module is imported, and the class named like the last component of the
    module name is taken from it.  That class is instantiated with a
    ``GlassAdaptor`` wrapping ``self.create_pyscript_globals()``, stored
    in ``self.aquarium``, and then called.

    """
    from aquarium.wsadaptor.GlassAdaptor import GlassAdaptor
    entry_point = self.server_ctx.aquarium_entry_point
    # Everything after the last "." (or the whole name if there is none).
    class_name = entry_point[entry_point.rfind(".") + 1:]
    entry_module = __import__(entry_point, {}, {}, [class_name])
    entry_class = getattr(entry_module, class_name)
    adaptor = GlassAdaptor(self.create_pyscript_globals())
    self.aquarium = entry_class(adaptor)
    self.aquarium()
def handle_pyscript(self):
"""Pass a request on to a pyscript.
The script will be executed via ``exec``.
The application *should* deal with its own exceptions. I will try to
catch exceptions raised in ``exec``, but the script might have
already set the response and generated some output. After catching the
exception and sending an error message, I will reraise it so that it'll
end up in the logs. Oh well, it's better than nothing.
``PATH_INFO`` is not supported. It would be *difficult* to handle the
Aquarium case as well as the ``PATH_INFO`` case. ``HEAD`` is not
supported. Last of all, do *not* use pyscripts if you are using coro
since you'll probably end up doing a context switch, which isn't
allowed in an ``exec``.
"""
if self.command == "HEAD":
self.send_error(HTTPResponses.NOT_IMPLEMENTED,
"URL requested does not accept HEAD")
return
try:
f = self.vfs.open(self.path_translated)
try:
# I can't just pass f to exec because it won't accept a
# StringIO object as a valid file handle, but that's the best
# that vfs.Zip can provide.
contents = f.read()
finally:
f.close()
exec contents in self.create_pyscript_globals()
except:
self.send_error(HTTPResponses.INTERNAL_ERROR,
"The pyscript raised an exception")
raise
def create_pyscript_globals(self):
    """Create the dictionary of globals passed to pyscripts or Aquarium.

    The dictionary contains ``__name__`` (always "__main__"), ``stdin``
    (``self.rfile``), ``stdout`` (``self.wfile``), ``env`` (the result of
    ``create_cgi_env``) and the bound method ``send_response``.

    """
    script_globals = {"__name__": "__main__"}
    script_globals["stdin"] = self.rfile
    script_globals["stdout"] = self.wfile
    script_globals["env"] = self.create_cgi_env()
    script_globals["send_response"] = self.send_response
    return script_globals
def create_cgi_env(self):
    """Create a dict containing a `CGI environment`_.

    I do not support the following:

    GATEWAY_INTERFACE
      It's not *really* CGI.

    AUTH_TYPE, REMOTE_USER, REMOTE_IDENT
      No authentication implemented.

    PATH_INFO
      It's always "".

    Per `the following`_, I also set ``HTTPS="on"`` if SSL is being used.

    .. _CGI environment: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
    .. _the following: http://www.cgi101.com/class/ch3/text.html

    Reverse DNS lookups are turned off, so ``REMOTE_HOST = REMOTE_ADDR``.
    Subclasses are welcome to use ``self.address_string()`` if they so
    desire.

    Every request header is also exposed as ``HTTP_<NAME>``, with the
    name uppercased and "-" replaced by "_".  ``CONTENT_LENGTH`` is copied
    from ``HTTP_CONTENT_LENGTH`` when the request has such a header.

    Returns the new dict; nothing on ``self`` is modified.

    """
    env = {
        "SERVER_SOFTWARE": self.version_string(),
        # A name configured in the server context wins over the one
        # determined for this request.
        "SERVER_NAME": (self.server_ctx.server_name or
                        self.server_name),
        "SERVER_PROTOCOL": self.protocol_version,
        "SERVER_PORT": str(self.server_port),
        "REQUEST_METHOD": self.command,
        "PATH_INFO": "",
        "PATH_TRANSLATED": self.path_translated,
        "SCRIPT_NAME": self.url_path,
        "QUERY_STRING": self.query_string,
        "REMOTE_ADDR": self.client_address[0],
        "REMOTE_HOST": self.client_address[0],
        "CONTENT_TYPE": (self.headers.typeheader or
                         self.headers.type),
    }
    for (key, value) in self.headers.items():
        env["HTTP_" + key.replace("-", "_").upper()] = value
    if "HTTP_CONTENT_LENGTH" in env:
        env["CONTENT_LENGTH"] = env["HTTP_CONTENT_LENGTH"]
    if self.server.is_secure:
        env["HTTPS"] = "on"
    return env
|