Lib/CGIHTTPServer.py

   1 """CGI-savvy HTTP Server.
   2
   3 This module builds on SimpleHTTPServer by implementing GET and POST
   4 requests to cgi-bin scripts.
   5
   6 If the os.fork() function is not present (e.g. on Windows),
   7 subprocess.Popen() is used as a fallback, with slightly altered
   8 semantics; if os.popen2() and os.popen3() are missing as well, only
   9 Python scripts are supported, run by a separate interpreter process.
  10
  11 In all cases, the implementation is intentionally naive -- all
  12 requests are executed synchronously.
  13
  14 SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
  15 -- it may execute arbitrary Python code or external programs.
  16
  17 Note that status code 200 is sent prior to execution of a CGI script, so
  18 scripts cannot send other status codes such as 302 (redirect).
  19 """
  20
  21
  22 __version__ = "0.4"
  23
  24 __all__ = ["CGIHTTPRequestHandler"]
  25
  26 import os
  27 import sys
  28 import urllib
  29 import BaseHTTPServer
  30 import SimpleHTTPServer
  31 import select
  32
  33
  34 class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
  35
  36     """Complete HTTP server with GET, HEAD and POST commands.
  37
  38     GET and HEAD also support running CGI scripts.
  39
  40     The POST command is *only* implemented for CGI scripts.
  41
  42     """
  43
  44     # Determine platform specifics
  45     have_fork = hasattr(os, 'fork')
  46     have_popen2 = hasattr(os, 'popen2')
  47     have_popen3 = hasattr(os, 'popen3')
  48
  49     # Make rfile unbuffered -- we need to read one line and then pass
  50     # the rest to a subprocess, so we can't use buffered input.
  51     rbufsize = 0
  52
  53     def do_POST(self):
  54         """Serve a POST request.
  55
  56         This is only implemented for CGI scripts.
  57
  58         """
  59
  60         if self.is_cgi():
  61             self.run_cgi()
  62         else:
  63             self.send_error(501, "Can only POST to CGI scripts")
  64
  65     def send_head(self):
  66         """Version of send_head that support CGI scripts"""
  67         if self.is_cgi():
  68             return self.run_cgi()
  69         else:
  70             return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)
  71
  72     def is_cgi(self):
  73         """Test whether self.path corresponds to a CGI script.
  74
  75         Returns True and updates the cgi_info attribute to the tuple
  76         (dir, rest) if self.path requires running a CGI script.
  77         Returns False otherwise.
  78
  79         If any exception is raised, the caller should assume that
  80         self.path was rejected as invalid and act accordingly.
  81
  82         The default implementation tests whether the normalized url
  83         path begins with one of the strings in self.cgi_directories
  84         (and the next character is a '/' or the end of the string).
  85         """
  86         splitpath = _url_collapse_path_split(self.path)
  87         if splitpath[0] in self.cgi_directories:
  88             self.cgi_info = splitpath
  89             return True
  90         return False
  91
  92     cgi_directories = ['/cgi-bin', '/htbin']
  93
  94     def is_executable(self, path):
  95         """Test whether argument path is an executable file."""
  96         return executable(path)
  97
  98     def is_python(self, path):
  99         """Test whether argument path is a Python script."""
 100         head, tail = os.path.splitext(path)
 101         return tail.lower() in (".py", ".pyw")
 102
 103     def run_cgi(self):
 104         """Execute a CGI script."""
 105         path = self.path
 106         dir, rest = self.cgi_info
 107
 108         i = path.find('/', len(dir) + 1)
 109         while i >= 0:
 110             nextdir = path[:i]
 111             nextrest = path[i+1:]
 112
 113             scriptdir = self.translate_path(nextdir)
 114             if os.path.isdir(scriptdir):
 115                 dir, rest = nextdir, nextrest
 116                 i = path.find('/', len(dir) + 1)
 117             else:
 118                 break
 119
 120         # find an explicit query string, if present.
 121         i = rest.rfind('?')
 122         if i >= 0:
 123             rest, query = rest[:i], rest[i+1:]
 124         else:
 125             query = ''
 126
 127         # dissect the part after the directory name into a script name &
 128         # a possible additional path, to be stored in PATH_INFO.
 129         i = rest.find('/')
 130         if i >= 0:
 131             script, rest = rest[:i], rest[i:]
 132         else:
 133             script, rest = rest, ''
 134
 135         scriptname = dir + '/' + script
 136         scriptfile = self.translate_path(scriptname)
 137         if not os.path.exists(scriptfile):
 138             self.send_error(404, "No such CGI script (%r)" % scriptname)
 139             return
 140         if not os.path.isfile(scriptfile):
 141             self.send_error(403, "CGI script is not a plain file (%r)" %
 142                             scriptname)
 143             return
 144         ispy = self.is_python(scriptname)
 145         if not ispy:
 146             if not (self.have_fork or self.have_popen2 or self.have_popen3):
 147                 self.send_error(403, "CGI script is not a Python script (%r)" %
 148                                 scriptname)
 149                 return
 150             if not self.is_executable(scriptfile):
 151                 self.send_error(403, "CGI script is not executable (%r)" %
 152                                 scriptname)
 153                 return
 154
 155         # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
 156         # XXX Much of the following could be prepared ahead of time!
 157         env = {}
 158         env['SERVER_SOFTWARE'] = self.version_string()
 159         env['SERVER_NAME'] = self.server.server_name
 160         env['GATEWAY_INTERFACE'] = 'CGI/1.1'
 161         env['SERVER_PROTOCOL'] = self.protocol_version
 162         env['SERVER_PORT'] = str(self.server.server_port)
 163         env['REQUEST_METHOD'] = self.command
 164         uqrest = urllib.unquote(rest)
 165         env['PATH_INFO'] = uqrest
 166         env['PATH_TRANSLATED'] = self.translate_path(uqrest)
 167         env['SCRIPT_NAME'] = scriptname
 168         if query:
 169             env['QUERY_STRING'] = query
 170         host = self.address_string()
 171         if host != self.client_address[0]:
 172             env['REMOTE_HOST'] = host
 173         env['REMOTE_ADDR'] = self.client_address[0]
 174         authorization = self.headers.getheader("authorization")
 175         if authorization:
 176             authorization = authorization.split()
 177             if len(authorization) == 2:
 178                 import base64, binascii
 179                 env['AUTH_TYPE'] = authorization[0]
 180                 if authorization[0].lower() == "basic":
 181                     try:
 182                         authorization = base64.decodestring(authorization[1])
 183                     except binascii.Error:
 184                         pass
 185                     else:
 186                         authorization = authorization.split(':')
 187                         if len(authorization) == 2:
 188                             env['REMOTE_USER'] = authorization[0]
 189         # XXX REMOTE_IDENT
 190         if self.headers.typeheader is None:
 191             env['CONTENT_TYPE'] = self.headers.type
 192         else:
 193             env['CONTENT_TYPE'] = self.headers.typeheader
 194         length = self.headers.getheader('content-length')
 195         if length:
 196             env['CONTENT_LENGTH'] = length
 197         referer = self.headers.getheader('referer')
 198         if referer:
 199             env['HTTP_REFERER'] = referer
 200         accept = []
 201         for line in self.headers.getallmatchingheaders('accept'):
 202             if line[:1] in "\t\n\r ":
 203                 accept.append(line.strip())
 204             else:
 205                 accept = accept + line[7:].split(',')
 206         env['HTTP_ACCEPT'] = ','.join(accept)
 207         ua = self.headers.getheader('user-agent')
 208         if ua:
 209             env['HTTP_USER_AGENT'] = ua
 210         co = filter(None, self.headers.getheaders('cookie'))
 211         if co:
 212             env['HTTP_COOKIE'] = ', '.join(co)
 213         # XXX Other HTTP_* headers
 214         # Since we're setting the env in the parent, provide empty
 215         # values to override previously set values
 216         for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
 217                   'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
 218             env.setdefault(k, "")
 219         os.environ.update(env)
 220
 221         self.send_response(200, "Script output follows")
 222
 223         decoded_query = query.replace('+', ' ')
 224
 225         if self.have_fork:
 226             # Unix -- fork as we should
 227             args = [script]
 228             if '=' not in decoded_query:
 229                 args.append(decoded_query)
 230             nobody = nobody_uid()
 231             self.wfile.flush() # Always flush before forking
 232             pid = os.fork()
 233             if pid != 0:
 234                 # Parent
 235                 pid, sts = os.waitpid(pid, 0)
 236                 # throw away additional data [see bug #427345]
 237                 while select.select([self.rfile], [], [], 0)[0]:
 238                     if not self.rfile.read(1):
 239                         break
 240                 if sts:
 241                     self.log_error("CGI script exit status %#x", sts)
 242                 return
 243             # Child
 244             try:
 245                 try:
 246                     os.setuid(nobody)
 247                 except os.error:
 248                     pass
 249                 os.dup2(self.rfile.fileno(), 0)
 250                 os.dup2(self.wfile.fileno(), 1)
 251                 os.execve(scriptfile, args, os.environ)
 252             except:
 253                 self.server.handle_error(self.request, self.client_address)
 254                 os._exit(127)
 255
 256         else:
 257             # Non Unix - use subprocess
 258             import subprocess
 259             cmdline = [scriptfile]
 260             if self.is_python(scriptfile):
 261                 interp = sys.executable
 262                 if interp.lower().endswith("w.exe"):
 263                     # On Windows, use python.exe, not pythonw.exe
 264                     interp = interp[:-5] + interp[-4:]
 265                 cmdline = [interp, '-u'] + cmdline
 266             if '=' not in query:
 267                 cmdline.append(query)
 268
 269             self.log_message("command: %s", subprocess.list2cmdline(cmdline))
 270             try:
 271                 nbytes = int(length)
 272             except (TypeError, ValueError):
 273                 nbytes = 0
 274             p = subprocess.Popen(cmdline,
 275                                  stdin = subprocess.PIPE,
 276                                  stdout = subprocess.PIPE,
 277                                  stderr = subprocess.PIPE
 278                                 )
 279             if self.command.lower() == "post" and nbytes > 0:
 280                 data = self.rfile.read(nbytes)
 281             else:
 282                 data = None
 283             # throw away additional data [see bug #427345]
 284             while select.select([self.rfile._sock], [], [], 0)[0]:
 285                 if not self.rfile._sock.recv(1):
 286                     break
 287             stdout, stderr = p.communicate(data)
 288             self.wfile.write(stdout)
 289             if stderr:
 290                 self.log_error('%s', stderr)
 291             status = p.returncode
 292             if status:
 293                 self.log_error("CGI script exit status %#x", status)
 294             else:
 295                 self.log_message("CGI script exited OK")
 296
 297
 298 # TODO(gregory.p.smith): Move this into an appropriate library.
 299 def _url_collapse_path_split(path):
 300     """
 301     Given a URL path, remove extra '/'s and '.' path elements and collapse
 302     any '..' references.
 303
 304     Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
 305
 306     Returns: A tuple of (head, tail) where tail is everything after the final /
 307     and head is everything before it.  Head will always start with a '/' and,
 308     if it contains anything else, never have a trailing '/'.
 309
 310     Raises: IndexError if too many '..' occur within the path.
 311     """
 312     # Similar to os.path.split(os.path.normpath(path)) but specific to URL
 313     # path semantics rather than local operating system semantics.
 314     path_parts = []
 315     for part in path.split('/'):
 316         if part == '.':
 317             path_parts.append('')
 318         else:
 319             path_parts.append(part)
 320     # Filter out blank non trailing parts before consuming the '..'.
 321     path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:]
 322     if path_parts:
 323         tail_part = path_parts.pop()
 324     else:
 325         tail_part = ''
 326     head_parts = []
 327     for part in path_parts:
 328         if part == '..':
 329             head_parts.pop()
 330         else:
 331             head_parts.append(part)
 332     if tail_part and tail_part == '..':
 333         head_parts.pop()
 334         tail_part = ''
 335     return ('/' + '/'.join(head_parts), tail_part)
 336
 337
 338 nobody = None
 339
 340 def nobody_uid():
 341     """Internal routine to get nobody's uid"""
 342     global nobody
 343     if nobody:
 344         return nobody
 345     try:
 346         import pwd
 347     except ImportError:
 348         return -1
 349     try:
 350         nobody = pwd.getpwnam('nobody')[2]
 351     except KeyError:
 352         nobody = 1 + max(map(lambda x: x[2], pwd.getpwall()))
 353     return nobody
 354
 355
 356 def executable(path):
 357     """Test for executable file."""
 358     try:
 359         st = os.stat(path)
 360     except os.error:
 361         return False
 362     return st.st_mode & 0111 != 0
 363
 364
 365 def test(HandlerClass = CGIHTTPRequestHandler,
 366          ServerClass = BaseHTTPServer.HTTPServer):
 367     SimpleHTTPServer.test(HandlerClass, ServerClass)
 368
 369
 370 if __name__ == '__main__':
 371     test()