Lib/cgi.py

   1 #! /usr/local/bin/python
   2
   3 # NOTE: the above "/usr/local/bin/python" is NOT a mistake.  It is
   4 # intentionally NOT "/usr/bin/env python".  On many systems
   5 # (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
   6 # scripts, and /usr/local/bin is the default directory where Python is
   7 # installed, so /usr/bin/env would be unable to find python.  Granted,
   8 # binary installations by Linux vendors often install Python in
   9 # /usr/bin.  So let those vendors patch cgi.py to match their choice
  10 # of installation.
  11
  12 """Support module for CGI (Common Gateway Interface) scripts.
  13
  14 This module defines a number of utilities for use by CGI scripts
  15 written in Python.
  16 """
  17
  18 # XXX Perhaps there should be a slimmed version that doesn't contain
  19 # all those backwards compatible and debugging classes and functions?
  20
  21 # History
  22 # -------
  23 #
  24 # Michael McLay started this module.  Steve Majewski changed the
  25 # interface to SvFormContentDict and FormContentDict.  The multipart
  26 # parsing was inspired by code submitted by Andreas Paepcke.  Guido van
  27 # Rossum rewrote, reformatted and documented the module and is currently
  28 # responsible for its maintenance.
  29 #
  30
  31 __version__ = "2.6"
  32
  33
  34 # Imports
  35 # =======
  36
  37 from operator import attrgetter
  38 import sys
  39 import os
  40 import urllib
  41 import mimetools
  42 import rfc822
  43 import UserDict
  44 try:
  45     from cStringIO import StringIO
  46 except ImportError:
  47     from StringIO import StringIO
  48
  49 __all__ = ["MiniFieldStorage", "FieldStorage", "FormContentDict",
  50            "SvFormContentDict", "InterpFormContentDict", "FormContent",
  51            "parse", "parse_qs", "parse_qsl", "parse_multipart",
  52            "parse_header", "print_exception", "print_environ",
  53            "print_form", "print_directory", "print_arguments",
  54            "print_environ_usage", "escape"]
  55
  56 # Logging support
  57 # ===============
  58
  59 logfile = ""            # Filename to log to, if not empty
  60 logfp = None            # File object to log to, if not None
  61
  62 def initlog(*allargs):
  63     """Write a log message, if there is a log file.
  64
  65     Even though this function is called initlog(), you should always
  66     use log(); log is a variable that is set either to initlog
  67     (initially), to dolog (once the log file has been opened), or to
  68     nolog (when logging is disabled).
  69
  70     The first argument is a format string; the remaining arguments (if
  71     any) are arguments to the % operator, so e.g.
  72         log("%s: %s", "a", "b")
  73     will write "a: b" to the log file, followed by a newline.
  74
  75     If the global logfp is not None, it should be a file object to
  76     which log data is written.
  77
  78     If the global logfp is None, the global logfile may be a string
  79     giving a filename to open, in append mode.  This file should be
  80     world writable!!!  If the file can't be opened, logging is
  81     silently disabled (since there is no safe place where we could
  82     send an error message).
  83
  84     """
  85     global logfp, log
  86     if logfile and not logfp:
  87         try:
  88             logfp = open(logfile, "a")
  89         except IOError:
  90             pass
  91     if not logfp:
  92         log = nolog
  93     else:
  94         log = dolog
  95     log(*allargs)
  96
  97 def dolog(fmt, *args):
  98     """Write a log message to the log file.  See initlog() for docs."""
  99     logfp.write(fmt%args + "\n")
 100
 101 def nolog(*allargs):
 102     """Dummy function, assigned to log when logging is disabled."""
 103     pass
 104
 105 log = initlog           # The current logging function
 106
 107
 108 # Parsing functions
 109 # =================
 110
 111 # Maximum input we will accept when REQUEST_METHOD is POST
 112 # 0 ==> unlimited input
 113 maxlen = 0
 114
 115 def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
 116     """Parse a query in the environment or from a file (default stdin)
 117
 118         Arguments, all optional:
 119
 120         fp              : file pointer; default: sys.stdin
 121
 122         environ         : environment dictionary; default: os.environ
 123
 124         keep_blank_values: flag indicating whether blank values in
 125             URL encoded forms should be treated as blank strings.
 126             A true value indicates that blanks should be retained as
 127             blank strings.  The default false value indicates that
 128             blank values are to be ignored and treated as if they were
 129             not included.
 130
 131         strict_parsing: flag indicating what to do with parsing errors.
 132             If false (the default), errors are silently ignored.
 133             If true, errors raise a ValueError exception.
 134     """
 135     if fp is None:
 136         fp = sys.stdin
 137     if not 'REQUEST_METHOD' in environ:
 138         environ['REQUEST_METHOD'] = 'GET'       # For testing stand-alone
 139     if environ['REQUEST_METHOD'] == 'POST':
 140         ctype, pdict = parse_header(environ['CONTENT_TYPE'])
 141         if ctype == 'multipart/form-data':
 142             return parse_multipart(fp, pdict)
 143         elif ctype == 'application/x-www-form-urlencoded':
 144             clength = int(environ['CONTENT_LENGTH'])
 145             if maxlen and clength > maxlen:
 146                 raise ValueError, 'Maximum content length exceeded'
 147             qs = fp.read(clength)
 148         else:
 149             qs = ''                     # Unknown content-type
 150         if 'QUERY_STRING' in environ:
 151             if qs: qs = qs + '&'
 152             qs = qs + environ['QUERY_STRING']
 153         elif sys.argv[1:]:
 154             if qs: qs = qs + '&'
 155             qs = qs + sys.argv[1]
 156         environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
 157     elif 'QUERY_STRING' in environ:
 158         qs = environ['QUERY_STRING']
 159     else:
 160         if sys.argv[1:]:
 161             qs = sys.argv[1]
 162         else:
 163             qs = ""
 164         environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
 165     return parse_qs(qs, keep_blank_values, strict_parsing)
 166
 167
 168 def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
 169     """Parse a query given as a string argument.
 170
 171         Arguments:
 172
 173         qs: URL-encoded query string to be parsed
 174
 175         keep_blank_values: flag indicating whether blank values in
 176             URL encoded queries should be treated as blank strings.
 177             A true value indicates that blanks should be retained as
 178             blank strings.  The default false value indicates that
 179             blank values are to be ignored and treated as if they were
 180             not included.
 181
 182         strict_parsing: flag indicating what to do with parsing errors.
 183             If false (the default), errors are silently ignored.
 184             If true, errors raise a ValueError exception.
 185     """
 186     dict = {}
 187     for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
 188         if name in dict:
 189             dict[name].append(value)
 190         else:
 191             dict[name] = [value]
 192     return dict
 193
 194 def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
 195     """Parse a query given as a string argument.
 196
 197     Arguments:
 198
 199     qs: URL-encoded query string to be parsed
 200
 201     keep_blank_values: flag indicating whether blank values in
 202         URL encoded queries should be treated as blank strings.  A
 203         true value indicates that blanks should be retained as blank
 204         strings.  The default false value indicates that blank values
 205         are to be ignored and treated as if they were  not included.
 206
 207     strict_parsing: flag indicating what to do with parsing errors. If
 208         false (the default), errors are silently ignored. If true,
 209         errors raise a ValueError exception.
 210
 211     Returns a list, as G-d intended.
 212     """
 213     pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
 214     r = []
 215     for name_value in pairs:
 216         if not name_value and not strict_parsing:
 217             continue
 218         nv = name_value.split('=', 1)
 219         if len(nv) != 2:
 220             if strict_parsing:
 221                 raise ValueError, "bad query field: %r" % (name_value,)
 222             # Handle case of a control-name with no equal sign
 223             if keep_blank_values:
 224                 nv.append('')
 225             else:
 226                 continue
 227         if len(nv[1]) or keep_blank_values:
 228             name = urllib.unquote(nv[0].replace('+', ' '))
 229             value = urllib.unquote(nv[1].replace('+', ' '))
 230             r.append((name, value))
 231
 232     return r
 233
 234
 235 def parse_multipart(fp, pdict):
 236     """Parse multipart input.
 237
 238     Arguments:
 239     fp   : input file
 240     pdict: dictionary containing other parameters of content-type header
 241
 242     Returns a dictionary just like parse_qs(): keys are the field names, each
 243     value is a list of values for that field.  This is easy to use but not
 244     much good if you are expecting megabytes to be uploaded -- in that case,
 245     use the FieldStorage class instead which is much more flexible.  Note
 246     that content-type is the raw, unparsed contents of the content-type
 247     header.
 248
 249     XXX This does not parse nested multipart parts -- use FieldStorage for
 250     that.
 251
 252     XXX This should really be subsumed by FieldStorage altogether -- no
 253     point in having two implementations of the same parsing algorithm.
 254     Also, FieldStorage protects itself better against certain DoS attacks
 255     by limiting the size of the data read in one chunk.  The API here
 256     does not support that kind of protection.  This also affects parse()
 257     since it can call parse_multipart().
 258
 259     """
 260     boundary = ""
 261     if 'boundary' in pdict:
 262         boundary = pdict['boundary']
 263     if not valid_boundary(boundary):
 264         raise ValueError,  ('Invalid boundary in multipart form: %r'
 265                             % (boundary,))
 266
 267     nextpart = "--" + boundary
 268     lastpart = "--" + boundary + "--"
 269     partdict = {}
 270     terminator = ""
 271
 272     while terminator != lastpart:
 273         bytes = -1
 274         data = None
 275         if terminator:
 276             # At start of next part.  Read headers first.
 277             headers = mimetools.Message(fp)
 278             clength = headers.getheader('content-length')
 279             if clength:
 280                 try:
 281                     bytes = int(clength)
 282                 except ValueError:
 283                     pass
 284             if bytes > 0:
 285                 if maxlen and bytes > maxlen:
 286                     raise ValueError, 'Maximum content length exceeded'
 287                 data = fp.read(bytes)
 288             else:
 289                 data = ""
 290         # Read lines until end of part.
 291         lines = []
 292         while 1:
 293             line = fp.readline()
 294             if not line:
 295                 terminator = lastpart # End outer loop
 296                 break
 297             if line[:2] == "--":
 298                 terminator = line.strip()
 299                 if terminator in (nextpart, lastpart):
 300                     break
 301             lines.append(line)
 302         # Done with part.
 303         if data is None:
 304             continue
 305         if bytes < 0:
 306             if lines:
 307                 # Strip final line terminator
 308                 line = lines[-1]
 309                 if line[-2:] == "\r\n":
 310                     line = line[:-2]
 311                 elif line[-1:] == "\n":
 312                     line = line[:-1]
 313                 lines[-1] = line
 314                 data = "".join(lines)
 315         line = headers['content-disposition']
 316         if not line:
 317             continue
 318         key, params = parse_header(line)
 319         if key != 'form-data':
 320             continue
 321         if 'name' in params:
 322             name = params['name']
 323         else:
 324             continue
 325         if name in partdict:
 326             partdict[name].append(data)
 327         else:
 328             partdict[name] = [data]
 329
 330     return partdict
 331
 332
 333 def parse_header(line):
 334     """Parse a Content-type like header.
 335
 336     Return the main content-type and a dictionary of options.
 337
 338     """
 339     plist = [x.strip() for x in line.split(';')]
 340     key = plist.pop(0).lower()
 341     pdict = {}
 342     for p in plist:
 343         i = p.find('=')
 344         if i >= 0:
 345             name = p[:i].strip().lower()
 346             value = p[i+1:].strip()
 347             if len(value) >= 2 and value[0] == value[-1] == '"':
 348                 value = value[1:-1]
 349                 value = value.replace('\\\\', '\\').replace('\\"', '"')
 350             pdict[name] = value
 351     return key, pdict
 352
 353
 354 # Classes for field storage
 355 # =========================
 356
 357 class MiniFieldStorage:
 358
 359     """Like FieldStorage, for use when no file uploads are possible."""
 360
 361     # Dummy attributes
 362     filename = None
 363     list = None
 364     type = None
 365     file = None
 366     type_options = {}
 367     disposition = None
 368     disposition_options = {}
 369     headers = {}
 370
 371     def __init__(self, name, value):
 372         """Constructor from field name and value."""
 373         self.name = name
 374         self.value = value
 375         # self.file = StringIO(value)
 376
 377     def __repr__(self):
 378         """Return printable representation."""
 379         return "MiniFieldStorage(%r, %r)" % (self.name, self.value)
 380
 381
 382 class FieldStorage:
 383
 384     """Store a sequence of fields, reading multipart/form-data.
 385
 386     This class provides naming, typing, files stored on disk, and
 387     more.  At the top level, it is accessible like a dictionary, whose
 388     keys are the field names.  (Note: None can occur as a field name.)
 389     The items are either a Python list (if there's multiple values) or
 390     another FieldStorage or MiniFieldStorage object.  If it's a single
 391     object, it has the following attributes:
 392
 393     name: the field name, if specified; otherwise None
 394
 395     filename: the filename, if specified; otherwise None; this is the
 396         client side filename, *not* the file name on which it is
 397         stored (that's a temporary file you don't deal with)
 398
 399     value: the value as a *string*; for file uploads, this
 400         transparently reads the file every time you request the value
 401
 402     file: the file(-like) object from which you can read the data;
 403         None if the data is stored as a simple string
 404
 405     type: the content-type, or None if not specified
 406
 407     type_options: dictionary of options specified on the content-type
 408         line
 409
 410     disposition: content-disposition, or None if not specified
 411
 412     disposition_options: dictionary of corresponding options
 413
 414     headers: a dictionary(-like) object (sometimes rfc822.Message or a
 415         subclass thereof) containing *all* headers
 416
 417     The class is subclassable, mostly for the purpose of overriding
 418     the make_file() method, which is called internally to come up with
 419     a file open for reading and writing.  This makes it possible to
 420     override the default choice of storing all files in a temporary
 421     directory and unlinking them as soon as they have been opened.
 422
 423     """
 424
 425     def __init__(self, fp=None, headers=None, outerboundary="",
 426                  environ=os.environ, keep_blank_values=0, strict_parsing=0):
 427         """Constructor.  Read multipart/* until last part.
 428
 429         Arguments, all optional:
 430
 431         fp              : file pointer; default: sys.stdin
 432             (not used when the request method is GET)
 433
 434         headers         : header dictionary-like object; default:
 435             taken from environ as per CGI spec
 436
 437         outerboundary   : terminating multipart boundary
 438             (for internal use only)
 439
 440         environ         : environment dictionary; default: os.environ
 441
 442         keep_blank_values: flag indicating whether blank values in
 443             URL encoded forms should be treated as blank strings.
 444             A true value indicates that blanks should be retained as
 445             blank strings.  The default false value indicates that
 446             blank values are to be ignored and treated as if they were
 447             not included.
 448
 449         strict_parsing: flag indicating what to do with parsing errors.
 450             If false (the default), errors are silently ignored.
 451             If true, errors raise a ValueError exception.
 452
 453         """
 454         method = 'GET'
 455         self.keep_blank_values = keep_blank_values
 456         self.strict_parsing = strict_parsing
 457         if 'REQUEST_METHOD' in environ:
 458             method = environ['REQUEST_METHOD'].upper()
 459         self.qs_on_post = None
 460         if method == 'GET' or method == 'HEAD':
 461             if 'QUERY_STRING' in environ:
 462                 qs = environ['QUERY_STRING']
 463             elif sys.argv[1:]:
 464                 qs = sys.argv[1]
 465             else:
 466                 qs = ""
 467             fp = StringIO(qs)
 468             if headers is None:
 469                 headers = {'content-type':
 470                            "application/x-www-form-urlencoded"}
 471         if headers is None:
 472             headers = {}
 473             if method == 'POST':
 474                 # Set default content-type for POST to what's traditional
 475                 headers['content-type'] = "application/x-www-form-urlencoded"
 476             if 'CONTENT_TYPE' in environ:
 477                 headers['content-type'] = environ['CONTENT_TYPE']
 478             if 'QUERY_STRING' in environ:
 479                 self.qs_on_post = environ['QUERY_STRING']
 480             if 'CONTENT_LENGTH' in environ:
 481                 headers['content-length'] = environ['CONTENT_LENGTH']
 482         self.fp = fp or sys.stdin
 483         self.headers = headers
 484         self.outerboundary = outerboundary
 485
 486         # Process content-disposition header
 487         cdisp, pdict = "", {}
 488         if 'content-disposition' in self.headers:
 489             cdisp, pdict = parse_header(self.headers['content-disposition'])
 490         self.disposition = cdisp
 491         self.disposition_options = pdict
 492         self.name = None
 493         if 'name' in pdict:
 494             self.name = pdict['name']
 495         self.filename = None
 496         if 'filename' in pdict:
 497             self.filename = pdict['filename']
 498
 499         # Process content-type header
 500         #
 501         # Honor any existing content-type header.  But if there is no
 502         # content-type header, use some sensible defaults.  Assume
 503         # outerboundary is "" at the outer level, but something non-false
 504         # inside a multi-part.  The default for an inner part is text/plain,
 505         # but for an outer part it should be urlencoded.  This should catch
 506         # bogus clients which erroneously forget to include a content-type
 507         # header.
 508         #
 509         # See below for what we do if there does exist a content-type header,
 510         # but it happens to be something we don't understand.
 511         if 'content-type' in self.headers:
 512             ctype, pdict = parse_header(self.headers['content-type'])
 513         elif self.outerboundary or method != 'POST':
 514             ctype, pdict = "text/plain", {}
 515         else:
 516             ctype, pdict = 'application/x-www-form-urlencoded', {}
 517         self.type = ctype
 518         self.type_options = pdict
 519         self.innerboundary = ""
 520         if 'boundary' in pdict:
 521             self.innerboundary = pdict['boundary']
 522         clen = -1
 523         if 'content-length' in self.headers:
 524             try:
 525                 clen = int(self.headers['content-length'])
 526             except ValueError:
 527                 pass
 528             if maxlen and clen > maxlen:
 529                 raise ValueError, 'Maximum content length exceeded'
 530         self.length = clen
 531
 532         self.list = self.file = None
 533         self.done = 0
 534         if ctype == 'application/x-www-form-urlencoded':
 535             self.read_urlencoded()
 536         elif ctype[:10] == 'multipart/':
 537             self.read_multi(environ, keep_blank_values, strict_parsing)
 538         else:
 539             self.read_single()
 540
 541     def __repr__(self):
 542         """Return a printable representation."""
 543         return "FieldStorage(%r, %r, %r)" % (
 544                 self.name, self.filename, self.value)
 545
 546     def __iter__(self):
 547         return iter(self.keys())
 548
 549     def __getattr__(self, name):
 550         if name != 'value':
 551             raise AttributeError, name
 552         if self.file:
 553             self.file.seek(0)
 554             value = self.file.read()
 555             self.file.seek(0)
 556         elif self.list is not None:
 557             value = self.list
 558         else:
 559             value = None
 560         return value
 561
 562     def __getitem__(self, key):
 563         """Dictionary style indexing."""
 564         if self.list is None:
 565             raise TypeError, "not indexable"
 566         found = []
 567         for item in self.list:
 568             if item.name == key: found.append(item)
 569         if not found:
 570             raise KeyError, key
 571         if len(found) == 1:
 572             return found[0]
 573         else:
 574             return found
 575
 576     def getvalue(self, key, default=None):
 577         """Dictionary style get() method, including 'value' lookup."""
 578         if key in self:
 579             value = self[key]
 580             if type(value) is type([]):
 581                 return map(attrgetter('value'), value)
 582             else:
 583                 return value.value
 584         else:
 585             return default
 586
 587     def getfirst(self, key, default=None):
 588         """ Return the first value received."""
 589         if key in self:
 590             value = self[key]
 591             if type(value) is type([]):
 592                 return value[0].value
 593             else:
 594                 return value.value
 595         else:
 596             return default
 597
 598     def getlist(self, key):
 599         """ Return list of received values."""
 600         if key in self:
 601             value = self[key]
 602             if type(value) is type([]):
 603                 return map(attrgetter('value'), value)
 604             else:
 605                 return [value.value]
 606         else:
 607             return []
 608
 609     def keys(self):
 610         """Dictionary style keys() method."""
 611         if self.list is None:
 612             raise TypeError, "not indexable"
 613         return list(set(item.name for item in self.list))
 614
 615     def has_key(self, key):
 616         """Dictionary style has_key() method."""
 617         if self.list is None:
 618             raise TypeError, "not indexable"
 619         return any(item.name == key for item in self.list)
 620
 621     def __contains__(self, key):
 622         """Dictionary style __contains__ method."""
 623         if self.list is None:
 624             raise TypeError, "not indexable"
 625         return any(item.name == key for item in self.list)
 626
 627     def __len__(self):
 628         """Dictionary style len(x) support."""
 629         return len(self.keys())
 630
 631     def __nonzero__(self):
 632         return bool(self.list)
 633
 634     def read_urlencoded(self):
 635         """Internal: read data in query string format."""
 636         qs = self.fp.read(self.length)
 637         if self.qs_on_post:
 638             qs += '&' + self.qs_on_post
 639         self.list = list = []
 640         for key, value in parse_qsl(qs, self.keep_blank_values,
 641                                     self.strict_parsing):
 642             list.append(MiniFieldStorage(key, value))
 643         self.skip_lines()
 644
 645     FieldStorageClass = None
 646
 647     def read_multi(self, environ, keep_blank_values, strict_parsing):
 648         """Internal: read a part that is itself multipart."""
 649         ib = self.innerboundary
 650         if not valid_boundary(ib):
 651             raise ValueError, 'Invalid boundary in multipart form: %r' % (ib,)
 652         self.list = []
 653         if self.qs_on_post:
 654             for key, value in parse_qsl(self.qs_on_post, self.keep_blank_values,
 655                                         self.strict_parsing):
 656                 self.list.append(MiniFieldStorage(key, value))
 657             FieldStorageClass = None
 658
 659         klass = self.FieldStorageClass or self.__class__
 660         part = klass(self.fp, {}, ib,
 661                      environ, keep_blank_values, strict_parsing)
 662         # Throw first part away
 663         while not part.done:
 664             headers = rfc822.Message(self.fp)
 665             part = klass(self.fp, headers, ib,
 666                          environ, keep_blank_values, strict_parsing)
 667             self.list.append(part)
 668         self.skip_lines()
 669
 670     def read_single(self):
 671         """Internal: read an atomic part."""
 672         if self.length >= 0:
 673             self.read_binary()
 674             self.skip_lines()
 675         else:
 676             self.read_lines()
 677         self.file.seek(0)
 678
 679     bufsize = 8*1024            # I/O buffering size for copy to file
 680
 681     def read_binary(self):
 682         """Internal: read binary data."""
 683         self.file = self.make_file('b')
 684         todo = self.length
 685         if todo >= 0:
 686             while todo > 0:
 687                 data = self.fp.read(min(todo, self.bufsize))
 688                 if not data:
 689                     self.done = -1
 690                     break
 691                 self.file.write(data)
 692                 todo = todo - len(data)
 693
 694     def read_lines(self):
 695         """Internal: read lines until EOF or outerboundary."""
 696         self.file = self.__file = StringIO()
 697         if self.outerboundary:
 698             self.read_lines_to_outerboundary()
 699         else:
 700             self.read_lines_to_eof()
 701
 702     def __write(self, line):
 703         if self.__file is not None:
 704             if self.__file.tell() + len(line) > 1000:
 705                 self.file = self.make_file('')
 706                 self.file.write(self.__file.getvalue())
 707                 self.__file = None
 708         self.file.write(line)
 709
 710     def read_lines_to_eof(self):
 711         """Internal: read lines until EOF."""
 712         while 1:
 713             line = self.fp.readline(1<<16)
 714             if not line:
 715                 self.done = -1
 716                 break
 717             self.__write(line)
 718
 719     def read_lines_to_outerboundary(self):
 720         """Internal: read lines until outerboundary."""
 721         next = "--" + self.outerboundary
 722         last = next + "--"
 723         delim = ""
 724         last_line_lfend = True
 725         while 1:
 726             line = self.fp.readline(1<<16)
 727             if not line:
 728                 self.done = -1
 729                 break
 730             if line[:2] == "--" and last_line_lfend:
 731                 strippedline = line.strip()
 732                 if strippedline == next:
 733                     break
 734                 if strippedline == last:
 735                     self.done = 1
 736                     break
 737             odelim = delim
 738             if line[-2:] == "\r\n":
 739                 delim = "\r\n"
 740                 line = line[:-2]
 741                 last_line_lfend = True
 742             elif line[-1] == "\n":
 743                 delim = "\n"
 744                 line = line[:-1]
 745                 last_line_lfend = True
 746             else:
 747                 delim = ""
 748                 last_line_lfend = False
 749             self.__write(odelim + line)
 750
 751     def skip_lines(self):
 752         """Internal: skip lines until outer boundary if defined."""
 753         if not self.outerboundary or self.done:
 754             return
 755         next = "--" + self.outerboundary
 756         last = next + "--"
 757         last_line_lfend = True
 758         while 1:
 759             line = self.fp.readline(1<<16)
 760             if not line:
 761                 self.done = -1
 762                 break
 763             if line[:2] == "--" and last_line_lfend:
 764                 strippedline = line.strip()
 765                 if strippedline == next:
 766                     break
 767                 if strippedline == last:
 768                     self.done = 1
 769                     break
 770             last_line_lfend = line.endswith('\n')
 771
 772     def make_file(self, binary=None):
 773         """Overridable: return a readable & writable file.
 774
 775         The file will be used as follows:
 776         - data is written to it
 777         - seek(0)
 778         - data is read from it
 779
 780         The 'binary' argument is unused -- the file is always opened
 781         in binary mode.
 782
 783         This version opens a temporary file for reading and writing,
 784         and immediately deletes (unlinks) it.  The trick (on Unix!) is
 785         that the file can still be used, but it can't be opened by
 786         another process, and it will automatically be deleted when it
 787         is closed or when the current process terminates.
 788
 789         If you want a more permanent file, you derive a class which
 790         overrides this method.  If you want a visible temporary file
 791         that is nevertheless automatically deleted when the script
 792         terminates, try defining a __del__ method in a derived class
 793         which unlinks the temporary files you have created.
 794
 795         """
 796         import tempfile
 797         return tempfile.TemporaryFile("w+b")
 798
 799
 800
 801 # Backwards Compatibility Classes
 802 # ===============================
 803
 804 class FormContentDict(UserDict.UserDict):
 805     """Form content as dictionary with a list of values per field.
 806
 807     form = FormContentDict()
 808
 809     form[key] -> [value, value, ...]
 810     key in form -> Boolean
 811     form.keys() -> [key, key, ...]
 812     form.values() -> [[val, val, ...], [val, val, ...], ...]
 813     form.items() ->  [(key, [val, val, ...]), (key, [val, val, ...]), ...]
 814     form.dict == {key: [val, val, ...], ...}
 815
 816     """
 817     def __init__(self, environ=os.environ, keep_blank_values=0, strict_parsing=0):
 818         self.dict = self.data = parse(environ=environ,
 819                                       keep_blank_values=keep_blank_values,
 820                                       strict_parsing=strict_parsing)
 821         self.query_string = environ['QUERY_STRING']
 822
 823
 824 class SvFormContentDict(FormContentDict):
 825     """Form content as dictionary expecting a single value per field.
 826
 827     If you only expect a single value for each field, then form[key]
 828     will return that single value.  It will raise an IndexError if
 829     that expectation is not true.  If you expect a field to have
 830     possible multiple values, than you can use form.getlist(key) to
 831     get all of the values.  values() and items() are a compromise:
 832     they return single strings where there is a single value, and
 833     lists of strings otherwise.
 834
 835     """
 836     def __getitem__(self, key):
 837         if len(self.dict[key]) > 1:
 838             raise IndexError, 'expecting a single value'
 839         return self.dict[key][0]
 840     def getlist(self, key):
 841         return self.dict[key]
 842     def values(self):
 843         result = []
 844         for value in self.dict.values():
 845             if len(value) == 1:
 846                 result.append(value[0])
 847             else: result.append(value)
 848         return result
 849     def items(self):
 850         result = []
 851         for key, value in self.dict.items():
 852             if len(value) == 1:
 853                 result.append((key, value[0]))
 854             else: result.append((key, value))
 855         return result
 856
 857
 858 class InterpFormContentDict(SvFormContentDict):
 859     """This class is present for backwards compatibility only."""
 860     def __getitem__(self, key):
 861         v = SvFormContentDict.__getitem__(self, key)
 862         if v[0] in '0123456789+-.':
 863             try: return int(v)
 864             except ValueError:
 865                 try: return float(v)
 866                 except ValueError: pass
 867         return v.strip()
 868     def values(self):
 869         result = []
 870         for key in self.keys():
 871             try:
 872                 result.append(self[key])
 873             except IndexError:
 874                 result.append(self.dict[key])
 875         return result
 876     def items(self):
 877         result = []
 878         for key in self.keys():
 879             try:
 880                 result.append((key, self[key]))
 881             except IndexError:
 882                 result.append((key, self.dict[key]))
 883         return result
 884
 885
 886 class FormContent(FormContentDict):
 887     """This class is present for backwards compatibility only."""
 888     def values(self, key):
 889         if key in self.dict :return self.dict[key]
 890         else: return None
 891     def indexed_value(self, key, location):
 892         if key in self.dict:
 893             if len(self.dict[key]) > location:
 894                 return self.dict[key][location]
 895             else: return None
 896         else: return None
 897     def value(self, key):
 898         if key in self.dict: return self.dict[key][0]
 899         else: return None
 900     def length(self, key):
 901         return len(self.dict[key])
 902     def stripped(self, key):
 903         if key in self.dict: return self.dict[key][0].strip()
 904         else: return None
 905     def pars(self):
 906         return self.dict
 907
 908
 909 # Test/debug code
 910 # ===============
 911
def test(environ=os.environ):
    """Robust test CGI script, usable as main program.

    Write minimal HTTP headers and dump all information provided to
    the script in HTML form.

    The dump is produced twice: once as is (followed by a deliberately
    provoked exception to exercise print_exception()), and once more
    after the module-level 'maxlen' has been set to 50.  Any exception
    raised during either pass is reported through print_exception().

    """
    # Header line plus the empty line that terminates the header block.
    print "Content-type: text/html"
    print
    # From here on, anything written to stderr goes to stdout as well.
    sys.stderr = sys.stdout
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
        print_environ_usage()
        # The string handed to exec is not valid Python, so calling f()
        # raises; the markup inside it shows whether print_exception()
        # escapes its output.
        def f():
            exec "testing print_exception() -- <I>italics?</I>"
        # g() adds one more frame to the reported traceback.
        def g(f=f):
            f()
        print "<H3>What follows is a test, not an actual exception:</H3>"
        g()
    except:
        # Catch everything on purpose: report it instead of aborting.
        print_exception()

    print "<H1>Second try with a small maxlen...</H1>"

    # NOTE(review): maxlen is a module-level setting defined outside this
    # block; presumably it limits the accepted request size -- confirm.
    global maxlen
    maxlen = 50
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
    except:
        # Catch everything on purpose: report it instead of aborting.
        print_exception()
 950
 951 def print_exception(type=None, value=None, tb=None, limit=None):
 952     if type is None:
 953         type, value, tb = sys.exc_info()
 954     import traceback
 955     print
 956     print "<H3>Traceback (most recent call last):</H3>"
 957     list = traceback.format_tb(tb, limit) + \
 958            traceback.format_exception_only(type, value)
 959     print "<PRE>%s<B>%s</B></PRE>" % (
 960         escape("".join(list[:-1])),
 961         escape(list[-1]),
 962         )
 963     del tb
 964
 965 def print_environ(environ=os.environ):
 966     """Dump the shell environment as HTML."""
 967     keys = environ.keys()
 968     keys.sort()
 969     print
 970     print "<H3>Shell Environment:</H3>"
 971     print "<DL>"
 972     for key in keys:
 973         print "<DT>", escape(key), "<DD>", escape(environ[key])
 974     print "</DL>"
 975     print
 976
 977 def print_form(form):
 978     """Dump the contents of a form as HTML."""
 979     keys = form.keys()
 980     keys.sort()
 981     print
 982     print "<H3>Form Contents:</H3>"
 983     if not keys:
 984         print "<P>No form fields."
 985     print "<DL>"
 986     for key in keys:
 987         print "<DT>" + escape(key) + ":",
 988         value = form[key]
 989         print "<i>" + escape(repr(type(value))) + "</i>"
 990         print "<DD>" + escape(repr(value))
 991     print "</DL>"
 992     print
 993
 994 def print_directory():
 995     """Dump the current directory as HTML."""
 996     print
 997     print "<H3>Current Working Directory:</H3>"
 998     try:
 999         pwd = os.getcwd()
1000     except os.error, msg:
1001         print "os.error:", escape(str(msg))
1002     else:
1003         print escape(pwd)
1004     print
1005
1006 def print_arguments():
1007     print
1008     print "<H3>Command Line Arguments:</H3>"
1009     print
1010     print sys.argv
1011     print
1012
1013 def print_environ_usage():
1014     """Dump a list of environment variables used by CGI as HTML."""
1015     print """
1016 <H3>These environment variables could have been set:</H3>
1017 <UL>
1018 <LI>AUTH_TYPE
1019 <LI>CONTENT_LENGTH
1020 <LI>CONTENT_TYPE
1021 <LI>DATE_GMT
1022 <LI>DATE_LOCAL
1023 <LI>DOCUMENT_NAME
1024 <LI>DOCUMENT_ROOT
1025 <LI>DOCUMENT_URI
1026 <LI>GATEWAY_INTERFACE
1027 <LI>LAST_MODIFIED
1028 <LI>PATH
1029 <LI>PATH_INFO
1030 <LI>PATH_TRANSLATED
1031 <LI>QUERY_STRING
1032 <LI>REMOTE_ADDR
1033 <LI>REMOTE_HOST
1034 <LI>REMOTE_IDENT
1035 <LI>REMOTE_USER
1036 <LI>REQUEST_METHOD
1037 <LI>SCRIPT_NAME
1038 <LI>SERVER_NAME
1039 <LI>SERVER_PORT
1040 <LI>SERVER_PROTOCOL
1041 <LI>SERVER_ROOT
1042 <LI>SERVER_SOFTWARE
1043 </UL>
1044 In addition, HTTP headers sent by the server may be passed in the
1045 environment as well.  Here are some common variable names:
1046 <UL>
1047 <LI>HTTP_ACCEPT
1048 <LI>HTTP_CONNECTION
1049 <LI>HTTP_HOST
1050 <LI>HTTP_PRAGMA
1051 <LI>HTTP_REFERER
1052 <LI>HTTP_USER_AGENT
1053 </UL>
1054 """
1055
1056
1057 # Utilities
1058 # =========
1059
def escape(s, quote=None):
    '''Return s with the characters "&", "<" and ">" made HTML-safe.

    Each of them is replaced by the corresponding entity.  If the
    optional flag quote is true, the quotation mark character (") is
    replaced as well.'''
    # "&" has to come first: the other replacements introduce new
    # ampersands that must not be escaped a second time.
    substitutions = [("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")]
    if quote:
        substitutions.append(('"', "&quot;"))
    for char, entity in substitutions:
        s = s.replace(char, entity)
    return s
1070
def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"):
    """Check whether s is acceptable as a boundary string.

    Returns the match object produced by matching s against
    _vb_pattern, or None when s does not match.  The default pattern
    accepts up to 200 characters in the range ' ' to '~' followed by
    one final character in the range '!' to '~' (i.e. not a space).

    """
    import re
    matcher = re.compile(_vb_pattern)
    return matcher.match(s)
1074
1075 # Invoke mainline
1076 # ===============
1077
# When this file is executed directly as a script (rather than imported
# as a module), run test() so that the file itself serves as a test CGI
# program.
if __name__ == '__main__':
    test()