1 #! /usr/local/bin/python
3 # NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is
4 # intentionally NOT "/usr/bin/env python". On many systems
5 # (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
6 # scripts, and /usr/local/bin is the default directory where Python is
7 # installed, so /usr/bin/env would be unable to find python. Granted,
8 # binary installations by Linux vendors often install Python in
9 # /usr/bin. So let those vendors patch cgi.py to match their choice
12 """Support module for CGI (Common Gateway Interface) scripts.
14 This module defines a number of utilities for use by CGI scripts
21 # Michael McLay started this module. Steve Majewski changed the
22 # interface to SvFormContentDict and FormContentDict. The multipart
23 # parsing was inspired by code submitted by Andreas Paepcke. Guido van
24 # Rossum rewrote, reformatted and documented the module and is currently
25 # responsible for its maintenance.
34 from operator
import attrgetter
35 from io
import StringIO
40 from warnings
import warn
# Public API of this module (the names exported by "from cgi import *").
__all__ = ["MiniFieldStorage", "FieldStorage",
           "parse", "parse_qs", "parse_qsl", "parse_multipart",
           "parse_header", "print_exception", "print_environ",
           "print_form", "print_directory", "print_arguments",
           "print_environ_usage", "escape"]
logfile = ""            # Filename to log to, if not empty
logfp = None            # File object to log to, if not None
54 def initlog(*allargs
):
55 """Write a log message, if there is a log file.
57 Even though this function is called initlog(), you should always
58 use log(); log is a variable that is set either to initlog
59 (initially), to dolog (once the log file has been opened), or to
60 nolog (when logging is disabled).
62 The first argument is a format string; the remaining arguments (if
63 any) are arguments to the % operator, so e.g.
64 log("%s: %s", "a", "b")
65 will write "a: b" to the log file, followed by a newline.
67 If the global logfp is not None, it should be a file object to
68 which log data is written.
70 If the global logfp is None, the global logfile may be a string
71 giving a filename to open, in append mode. This file should be
72 world writable!!! If the file can't be opened, logging is
73 silently disabled (since there is no safe place where we could
74 send an error message).
78 if logfile
and not logfp
:
80 logfp
= open(logfile
, "a")
def dolog(fmt, *args):
    """Write a log message to the log file.  See initlog() for docs.

    fmt is a %-style format string and args are the values substituted
    into it.  The formatted text, followed by a newline, is written to
    the module-level file object logfp.
    """
    logfp.write(fmt % args + "\n")
94 """Dummy function, assigned to log when logging is disabled."""
log = initlog           # The current logging function
103 # Maximum input we will accept when REQUEST_METHOD is POST
104 # 0 ==> unlimited input
107 def parse(fp
=None, environ
=os
.environ
, keep_blank_values
=0, strict_parsing
=0):
108 """Parse a query in the environment or from a file (default stdin)
110 Arguments, all optional:
112 fp : file pointer; default: sys.stdin
114 environ : environment dictionary; default: os.environ
116 keep_blank_values: flag indicating whether blank values in
117 URL encoded forms should be treated as blank strings.
118 A true value indicates that blanks should be retained as
119 blank strings. The default false value indicates that
120 blank values are to be ignored and treated as if they were
123 strict_parsing: flag indicating what to do with parsing errors.
124 If false (the default), errors are silently ignored.
125 If true, errors raise a ValueError exception.
129 if not 'REQUEST_METHOD' in environ
:
130 environ
['REQUEST_METHOD'] = 'GET' # For testing stand-alone
131 if environ
['REQUEST_METHOD'] == 'POST':
132 ctype
, pdict
= parse_header(environ
['CONTENT_TYPE'])
133 if ctype
== 'multipart/form-data':
134 return parse_multipart(fp
, pdict
)
135 elif ctype
== 'application/x-www-form-urlencoded':
136 clength
= int(environ
['CONTENT_LENGTH'])
137 if maxlen
and clength
> maxlen
:
138 raise ValueError('Maximum content length exceeded')
139 qs
= fp
.read(clength
)
141 qs
= '' # Unknown content-type
142 if 'QUERY_STRING' in environ
:
144 qs
= qs
+ environ
['QUERY_STRING']
147 qs
= qs
+ sys
.argv
[1]
148 environ
['QUERY_STRING'] = qs
# XXX Shouldn't, really
149 elif 'QUERY_STRING' in environ
:
150 qs
= environ
['QUERY_STRING']
156 environ
['QUERY_STRING'] = qs
# XXX Shouldn't, really
157 return urllib
.parse
.parse_qs(qs
, keep_blank_values
, strict_parsing
)
# parse query string function called from urlparse,
# this is done in order to maintain backward compatibility.
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument.

    Deprecated wrapper kept for backward compatibility: it emits a
    DeprecationWarning (attributed to the caller) and then delegates to
    urllib.parse.parse_qs() with the same arguments, returning its result.
    """
    # stacklevel=2 makes the warning point at the caller, not this wrapper.
    warn("cgi.parse_qs is deprecated, use urllib.parse.parse_qs instead",
         DeprecationWarning, 2)
    return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing)
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument into (name, value) pairs.

    Deprecated wrapper kept for backward compatibility: it emits a
    DeprecationWarning (attributed to the caller) and then delegates to
    urllib.parse.parse_qsl() with the same arguments, returning its result.
    """
    # stacklevel=2 makes the warning point at the caller, not this wrapper.
    warn("cgi.parse_qsl is deprecated, use urllib.parse.parse_qsl instead",
         DeprecationWarning, 2)
    return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
175 def parse_multipart(fp
, pdict
):
176 """Parse multipart input.
180 pdict: dictionary containing other parameters of content-type header
182 Returns a dictionary just like parse_qs(): keys are the field names, each
183 value is a list of values for that field. This is easy to use but not
184 much good if you are expecting megabytes to be uploaded -- in that case,
185 use the FieldStorage class instead which is much more flexible. Note
186 that content-type is the raw, unparsed contents of the content-type
189 XXX This does not parse nested multipart parts -- use FieldStorage for
192 XXX This should really be subsumed by FieldStorage altogether -- no
193 point in having two implementations of the same parsing algorithm.
194 Also, FieldStorage protects itself better against certain DoS attacks
195 by limiting the size of the data read in one chunk. The API here
196 does not support that kind of protection. This also affects parse()
197 since it can call parse_multipart().
203 if 'boundary' in pdict
:
204 boundary
= pdict
['boundary']
205 if not valid_boundary(boundary
):
206 raise ValueError('Invalid boundary in multipart form: %r'
209 nextpart
= "--" + boundary
210 lastpart
= "--" + boundary
+ "--"
214 while terminator
!= lastpart
:
218 # At start of next part. Read headers first.
219 headers
= http
.client
.parse_headers(fp
)
220 clength
= headers
.get('content-length')
227 if maxlen
and bytes
> maxlen
:
228 raise ValueError('Maximum content length exceeded')
229 data
= fp
.read(bytes
)
232 # Read lines until end of part.
237 terminator
= lastpart
# End outer loop
240 terminator
= line
.strip()
241 if terminator
in (nextpart
, lastpart
):
249 # Strip final line terminator
251 if line
[-2:] == "\r\n":
253 elif line
[-1:] == "\n":
256 data
= "".join(lines
)
257 line
= headers
['content-disposition']
260 key
, params
= parse_header(line
)
261 if key
!= 'form-data':
264 name
= params
['name']
268 partdict
[name
].append(data
)
270 partdict
[name
] = [data
]
279 while end
> 0 and s
.count('"', 0, end
) % 2:
280 end
= s
.find(';', end
+ 1)
287 def parse_header(line
):
288 """Parse a Content-type like header.
290 Return the main content-type and a dictionary of options.
293 parts
= _parseparam(';' + line
)
294 key
= parts
.__next
__()
299 name
= p
[:i
].strip().lower()
300 value
= p
[i
+1:].strip()
301 if len(value
) >= 2 and value
[0] == value
[-1] == '"':
303 value
= value
.replace('\\\\', '\\').replace('\\"', '"')
308 # Classes for field storage
309 # =========================
311 class MiniFieldStorage
:
313 """Like FieldStorage, for use when no file uploads are possible."""
322 disposition_options
= {}
325 def __init__(self
, name
, value
):
326 """Constructor from field name and value."""
329 # self.file = StringIO(value)
332 """Return printable representation."""
333 return "MiniFieldStorage(%r, %r)" % (self
.name
, self
.value
)
338 """Store a sequence of fields, reading multipart/form-data.
340 This class provides naming, typing, files stored on disk, and
341 more. At the top level, it is accessible like a dictionary, whose
342 keys are the field names. (Note: None can occur as a field name.)
343 The items are either a Python list (if there's multiple values) or
344 another FieldStorage or MiniFieldStorage object. If it's a single
345 object, it has the following attributes:
347 name: the field name, if specified; otherwise None
349 filename: the filename, if specified; otherwise None; this is the
350 client side filename, *not* the file name on which it is
351 stored (that's a temporary file you don't deal with)
353 value: the value as a *string*; for file uploads, this
354 transparently reads the file every time you request the value
356 file: the file(-like) object from which you can read the data;
357 None if the data is stored a simple string
359 type: the content-type, or None if not specified
361 type_options: dictionary of options specified on the content-type
364 disposition: content-disposition, or None if not specified
366 disposition_options: dictionary of corresponding options
368 headers: a dictionary(-like) object (sometimes email.message.Message or a
369 subclass thereof) containing *all* headers
371 The class is subclassable, mostly for the purpose of overriding
372 the make_file() method, which is called internally to come up with
373 a file open for reading and writing. This makes it possible to
374 override the default choice of storing all files in a temporary
375 directory and unlinking them as soon as they have been opened.
379 def __init__(self
, fp
=None, headers
=None, outerboundary
="",
380 environ
=os
.environ
, keep_blank_values
=0, strict_parsing
=0):
381 """Constructor. Read multipart/* until last part.
383 Arguments, all optional:
385 fp : file pointer; default: sys.stdin
386 (not used when the request method is GET)
388 headers : header dictionary-like object; default:
389 taken from environ as per CGI spec
391 outerboundary : terminating multipart boundary
392 (for internal use only)
394 environ : environment dictionary; default: os.environ
396 keep_blank_values: flag indicating whether blank values in
397 URL encoded forms should be treated as blank strings.
398 A true value indicates that blanks should be retained as
399 blank strings. The default false value indicates that
400 blank values are to be ignored and treated as if they were
403 strict_parsing: flag indicating what to do with parsing errors.
404 If false (the default), errors are silently ignored.
405 If true, errors raise a ValueError exception.
409 self
.keep_blank_values
= keep_blank_values
410 self
.strict_parsing
= strict_parsing
411 if 'REQUEST_METHOD' in environ
:
412 method
= environ
['REQUEST_METHOD'].upper()
413 self
.qs_on_post
= None
414 if method
== 'GET' or method
== 'HEAD':
415 if 'QUERY_STRING' in environ
:
416 qs
= environ
['QUERY_STRING']
423 headers
= {'content-type':
424 "application/x-www-form-urlencoded"}
428 # Set default content-type for POST to what's traditional
429 headers
['content-type'] = "application/x-www-form-urlencoded"
430 if 'CONTENT_TYPE' in environ
:
431 headers
['content-type'] = environ
['CONTENT_TYPE']
432 if 'QUERY_STRING' in environ
:
433 self
.qs_on_post
= environ
['QUERY_STRING']
434 if 'CONTENT_LENGTH' in environ
:
435 headers
['content-length'] = environ
['CONTENT_LENGTH']
436 self
.fp
= fp
or sys
.stdin
437 self
.headers
= headers
438 self
.outerboundary
= outerboundary
440 # Process content-disposition header
441 cdisp
, pdict
= "", {}
442 if 'content-disposition' in self
.headers
:
443 cdisp
, pdict
= parse_header(self
.headers
['content-disposition'])
444 self
.disposition
= cdisp
445 self
.disposition_options
= pdict
448 self
.name
= pdict
['name']
450 if 'filename' in pdict
:
451 self
.filename
= pdict
['filename']
453 # Process content-type header
455 # Honor any existing content-type header. But if there is no
456 # content-type header, use some sensible defaults. Assume
457 # outerboundary is "" at the outer level, but something non-false
458 # inside a multi-part. The default for an inner part is text/plain,
459 # but for an outer part it should be urlencoded. This should catch
460 # bogus clients which erroneously forget to include a content-type
463 # See below for what we do if there does exist a content-type header,
464 # but it happens to be something we don't understand.
465 if 'content-type' in self
.headers
:
466 ctype
, pdict
= parse_header(self
.headers
['content-type'])
467 elif self
.outerboundary
or method
!= 'POST':
468 ctype
, pdict
= "text/plain", {}
470 ctype
, pdict
= 'application/x-www-form-urlencoded', {}
472 self
.type_options
= pdict
473 self
.innerboundary
= ""
474 if 'boundary' in pdict
:
475 self
.innerboundary
= pdict
['boundary']
477 if 'content-length' in self
.headers
:
479 clen
= int(self
.headers
['content-length'])
482 if maxlen
and clen
> maxlen
:
483 raise ValueError('Maximum content length exceeded')
486 self
.list = self
.file = None
488 if ctype
== 'application/x-www-form-urlencoded':
489 self
.read_urlencoded()
490 elif ctype
[:10] == 'multipart/':
491 self
.read_multi(environ
, keep_blank_values
, strict_parsing
)
496 """Return a printable representation."""
497 return "FieldStorage(%r, %r, %r)" % (
498 self
.name
, self
.filename
, self
.value
)
501 return iter(self
.keys())
503 def __getattr__(self
, name
):
505 raise AttributeError(name
)
508 value
= self
.file.read()
510 elif self
.list is not None:
516 def __getitem__(self
, key
):
517 """Dictionary style indexing."""
518 if self
.list is None:
519 raise TypeError("not indexable")
521 for item
in self
.list:
522 if item
.name
== key
: found
.append(item
)
530 def getvalue(self
, key
, default
=None):
531 """Dictionary style get() method, including 'value' lookup."""
534 if type(value
) is type([]):
535 return [x
.value
for x
in value
]
541 def getfirst(self
, key
, default
=None):
542 """ Return the first value received."""
545 if type(value
) is type([]):
546 return value
[0].value
552 def getlist(self
, key
):
553 """ Return list of received values."""
556 if type(value
) is type([]):
557 return [x
.value
for x
in value
]
564 """Dictionary style keys() method."""
565 if self
.list is None:
566 raise TypeError("not indexable")
567 return list(set(item
.name
for item
in self
.list))
569 def __contains__(self
, key
):
570 """Dictionary style __contains__ method."""
571 if self
.list is None:
572 raise TypeError("not indexable")
573 return any(item
.name
== key
for item
in self
.list)
576 """Dictionary style len(x) support."""
577 return len(self
.keys())
579 def __nonzero__(self
):
580 return bool(self
.list)
582 def read_urlencoded(self
):
583 """Internal: read data in query string format."""
584 qs
= self
.fp
.read(self
.length
)
586 qs
+= '&' + self
.qs_on_post
587 self
.list = list = []
588 for key
, value
in urllib
.parse
.parse_qsl(qs
, self
.keep_blank_values
,
589 self
.strict_parsing
):
590 list.append(MiniFieldStorage(key
, value
))
593 FieldStorageClass
= None
595 def read_multi(self
, environ
, keep_blank_values
, strict_parsing
):
596 """Internal: read a part that is itself multipart."""
597 ib
= self
.innerboundary
598 if not valid_boundary(ib
):
599 raise ValueError('Invalid boundary in multipart form: %r' % (ib
,))
602 for key
, value
in urllib
.parse
.parse_qsl(self
.qs_on_post
,
603 self
.keep_blank_values
, self
.strict_parsing
):
604 self
.list.append(MiniFieldStorage(key
, value
))
605 FieldStorageClass
= None
607 klass
= self
.FieldStorageClass
or self
.__class
__
608 parser
= email
.parser
.FeedParser()
609 # Create bogus content-type header for proper multipart parsing
610 parser
.feed('Content-Type: %s; boundary=%s\r\n\r\n' % (self
.type, ib
))
611 parser
.feed(self
.fp
.read())
612 full_msg
= parser
.close()
614 msgs
= full_msg
.get_payload()
616 fp
= StringIO(msg
.get_payload())
617 part
= klass(fp
, msg
, ib
, environ
, keep_blank_values
,
619 self
.list.append(part
)
622 def read_single(self
):
623 """Internal: read an atomic part."""
631 bufsize
= 8*1024 # I/O buffering size for copy to file
633 def read_binary(self
):
634 """Internal: read binary data."""
635 self
.file = self
.make_file()
639 data
= self
.fp
.read(min(todo
, self
.bufsize
))
643 self
.file.write(data
)
644 todo
= todo
- len(data
)
646 def read_lines(self
):
647 """Internal: read lines until EOF or outerboundary."""
648 self
.file = self
.__file
= StringIO()
649 if self
.outerboundary
:
650 self
.read_lines_to_outerboundary()
652 self
.read_lines_to_eof()
654 def __write(self
, line
):
655 if self
.__file
is not None:
656 if self
.__file
.tell() + len(line
) > 1000:
657 self
.file = self
.make_file()
658 data
= self
.__file
.getvalue()
659 self
.file.write(data
)
661 self
.file.write(line
)
663 def read_lines_to_eof(self
):
664 """Internal: read lines until EOF."""
666 line
= self
.fp
.readline(1<<16)
672 def read_lines_to_outerboundary(self
):
673 """Internal: read lines until outerboundary."""
674 next
= "--" + self
.outerboundary
677 last_line_lfend
= True
679 line
= self
.fp
.readline(1<<16)
683 if line
[:2] == "--" and last_line_lfend
:
684 strippedline
= line
.strip()
685 if strippedline
== next
:
687 if strippedline
== last
:
691 if line
[-2:] == "\r\n":
694 last_line_lfend
= True
695 elif line
[-1] == "\n":
698 last_line_lfend
= True
701 last_line_lfend
= False
702 self
.__write
(odelim
+ line
)
704 def skip_lines(self
):
705 """Internal: skip lines until outer boundary if defined."""
706 if not self
.outerboundary
or self
.done
:
708 next
= "--" + self
.outerboundary
710 last_line_lfend
= True
712 line
= self
.fp
.readline(1<<16)
716 if line
[:2] == "--" and last_line_lfend
:
717 strippedline
= line
.strip()
718 if strippedline
== next
:
720 if strippedline
== last
:
723 last_line_lfend
= line
.endswith('\n')
726 """Overridable: return a readable & writable file.
728 The file will be used as follows:
729 - data is written to it
731 - data is read from it
733 The file is always opened in text mode.
735 This version opens a temporary file for reading and writing,
736 and immediately deletes (unlinks) it. The trick (on Unix!) is
737 that the file can still be used, but it can't be opened by
738 another process, and it will automatically be deleted when it
739 is closed or when the current process terminates.
741 If you want a more permanent file, you derive a class which
742 overrides this method. If you want a visible temporary file
743 that is nevertheless automatically deleted when the script
744 terminates, try defining a __del__ method in a derived class
745 which unlinks the temporary files you have created.
749 return tempfile
.TemporaryFile("w+", encoding
="utf-8", newline
="\n")
755 def test(environ
=os
.environ
):
756 """Robust test CGI script, usable as main program.
758 Write minimal HTTP headers and dump all information provided to
759 the script in HTML form.
762 print("Content-type: text/html")
764 sys
.stderr
= sys
.stdout
766 form
= FieldStorage() # Replace with other classes to test those
770 print_environ(environ
)
771 print_environ_usage()
773 exec("testing print_exception() -- <I>italics?</I>")
776 print("<H3>What follows is a test, not an actual exception:</H3>")
781 print("<H1>Second try with a small maxlen...</H1>")
786 form
= FieldStorage() # Replace with other classes to test those
790 print_environ(environ
)
794 def print_exception(type=None, value
=None, tb
=None, limit
=None):
796 type, value
, tb
= sys
.exc_info()
799 print("<H3>Traceback (most recent call last):</H3>")
800 list = traceback
.format_tb(tb
, limit
) + \
801 traceback
.format_exception_only(type, value
)
802 print("<PRE>%s<B>%s</B></PRE>" % (
803 escape("".join(list[:-1])),
808 def print_environ(environ
=os
.environ
):
809 """Dump the shell environment as HTML."""
810 keys
= sorted(environ
.keys())
812 print("<H3>Shell Environment:</H3>")
815 print("<DT>", escape(key
), "<DD>", escape(environ
[key
]))
819 def print_form(form
):
820 """Dump the contents of a form as HTML."""
821 keys
= sorted(form
.keys())
823 print("<H3>Form Contents:</H3>")
825 print("<P>No form fields.")
828 print("<DT>" + escape(key
) + ":", end
=' ')
830 print("<i>" + escape(repr(type(value
))) + "</i>")
831 print("<DD>" + escape(repr(value
)))
835 def print_directory():
836 """Dump the current directory as HTML."""
838 print("<H3>Current Working Directory:</H3>")
841 except os
.error
as msg
:
842 print("os.error:", escape(str(msg
)))
847 def print_arguments():
849 print("<H3>Command Line Arguments:</H3>")
854 def print_environ_usage():
855 """Dump a list of environment variables used by CGI as HTML."""
857 <H3>These environment variables could have been set:</H3>
867 <LI>GATEWAY_INTERFACE
885 In addition, HTTP headers sent by the server may be passed in the
886 environment as well. Here are some common variable names:
def escape(s, quote=None):
    '''Replace special characters "&", "<" and ">" to HTML-safe sequences.
    If the optional flag quote is true, the quotation mark character (")
    is also translated.'''
    # "&" has to be handled before the others: the replacement text of
    # every later step itself contains "&" and must not be escaped again.
    s = s.replace("&", "&amp;")  # Must be done first!
    s = s.replace("<", "&lt;")
    s = s.replace(">", "&gt;")
    if quote:
        s = s.replace('"', "&quot;")
    return s
def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"):
    """Check whether s is an acceptable multipart boundary string.

    With the default pattern, s must consist of 1 to 201 printable ASCII
    characters (space through "~") and must not end with a space.

    Returns the re match object (truthy) when s matches _vb_pattern and
    None otherwise, so the result is meant to be used in a boolean test.
    """
    # Imported locally so the function does not depend on a module-level
    # "import re" being present.
    import re
    return re.match(_vb_pattern, s)
919 # Call test() when this file is run as a script (not imported as a module)
920 if __name__
== '__main__':