Remove unnecessary (and wrong) check for movement, as the parent does it (fixes ...
[ardour2.git] / tools / bug_tool / ClientForm.py
blobc42f65b31382f1d7dbb2a71d3e7e71e103af2413
1 """HTML form handling for web clients.
3 ClientForm is a Python module for handling HTML forms on the client
4 side, useful for parsing HTML forms, filling them in and returning the
5 completed forms to the server. It has developed from a port of Gisle
6 Aas' Perl module HTML::Form, from the libwww-perl library, but the
7 interface is not the same.
9 The most useful docstring is the one for HTMLForm.
11 RFC 1866: HTML 2.0
12 RFC 1867: Form-based File Upload in HTML
13 RFC 2388: Returning Values from Forms: multipart/form-data
14 HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
15 HTML 4.01 Specification, W3C Recommendation 24 December 1999
18 Copyright 2002-2003 John J. Lee <jjl@pobox.com>
19 Copyright 1998-2000 Gisle Aas.
21 This code is free software; you can redistribute it and/or modify it
22 under the terms of the BSD License (see the file COPYING included with
23 the distribution).
25 """
27 # XXX
28 # Treat unknown controls as text controls? (this was a recent LWP
29 # HTML::Form change) I guess this is INPUT with no TYPE? Check LWP
30 # source and browser behaviour.
31 # Support for list item ids. How to handle missing ids? (How do I deal
32 # with duplicate OPTION labels ATM? Can't remember...)
33 # Arrange things so can automatically PyPI-register with categories
34 # without messing up 1.5.2 compatibility.
35 # Tests need work.
36 # Test single and multiple file upload some more on the web.
37 # Does file upload work when name is missing? Sourceforge tracker form
38 # doesn't like it. Check standards, and test with Apache. Test binary
39 # upload with Apache.
40 # Add label support for CHECKBOX and RADIO.
41 # Better docs.
42 # Deal with character sets properly. Not sure what the issues are here.
43 # I don't *think* any encoding of control names, filenames or data is
44 # necessary -- HTML spec. doesn't require it, and Mozilla Firebird 0.6
45 # doesn't seem to do it.
46 # Add charset parameter to Content-type headers? How to find value??
47 # Get rid of MapBase, AList and MimeWriter.
48 # I'm not going to fix this unless somebody tells me what real servers
49 # that want this encoding actually expect: If enctype is
50 # application/x-www-form-urlencoded and there's a FILE control present.
51 # Strictly, it should be 'name=data' (see HTML 4.01 spec., section
52 # 17.13.2), but I send "name=" ATM. What about multiple file upload??
53 # Get rid of the two type-switches (for kind and click*).
54 # Remove single-selection code: can be special case of multi-selection,
55 # with a few variations, I think.
56 # Factor out multiple-selection list code? May not be easy. Maybe like
57 # this:
59 # ListControl
60 # ^
61 # | MultipleListControlMixin
62 # | ^
63 # SelectControl /
64 # ^ /
65 # \ /
66 # MultiSelectControl
69 # Plan
70 # ----
71 # Maybe a 0.2.x, cleaned up a bit and with id support for list items?
72 # Not sure it's worth it, really.
73 # Remove toggle methods.
74 # Replace by_label with choice between value / id / label /
75 # element contents (see discussion with Gisle about labels on
76 # libwww-perl list).
77 # ...what else?
78 # Work on DOMForm.
79 # XForms? Don't know if there's a need here.
# Compatibility shim for interpreters that predate the boolean builtins:
# referencing the name raises NameError when it is missing, in which case
# plain integers stand in for True and False.
try: True
except NameError:
    True = 1
    False = 0

# Likewise supply a minimal bool() when the builtin is not available.
try: bool
except NameError:
    def bool(expr):
        # Collapse any object to the True/False values defined above.
        if expr: return True
        else: return False
93 import sys, urllib, urllib2, types, string, mimetools, copy
94 from urlparse import urljoin
95 from cStringIO import StringIO
# Detect unicode support.  UnicodeType lives in the types module; a bare
# "import UnicodeType" names a module that does not exist, so it always
# raised ImportError and left UNICODE false even on interpreters that do
# support unicode strings.
try:
    from types import UnicodeType
except ImportError:
    UNICODE = False
else:
    UNICODE = True
103 VERSION = "0.1.13"
105 CHUNK = 1024 # size of chunks fed to parser, in bytes
107 # This version of urlencode is from my Python 1.5.2 back-port of the
108 # Python 2.1 CVS maintenance branch of urllib. It will accept a sequence
109 # of pairs instead of a mapping -- the 2.0 version only accepts a mapping.
def urlencode(query, doseq=False):
    """Encode a sequence of two-element tuples or a mapping as a query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    query: mapping (anything with an items() method) or sequence of
     (key, value) tuples
    doseq: if true, expand sequence values into one parameter per element

    Returns the "k=v" pairs joined by '&', with keys and values quoted by
    urllib.quote_plus.

    Raises TypeError if query is neither a mapping nor a non-string
    sequence.

    """
    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len(); non-empty
            # strings fail the tuple check.  Zero-length sequences of all
            # types get through, which matches the acceptance of empty
            # mappings above.
            if len(query) and type(query[0]) != types.TupleType:
                raise TypeError()
        except TypeError:
            # The traceback object used to be passed as a second
            # constructor argument, which put it into the exception's
            # args (and hence its message); raise with the message only.
            raise TypeError("not a valid non-string sequence or mapping "
                            "object")

    l = []
    if not doseq:
        # preserve old behavior: every value is simply stringified
        for k, v in query:
            k = urllib.quote_plus(str(k))
            v = urllib.quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = urllib.quote_plus(str(k))
            if type(v) == types.StringType:
                v = urllib.quote_plus(v)
                l.append(k + '=' + v)
            elif UNICODE and type(v) == types.UnicodeType:
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = urllib.quote_plus(v.encode("ASCII", "replace"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    len(v)
                except TypeError:
                    # not a sequence
                    v = urllib.quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # one parameter per element of the sequence
                    for elt in v:
                        l.append(k + '=' + urllib.quote_plus(str(elt)))
    return string.join(l, '&')
def startswith(string, initial):
    """Return true if the sequence string begins with initial."""
    prefix_len = len(initial)
    if prefix_len > len(string):
        return False
    return string[:prefix_len] == initial
def issequence(x):
    """Return true if x can be indexed with an integer.

    An IndexError counts as success (an empty sequence); TypeError or
    KeyError means x is not a sequence.

    """
    try:
        x[0]
    except IndexError:
        # empty, but still indexable by position
        return True
    except (TypeError, KeyError):
        return False
    else:
        return True
def isstringlike(x):
    """Return true if x can be concatenated with a string."""
    try:
        x + ""
    except:
        # any failure at all means x does not behave like a string
        return False
    return True
195 # XXX don't really want to drag this along (MapBase, AList, MimeWriter)
class MapBase:
    """Mapping designed to be easily derived from.

    Subclass it and override __init__, __setitem__, __getitem__, __delitem__
    and keys.  Nothing else should need to be overridden, unlike UserDict.
    This significantly simplifies dictionary-like classes.

    Also different from UserDict in that it has a readonly flag, and can be
    updated (and initialised) with a sequence of pairs (key, value).

    """
    def __init__(self, init=None):
        """init: optional mapping or sequence of (key, value) pairs."""
        self._data = {}
        self.readonly = False
        if init is not None: self.update(init)

    def __getitem__(self, key):
        return self._data[key]

    def __setitem__(self, key, item):
        """Store item under key; raises TypeError if readonly is set."""
        if not self.readonly:
            self._data[key] = item
        else:
            raise TypeError("object doesn't support item assignment")

    def __delitem__(self, key):
        """Remove key; raises TypeError if readonly is set."""
        if not self.readonly:
            del self._data[key]
        else:
            raise TypeError("object doesn't support item deletion")

    def keys(self):
        return self._data.keys()

    # now the internal workings, there should be no need to override these:

    def clear(self):
        # Iterate over a snapshot, since each deletion changes the keys.
        for k in list(self.keys()):
            del self[k]

    def __repr__(self):
        rep = []
        for k, v in self.items():
            rep.append("%s: %s" % (repr(k), repr(v)))
        return self.__class__.__name__+"{"+(string.join(rep, ", "))+"}"

    def copy(self):
        """Return a shallow copy (made with copy.copy)."""
        return copy.copy(self)

    def __cmp__(self, dict):
        # note: return value is *not* boolean
        # NOTE: the check is one-directional -- keys present only in the
        # other mapping are not detected.
        for k, v in self.items():
            if not (dict.has_key(k) and dict[k] == v):
                return 1  # different
        return 0  # the same

    def __len__(self):
        return len(self.keys())

    def values(self):
        r = []
        for k in self.keys():
            r.append(self[k])
        return r

    def items(self):
        """Return a list of (key, value) pairs, in the order of keys()."""
        # Build the pairs straight from the keys; looping over len(self)
        # (an integer) raised TypeError.
        r = []
        for k in self.keys():
            r.append((k, self[k]))
        return r

    def has_key(self, key):
        return key in self.keys()

    def update(self, map):
        """Add the items of a mapping or of a sequence of (key, value) tuples.

        Raises TypeError if an element of the sequence is not a tuple.

        """
        if issequence(map) and not isstringlike(map):
            items = map
        else:
            items = map.items()
        for tup in items:
            # TupleType must be reached through the types module; the bare
            # name is not defined in this file.
            if not isinstance(tup, types.TupleType):
                raise TypeError(
                    "MapBase.update requires a map or a sequence of pairs")
            k, v = tup
            self[k] = v

    def get(self, key, failobj=None):
        """Return self[key], or failobj if key is absent."""
        if key in self.keys():
            return self[key]
        else:
            return failobj

    def setdefault(self, key, failobj=None):
        """Return self[key], first storing failobj there if key is absent."""
        if not self.has_key(key):
            self[key] = failobj
        return self[key]
class AList(MapBase):
    """Read-only ordered mapping.

    Built from a sequence of (key, value) pairs whose order is preserved.
    set_inverted swaps the roles of keys and values.

    """
    def __init__(self, seq=[]):
        """seq: sequence of (key, value) pairs (copied, never modified)."""
        self.readonly = True
        self._inverted = False
        # _data always keeps the pairs in their original orientation
        self._data = list(seq[:])
        self._keys = []
        self._values = []
        for key, value in seq:
            self._keys.append(key)
            self._values.append(value)

    def set_inverted(self, inverted):
        """Select whether lookups go value -> key (true) or key -> value."""
        if (inverted and not self._inverted) or (
            not inverted and self._inverted):
            self._keys, self._values = self._values, self._keys
        if inverted: self._inverted = True
        else: self._inverted = False

    def __getitem__(self, key):
        try:
            i = self._keys.index(key)
        except ValueError:
            raise KeyError(key)
        return self._values[i]

    def __delitem__(self, key):
        """Remove key; raises TypeError while the readonly flag is set.

        Raises KeyError if key is not present.

        """
        # Honour the readonly flag the same way MapBase does.
        if self.readonly:
            raise TypeError("object doesn't support item deletion")
        try:
            # index is a method: it has to be called, not subscripted
            i = self._keys.index(key)
        except ValueError:
            raise KeyError(key)
        # keep the three parallel lists in step with each other
        del self._keys[i]
        del self._values[i]
        del self._data[i]

    def keys(self): return list(self._keys[:])
    def values(self): return list(self._values[:])
    def items(self):
        """Return the (key, value) pairs, swapped if the list is inverted."""
        data = self._data[:]
        if not self._inverted:
            return data
        else:
            newdata = []
            for k, v in data:
                newdata.append((v, k))
            return newdata
343 # This cut-n-pasted MimeWriter from standard library is here so can add
344 # to HTTP headers rather than message body when appropriate. It also uses
345 # \r\n in place of \n. This is nasty.
class MimeWriter:

    """Generic MIME writer.

    Methods:

    __init__()
    addheader()
    flushheaders()
    startbody()
    startmultipartbody()
    nextpart()
    lastpart()

    The writer never seeks on the output file, so parts have to be written
    in the order in which they should appear.  Headers are buffered until
    flushed, which allows them to be added in any order.

    General usage is:

    f = (open the output file)
    w = MimeWriter(f)
    ...call w.addheader(key, value) 0 or more times...

    followed by either:

    f = w.startbody(content_type)
    ...call f.write(data) for body data...

    or:

    w.startmultipartbody(subtype)
    for each part:
        subwriter = w.nextpart()
        ...use the subwriter's methods to create the subpart...
    w.lastpart()

    The subwriter is another MimeWriter instance writing to the same file,
    so nested multipart bodies are written the same way.

    Warning: don't forget to call lastpart()!

    Calls made in the wrong order are not detected.

    Some special cases:

    - startbody() and startmultipartbody() return the file passed to the
      constructor.

    - flushheaders() writes out (and forgets) the headers accumulated so
      far.

    - The prefix argument of addheader() and start*body() selects where a
      header is inserted: 0 appends it at the end, 1 inserts it at the
      start.  addheader() appends by default; start*body() insert by
      default.

    - When add_to_http_hdrs is true the header is appended to the http_hdrs
      list given to the constructor instead of being written to the file.

    """

    def __init__(self, fp, http_hdrs=None):
        """fp: output file; http_hdrs: list collecting (key, value) pairs."""
        self._fp = fp
        self._http_hdrs = http_hdrs
        self._headers = []
        self._boundary = []
        self._first_part = True

    def addheader(self, key, value, prefix=0,
                  add_to_http_hdrs=0):
        """Buffer a header line, or add it to the HTTP headers list.

        prefix is ignored if add_to_http_hdrs is true.

        """
        parts = string.split(value, "\r\n")
        # drop empty lines from both ends of the value
        while parts and not parts[-1]:
            del parts[-1]
        while parts and not parts[0]:
            del parts[0]
        if add_to_http_hdrs:
            # HTTP headers are kept on a single line
            self._http_hdrs.append((key, string.join(parts, "")))
            return
        # continuation lines are stripped and indented
        for idx in range(1, len(parts)):
            parts[idx] = " " + string.strip(parts[idx])
        header = key + ": " + string.join(parts, "\r\n") + "\r\n"
        if prefix:
            self._headers.insert(0, header)
        else:
            self._headers.append(header)

    def flushheaders(self):
        """Write the buffered headers to the file and forget them."""
        pending = self._headers
        self._fp.writelines(pending)
        self._headers = []

    def startbody(self, ctype=None, plist=[], prefix=1,
                  add_to_http_hdrs=0, content_type=1):
        """Emit the headers and return the file to write the body to.

        prefix is ignored if add_to_http_hdrs is true.

        """
        if content_type and ctype:
            # append each parameter on its own continuation line
            for name, value in plist:
                ctype = ctype + ';\r\n %s=\"%s\"' % (name, value)
            self.addheader("Content-type", ctype, prefix=prefix,
                           add_to_http_hdrs=add_to_http_hdrs)
        self.flushheaders()
        if not add_to_http_hdrs:
            # blank line separating the headers from the body
            self._fp.write("\r\n")
        self._first_part = True
        return self._fp

    def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1,
                           add_to_http_hdrs=0, content_type=1):
        """Start a multipart/<subtype> body, choosing a boundary if needed."""
        if not boundary:
            boundary = mimetools.choose_boundary()
        self._boundary.append(boundary)
        params = [("boundary", boundary)] + plist
        return self.startbody("multipart/" + subtype, params,
                              prefix=prefix,
                              add_to_http_hdrs=add_to_http_hdrs,
                              content_type=content_type)

    def nextpart(self):
        """Write a boundary line and return a writer for the next part."""
        boundary = self._boundary[-1]
        if not self._first_part:
            # terminate the previous part before the boundary
            self._fp.write("\r\n")
        else:
            self._first_part = False
        self._fp.write("--" + boundary + "\r\n")
        return self.__class__(self._fp)

    def lastpart(self):
        """Write the closing boundary of the current multipart body."""
        if self._first_part:
            # no part was written yet: emit one opening boundary first
            self.nextpart()
        closing = self._boundary.pop()
        self._fp.write("\r\n--" + closing + "--\r\n")
# Module exception types.  The first three are ValueError subclasses.
class ControlNotFoundError(ValueError):
    pass


class ItemNotFoundError(ValueError):
    pass


class ItemCountError(ValueError):
    pass


# Reported by the form parser when it meets malformed HTML.
class ParseError(Exception):
    pass
def ParseResponse(response, select_default=False, ignore_errors=False):
    """Parse HTTP response and return a list of HTMLForm instances.

    The return value of urllib2.urlopen can be conveniently passed to this
    function as the response parameter.

    ClientForm.ParseError is raised on parse errors.

    response: file-like object (supporting read() method) with a method
     geturl(), returning the base URI of the HTTP response
    select_default: for multiple-selection SELECT controls and RADIO controls,
     pick the first item as the default if none are selected in the HTML
    ignore_errors: don't raise ParseError, and carry on regardless if the
     parser gets confused

    Pass a true value for select_default if you want the behaviour specified by
    RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
    RADIO or multiple-selection SELECT control if none were selected in the
    HTML.  Most browsers (including Microsoft Internet Explorer (IE) and
    Netscape Navigator) instead leave all items unselected in these cases.  The
    W3C HTML 4.0 standard leaves this behaviour undefined in the case of
    multiple-selection SELECT controls, but insists that at least one RADIO
    button should be checked at all times, in contradiction to browser
    behaviour.

    Precisely what ignore_errors does isn't well-defined yet, so don't rely too
    much on the current behaviour -- if you want robustness, you're better off
    fixing the HTML before passing it to this function.

    """
    # ignore_errors has to be forwarded explicitly; without it ParseFile
    # falls back to its default and the caller's choice is lost.
    return ParseFile(response, response.geturl(), select_default,
                     ignore_errors)
def ParseFile(file, base_uri, select_default=False, ignore_errors=False):
    """Parse HTML and return a list of HTMLForm instances.

    ClientForm.ParseError is raised on parse errors.

    file: file-like object (supporting read() method) containing HTML with zero
     or more forms to be parsed
    base_uri: the base URI of the document

    For the other arguments and further details, see ParseResponse.__doc__.

    """
    fp = _FORM_PARSER_CLASS(ignore_errors)
    while 1:
        data = file.read(CHUNK)
        # Only an empty read means end of input.  A read shorter than CHUNK
        # can happen before the end (e.g. on network streams), so it must
        # not stop the loop or the rest of the document would be dropped.
        if not data:
            break
        fp.feed(data)
    forms = []
    for (name, action, method, enctype), attrs, controls in fp.forms:
        # a missing ACTION means the form submits to the document itself
        if action is None:
            action = base_uri
        else:
            action = urljoin(base_uri, action)
        form = HTMLForm(action, method, enctype, name, attrs)
        for control_type, control_name, control_attrs in controls:
            form.new_control(control_type, control_name, control_attrs,
                             select_default=select_default)
        forms.append(form)
    # controls are only fully initialised once every form is complete
    for form in forms:
        form.fixup()
    return forms
560 class _AbstractFormParser:
561 """forms attribute contains HTMLForm instances on completion."""
562 # pinched (and modified) from Moshe Zadka
563 def __init__(self, ignore_errors, entitydefs=None):
564 if entitydefs is not None:
565 self.entitydefs = entitydefs
566 self._ignore_errors = ignore_errors
567 self.forms = []
568 self._current_form = None
569 self._select = None
570 self._optgroup = None
571 self._option = None
572 self._textarea = None
574 def error(self, error):
575 if not self._ignore_errors: raise error
577 def start_form(self, attrs):
578 if self._current_form is not None:
579 self.error(ParseError("nested FORMs"))
580 name = None
581 action = None
582 enctype = "application/x-www-form-urlencoded"
583 method = "GET"
584 d = {}
585 for key, value in attrs:
586 if key == "name":
587 name = value
588 elif key == "action":
589 action = value
590 elif key == "method":
591 method = string.upper(value)
592 elif key == "enctype":
593 enctype = string.lower(value)
594 else:
595 d[key] = value
596 controls = []
597 self._current_form = (name, action, method, enctype), d, controls
599 def end_form(self):
600 if self._current_form is None:
601 self.error(ParseError("end of FORM before start"))
602 self.forms.append(self._current_form)
603 self._current_form = None
605 def start_select(self, attrs):
606 if self._current_form is None:
607 self.error(ParseError("start of SELECT before start of FORM"))
608 if self._select is not None:
609 self.error(ParseError("nested SELECTs"))
610 if self._textarea is not None:
611 self.error(ParseError("SELECT inside TEXTAREA"))
612 d = {}
613 for key, val in attrs:
614 d[key] = val
616 self._select = d
618 self._append_select_control({"__select": d})
620 def end_select(self):
621 if self._current_form is None:
622 self.error(ParseError("end of SELECT before start of FORM"))
623 if self._select is None:
624 self.error(ParseError("end of SELECT before start"))
626 if self._option is not None:
627 self._end_option()
629 self._select = None
631 def start_optgroup(self, attrs):
632 if self._select is None:
633 self.error(ParseError("OPTGROUP outside of SELECT"))
634 d = {}
635 for key, val in attrs:
636 d[key] = val
638 self._optgroup = d
640 def end_optgroup(self):
641 if self._optgroup is None:
642 self.error(ParseError("end of OPTGROUP before start"))
643 self._optgroup = None
645 def _start_option(self, attrs):
646 if self._select is None:
647 self.error(ParseError("OPTION outside of SELECT"))
648 if self._option is not None:
649 self._end_option()
651 d = {}
652 for key, val in attrs:
653 d[key] = val
655 self._option = {}
656 self._option.update(d)
657 if (self._optgroup and self._optgroup.has_key("disabled") and
658 not self._option.has_key("disabled")):
659 self._option["disabled"] = None
661 def _end_option(self):
662 if self._option is None:
663 self.error(ParseError("end of OPTION before start"))
665 contents = string.strip(self._option.get("contents", ""))
666 #contents = string.strip(self._option["contents"])
667 self._option["contents"] = contents
668 if not self._option.has_key("value"):
669 self._option["value"] = contents
670 if not self._option.has_key("label"):
671 self._option["label"] = contents
672 # stuff dict of SELECT HTML attrs into a special private key
673 # (gets deleted again later)
674 self._option["__select"] = self._select
675 self._append_select_control(self._option)
676 self._option = None
678 def _append_select_control(self, attrs):
679 controls = self._current_form[2]
680 name = self._select.get("name")
681 controls.append(("select", name, attrs))
683 ## def do_option(self, attrs):
684 ## if self._select is None:
685 ## self.error(ParseError("OPTION outside of SELECT"))
686 ## d = {}
687 ## for key, val in attrs:
688 ## d[key] = val
690 ## self._option = {}
691 ## self._option.update(d)
692 ## if (self._optgroup and self._optgroup.has_key("disabled") and
693 ## not self._option.has_key("disabled")):
694 ## self._option["disabled"] = None
696 def start_textarea(self, attrs):
697 if self._current_form is None:
698 self.error(ParseError("start of TEXTAREA before start of FORM"))
699 if self._textarea is not None:
700 self.error(ParseError("nested TEXTAREAs"))
701 if self._select is not None:
702 self.error(ParseError("TEXTAREA inside SELECT"))
703 d = {}
704 for key, val in attrs:
705 d[key] = val
707 self._textarea = d
709 def end_textarea(self):
710 if self._current_form is None:
711 self.error(ParseError("end of TEXTAREA before start of FORM"))
712 if self._textarea is None:
713 self.error(ParseError("end of TEXTAREA before start"))
714 controls = self._current_form[2]
715 name = self._textarea.get("name")
716 controls.append(("textarea", name, self._textarea))
717 self._textarea = None
719 def handle_data(self, data):
720 if self._option is not None:
721 # self._option is a dictionary of the OPTION element's HTML
722 # attributes, but it has two special keys, one of which is the
723 # special "contents" key contains text between OPTION tags (the
724 # other is the "__select" key: see the end_option method)
725 map = self._option
726 key = "contents"
727 elif self._textarea is not None:
728 map = self._textarea
729 key = "value"
730 else:
731 return
733 if not map.has_key(key):
734 map[key] = data
735 else:
736 map[key] = map[key] + data
738 ## def handle_data(self, data):
739 ## if self._option is not None:
740 ## contents = string.strip(data)
741 ## controls = self._current_form[2]
742 ## if not self._option.has_key("value"):
743 ## self._option["value"] = contents
744 ## if not self._option.has_key("label"):
745 ## self._option["label"] = contents
746 ## # self._option is a dictionary of the OPTION element's HTML
747 ## # attributes, but it has two special keys:
748 ## # 1. special "contents" key contains text between OPTION tags
749 ## self._option["contents"] = contents
750 ## # 2. stuff dict of SELECT HTML attrs into a special private key
751 ## # (gets deleted again later)
752 ## self._option["__select"] = self._select
753 ## self._append_select_control(self._option)
754 ## self._option = None
755 ## elif self._textarea is not None:
756 ## #self._textarea["value"] = data
757 ## if self._textarea.get("value") is None:
758 ## self._textarea["value"] = data
759 ## else:
760 ## self._textarea["value"] = self._textarea["value"] + data
762 def do_button(self, attrs):
763 if self._current_form is None:
764 self.error(ParseError("start of BUTTON before start of FORM"))
765 d = {}
766 d["type"] = "submit" # default
767 for key, val in attrs:
768 d[key] = val
769 controls = self._current_form[2]
771 type = d["type"]
772 name = d.get("name")
773 # we don't want to lose information, so use a type string that
774 # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
775 # eg. type for BUTTON/RESET is "resetbutton"
776 # (type for INPUT/RESET is "reset")
777 type = type+"button"
778 controls.append((type, name, d))
780 def do_input(self, attrs):
781 if self._current_form is None:
782 self.error(ParseError("start of INPUT before start of FORM"))
783 d = {}
784 d["type"] = "text" # default
785 for key, val in attrs:
786 d[key] = val
787 controls = self._current_form[2]
789 type = d["type"]
790 name = d.get("name")
791 controls.append((type, name, d))
793 def do_isindex(self, attrs):
794 if self._current_form is None:
795 self.error(ParseError("start of ISINDEX before start of FORM"))
796 d = {}
797 for key, val in attrs:
798 d[key] = val
799 controls = self._current_form[2]
801 # isindex doesn't have type or name HTML attributes
802 controls.append(("isindex", None, d))
804 # use HTMLParser if we have it (it does XHTML), htmllib otherwise
try:
    import HTMLParser
except ImportError:
    import htmllib, formatter
    class _FormParser(_AbstractFormParser, htmllib.HTMLParser):
        # This is still here for compatibility with Python 1.5.2.
        # It doesn't do the right thing with XHTML.
        def __init__(self, ignore_errors, entitydefs=None):
            htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
            _AbstractFormParser.__init__(self, ignore_errors, entitydefs)

        def do_option(self, attrs):
            _AbstractFormParser._start_option(self, attrs)

    _FORM_PARSER_CLASS = _FormParser
else:
    class _XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser):
        # thanks to Michael Howitz for this!

        # Definition of entities -- derived classes may override
        entitydefs = \
            {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}

        def __init__(self, ignore_errors, entitydefs=None):
            HTMLParser.HTMLParser.__init__(self)
            _AbstractFormParser.__init__(self, ignore_errors, entitydefs)

        def start_option(self, attrs):
            _AbstractFormParser._start_option(self, attrs)

        def end_option(self):
            _AbstractFormParser._end_option(self)

        def handle_starttag(self, tag, attrs):
            # dispatch to start_<tag> if it exists, else to do_<tag>;
            # tags with neither handler are ignored
            for prefix in ('start_', 'do_'):
                try:
                    method = getattr(self, prefix + tag)
                except AttributeError:
                    continue
                method(attrs)
                return

        def handle_endtag(self, tag):
            try:
                method = getattr(self, 'end_' + tag)
            except AttributeError:
                return  # unknown tag
            method()

        # handle_charref, handle_entityref and default entitydefs are taken
        # from sgmllib
        def handle_charref(self, name):
            try:
                code = int(name)
            except ValueError:
                self.unknown_charref(name)
                return
            if code < 0 or code > 255:
                self.unknown_charref(name)
            else:
                self.handle_data(chr(code))

        def handle_entityref(self, name):
            table = self.entitydefs
            if name in table:
                self.handle_data(table[name])
            else:
                self.unknown_entityref(name)

        # These methods would have passed through the ref intact if I'd thought
        # of it earlier, but since the old parser silently swallows unknown
        # refs, so does this new parser.
        def unknown_entityref(self, ref): pass
        def unknown_charref(self, ref): pass

    _FORM_PARSER_CLASS = _XHTMLCompatibleFormParser
class Control:
    """An HTML form control.

    An HTMLForm contains a sequence of Controls.  HTMLForm delegates lots of
    things to Control objects, and most of Control's methods are, in effect,
    documented by the HTMLForm docstrings.

    The Controls in an HTMLForm can be got at via the HTMLForm.find_control
    method or the HTMLForm.controls attribute.

    Control instances are usually constructed using the ParseFile /
    ParseResponse functions, so you can probably ignore the rest of this
    paragraph.  A Control is only properly initialised after the fixup method
    has been called.  In fact, this is only strictly necessary for ListControl
    instances.  This is necessary because ListControls are built up from
    ListControls each containing only a single item, and their initial value(s)
    can only be known after the sequence is complete.

    The types and values that are acceptable for assignment to the value
    attribute are defined by subclasses.

    If the disabled attribute is true, this represents the state typically
    represented by browsers by `greying out' a control.  If the disabled
    attribute is true, the Control will raise AttributeError if an attempt is
    made to change its value.  In addition, the control will not be considered
    `successful' as defined by the W3C HTML 4 standard -- ie. it will
    contribute no data to the return value of the HTMLForm.click* methods.  To
    enable a control, set the disabled attribute to a false value.

    If the readonly attribute is true, the Control will raise AttributeError if
    an attempt is made to change its value.  To make a control writable, set
    the readonly attribute to a false value.

    All controls have the disabled and readonly attributes, not only those that
    may have the HTML attributes of the same names.

    On assignment to the value attribute, the following exceptions are raised:
    TypeError, AttributeError (if the value attribute should not be assigned
    to, because the control is disabled, for example) and ValueError.

    If the name or value attributes are None, or the value is an empty list, or
    if the control is disabled, the control is not successful.

    Public attributes:

    type: string describing type of control (see the keys of the
     HTMLForm.type2class dictionary for the allowable values) (readonly)
    name: name of control (readonly)
    value: current value of control (subclasses may allow a single value, a
     sequence of values, or either)
    disabled: disabled state
    readonly: readonly state
    id: value of id HTML attribute

    """
    def __init__(self, type, name, attrs):
        """
        type: string describing type of control (see the keys of the
         HTMLForm.type2class dictionary for the allowable values)
        name: control name
        attrs: HTML attributes of control's HTML element

        """
        raise NotImplementedError()

    def add_to_form(self, form):
        """Append this control to form.controls."""
        form.controls.append(self)

    def fixup(self):
        """Finish initialisation; does nothing in this base class."""
        pass

    # attribute access is left entirely to subclasses
    def __getattr__(self, name):
        raise NotImplementedError()

    def __setattr__(self, name, value):
        raise NotImplementedError()

    def pairs(self):
        """Return list of (key, value) pairs suitable for passing to urlencode.
        """
        raise NotImplementedError()

    def _write_mime_data(self, mw):
        """Write data for this control to a MimeWriter."""
        # called by HTMLForm
        # each (name, value) pair becomes a form-data part of its own
        for name, value in self.pairs():
            part = mw.nextpart()
            disposition = 'form-data; name="%s"' % name
            part.addheader("Content-disposition", disposition, 1)
            body = part.startbody(prefix=0)
            body.write(value)

    def __str__(self):
        raise NotImplementedError()
981 #---------------------------------------------------
class ScalarControl(Control):
    """Control whose value is not restricted to one of a prescribed set.

    Some ScalarControls don't accept any value attribute.  Otherwise, takes a
    single value, which must be string-like.

    Additional read-only public attribute:

    attrs: dictionary mapping the names of original HTML attributes of the
     control to their values

    """
    def __init__(self, type, name, attrs):
        # type and name are read-only through __setattr__, so they are
        # written straight into the instance dictionary
        self.__dict__["type"] = string.lower(type)
        self.__dict__["name"] = name
        self._value = attrs.get("value")
        self.disabled = attrs.has_key("disabled")
        self.readonly = attrs.has_key("readonly")
        self.id = attrs.get("id")

        # keep a private copy of the HTML attributes
        self.attrs = attrs.copy()

        self._clicked = False

    def __getattr__(self, name):
        # only reached when normal attribute lookup fails
        if name != "value":
            raise AttributeError("%s instance has no attribute '%s'" %
                                 (self.__class__.__name__, name))
        return self.__dict__["_value"]

    def __setattr__(self, name, value):
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        if name != "value":
            self.__dict__[name] = value
            return
        # assignment to value: validate, then store under _value
        if not isstringlike(value):
            raise TypeError("must assign a string")
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        if self.disabled:
            raise AttributeError("control '%s' is disabled" % self.name)
        self.__dict__["_value"] = value

    def pairs(self):
        """Return [(name, value)], or [] if the control is not successful."""
        name = self.name
        value = self.value
        if self.disabled or name is None or value is None:
            return []
        return [(name, value)]

    def __str__(self):
        name = self.name
        if name is None:
            name = "<None>"
        value = self.value
        if value is None:
            value = "<None>"

        flags = []
        if self.disabled:
            flags.append("disabled")
        if self.readonly:
            flags.append("readonly")
        info = string.join(flags, ", ")
        if info:
            info = " (%s)" % info

        return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
1049 #---------------------------------------------------
class TextControl(ScalarControl):
    """Textual input control.

    Covers:

    INPUT/TEXT
    INPUT/PASSWORD
    INPUT/FILE
    INPUT/HIDDEN
    TEXTAREA

    """
    def __init__(self, type, name, attrs):
        """Initialise as a ScalarControl, then apply text-control defaults.

        Hidden controls are made readonly, and a missing value becomes the
        empty string.

        """
        ScalarControl.__init__(self, type, name, attrs)
        if self.type == "hidden":
            self.readonly = True
        # Assigning _value (not value) skips the readonly/disabled checks.
        if self._value is None:
            self._value = ""
1069 #---------------------------------------------------
class FileControl(ScalarControl):
    """File upload with INPUT TYPE=FILE.

    The value attribute of a FileControl is always None.

    Additional public method: add_file

    """
    def __init__(self, type, name, attrs):
        """Initialise with no value and an empty list of files to upload."""
        ScalarControl.__init__(self, type, name, attrs)
        self._value = None
        # list of (file_object, content_type, filename), filled by add_file
        self._upload_data = []

    def __setattr__(self, name, value):
        """Assign an attribute; value, name and type are all read-only."""
        if name not in ("value", "name", "type"):
            self.__dict__[name] = value
        else:
            raise AttributeError("%s attribute is readonly" % name)

    def add_file(self, file_object, content_type=None, filename=None):
        """Register a file to be uploaded with this control.

        file_object: object with a read method
        content_type: string-like MIME type, or None for
         "application/octet-stream"
        filename: string-like file name, or None

        Raises TypeError if any argument is of the wrong kind.

        """
        if not hasattr(file_object, "read"):
            raise TypeError("file-like object must have read method")
        if content_type is None:
            content_type = "application/octet-stream"
        elif not isstringlike(content_type):
            raise TypeError("content type must be None or string-like")
        if filename is not None and not isstringlike(filename):
            raise TypeError("filename must be None or string-like")
        self._upload_data.append((file_object, content_type, filename))

    def pairs(self):
        """Return [(name, "")], or [] if the control is unnamed or disabled."""
        # XXX should it be successful even if unnamed?
        name = self.name
        if name is not None and not self.disabled:
            return [(self.name, "")]
        return []

    def _write_mime_data(self, mw):
        """Write the files added with add_file to the MimeWriter mw.

        Nothing is written when no files were added.  A single file becomes
        one form-data part; several files are wrapped in one form-data part
        with a multipart/mixed body holding one "file" part per upload.

        """
        # called by HTMLForm
        uploads = self._upload_data
        count = len(uploads)
        if count == 0:
            return

        if count == 1:
            # single file
            file_object, content_type, filename = uploads[0]
            part = mw.nextpart()
            if filename:
                fn_part = '; filename="%s"' % filename
            else:
                fn_part = ''
            disp = 'form-data; name="%s"%s' % (self.name, fn_part)
            part.addheader("Content-disposition", disp, prefix=1)
            body = part.startbody(content_type, prefix=0)
            body.write(file_object.read())
            return

        # multiple files
        container = mw.nextpart()
        disp = 'form-data; name="%s"' % self.name
        container.addheader("Content-disposition", disp, prefix=1)
        container.startmultipartbody("mixed", prefix=0)
        for file_object, content_type, filename in uploads:
            part = container.nextpart()
            if filename:
                fn_part = '; filename="%s"' % filename
            else:
                fn_part = ''
            part.addheader("Content-disposition", 'file%s' % fn_part,
                           prefix=1)
            body = part.startbody(content_type, prefix=0)
            body.write(file_object.read())
        container.lastpart()

    def __str__(self):
        """Return "<ClassName(name=filenames) (flags)>" for the control."""
        name = self.name
        if name is None:
            name = "<None>"

        if self._upload_data:
            shown = []
            for file, ctype, filename in self._upload_data:
                if filename is not None:
                    shown.append(filename)
                else:
                    shown.append("<Unnamed file>")
            value = string.join(shown, ", ")
        else:
            value = "<No files added>"

        flags = []
        if self.disabled:
            flags.append("disabled")
        if self.readonly:
            flags.append("readonly")
        suffix = string.join(flags, ", ")
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, suffix)
1156 #---------------------------------------------------
class IsindexControl(ScalarControl):
    """ISINDEX control.

    ISINDEX is the odd-one-out of HTML form controls.  In fact, it isn't really
    part of regular HTML forms at all, and predates it.  You're only allowed
    one ISINDEX per HTML document.  ISINDEX and regular form submission are
    mutually exclusive -- either submit a form, or the ISINDEX.

    Having said this, since ISINDEX controls may appear in forms (which is
    probably bad HTML), ParseFile / ParseResponse will include them in the
    HTMLForm instances it returns.  You can set the ISINDEX's value, as with
    any other control (but note that ISINDEX controls have no name, so you'll
    need to use the type argument of set_value!).  When you submit the form,
    the ISINDEX will not be successful (ie., no data will get returned to the
    server as a result of its presence), unless you click on the ISINDEX
    control, in which case the ISINDEX gets submitted instead of the form:

    form.set_value("my isindex value", type="isindex")
    urllib2.urlopen(form.click(type="isindex"))

    ISINDEX elements outside of FORMs are ignored.  If you want to submit one
    by hand, do it like so:

    url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value"))
    result = urllib2.urlopen(url)

    """
    def __init__(self, type, name, attrs):
        """Initialise as a ScalarControl; a missing value becomes ""."""
        ScalarControl.__init__(self, type, name, attrs)
        if self._value is None:
            self._value = ""

    def pairs(self):
        """Return []: an ISINDEX never contributes data to a form."""
        return []

    def _click(self, form, coord, return_type):
        """Build the ISINDEX submission for this control.

        form: form whose action is the base of the submission URL
        coord: not used by this method
        return_type: "pairs" gives [], "request_data" gives the tuple
         (url, None, []), and anything else gives urllib2.Request(url)

        """
        # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
        # want "bar+baz".
        # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is
        # deprecated in 4.01, but it should still say how to submit it).
        # Submission of ISINDEX is explained in the HTML 3.2 spec, though.
        base = form.action
        query = "?" + urllib.quote_plus(self.value)
        url = urljoin(base, query)

        if return_type == "pairs":
            return []
        if return_type == "request_data":
            return url, None, []
        return urllib2.Request(url)

    def __str__(self):
        """Return "<ClassName(value) (flags)>" describing the control."""
        value = self.value
        if value is None:
            value = "<None>"

        flags = []
        if self.disabled:
            flags.append("disabled")
        if self.readonly:
            flags.append("readonly")
        suffix = string.join(flags, ", ")
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s)%s>" % (self.__class__.__name__, value, suffix)
1221 #---------------------------------------------------
class IgnoreControl(ScalarControl):
    """Control that we're not interested in.

    Covers:

    INPUT/RESET
    BUTTON/RESET
    INPUT/BUTTON
    BUTTON/BUTTON

    These controls are always unsuccessful, in the terminology of HTML 4 (ie.
    they never require any information to be returned to the server).

    BUTTON/BUTTON is used to generate events for script embedded in HTML.

    The value attribute of IgnoreControl is always None.

    """
    def __init__(self, type, name, attrs):
        """Initialise as a ScalarControl, then discard any HTML value."""
        ScalarControl.__init__(self, type, name, attrs)
        self._value = None

    def __setattr__(self, name, value):
        """Assign an attribute; value, name and type are all read-only."""
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        if name == "value":
            raise AttributeError(
                "control '%s' is ignored, hence read-only" % self.name)
        self.__dict__[name] = value
1254 #---------------------------------------------------
class ListControl(Control):
    """Control representing a sequence of items.

    The value attribute of a ListControl represents the selected list items in
    the control.

    ListControl implements both list controls that take a single value and
    those that take multiple values.

    ListControls accept sequence values only.  Some controls only accept
    sequences of length 0 or 1 (RADIO, and single-selection SELECT).
    In those cases, ItemCountError is raised if len(sequence) > 1.  CHECKBOXes
    and multiple-selection SELECTs (those having the "multiple" HTML attribute)
    accept sequences of any length.

    Note the following mistake:

    control.value = some_value
    assert control.value == some_value    # not necessarily true

    The reason for this is that the value attribute always gives the list items
    in the order they were listed in the HTML.

    ListControl items can also be referred to by their labels instead of names.
    Use the by_label argument, and the set_value_by_label, get_value_by_label
    methods.

    XXX RadioControl and CheckboxControl don't implement by_label yet.

    Note that, rather confusingly, though SELECT controls are represented in
    HTML by SELECT elements (which contain OPTION elements, representing
    individual list items), CHECKBOXes and RADIOs are not represented by *any*
    element.  Instead, those controls are represented by a collection of INPUT
    elements.  For example, this is a SELECT control, named "control1":

    <select name="control1">
     <option>foo</option>
     <option value="1">bar</option>
    </select>

    and this is a CHECKBOX control, named "control2":

    <input type="checkbox" name="control2" value="foo" id="cbe1">
    <input type="checkbox" name="control2" value="bar" id="cbe2">

    The id attribute of a CHECKBOX or RADIO ListControl is always that of its
    first element (for example, "cbe1" above).


    Additional read-only public attribute: multiple.


    ListControls are built up by the parser from their component items by
    creating one ListControl per item, consolidating them into a single master
    ListControl held by the HTMLForm:

    -User calls form.new_control(...)
    -Form creates Control, and calls control.add_to_form(self).
    -Control looks for a Control with the same name and type in the form, and
     if it finds one, merges itself with that control by calling
     control.merge_control(self).  The first Control added to the form, of a
     particular name and type, is the only one that survives in the form.
    -Form calls control.fixup for all its controls.  ListControls in the form
     know they can now safely pick their default values.

    To create a ListControl without an HTMLForm, use:

    control.merge_control(new_control)

    """
    def __init__(self, type, name, attrs=None, select_default=False,
                 called_as_base_class=False):
        """Initialise the state shared by all list controls.

        type: control type string; stored lower-cased
        name: control name
        attrs: HTML attributes of the element describing one list item; if
         empty or omitted, no item data is recorded
        select_default: for RADIO and multiple-selection SELECT controls, pick
         the first item as the default if no 'selected' HTML attribute is
         present
        called_as_base_class: must be true, which is how subclasses signal
         that ListControl itself is not being instantiated

        Raises NotImplementedError if called_as_base_class is false.

        """
        if not called_as_base_class:
            raise NotImplementedError()

        # None stands in for "no attributes" so that no dictionary object is
        # shared between calls through the default argument.
        if attrs is None:
            attrs = {}

        # name and type are read-only through __setattr__
        self.__dict__["type"] = string.lower(type)
        self.__dict__["name"] = name
        self._value = attrs.get("value")
        self.disabled = False
        self.readonly = False
        self.id = attrs.get("id")

        self._attrs = attrs.copy()
        # As Controls are merged in with .merge_control(), self._attrs will
        # refer to each Control in turn -- always the most recently merged
        # control.  Each merged-in Control instance corresponds to a single
        # list item: see ListControl.__doc__.
        if attrs:
            self._attrs_list = [self._attrs]  # extended by .merge_control()
            self._disabled_list = [self._attrs.has_key("disabled")]  # ditto
        else:
            self._attrs_list = []  # extended by .merge_control()
            self._disabled_list = []  # ditto

        self._select_default = select_default
        self._clicked = False
        # Some list controls can have their default set only after all items
        # are known.  If so, self._value_is_set is false, and the self.fixup
        # method, called after all items have been added, sets the default.
        self._value_is_set = False

    def _value_from_label(self, label):
        """Map an item label to an item name; not supported by this class."""
        raise NotImplementedError("control '%s' does not yet support "
                                  "by_label" % self.name)

    def toggle(self, name, by_label=False):
        """Toggle the selected state of the named list item."""
        return self._set_selected_state(name, 2, by_label)
    def set(self, selected, name, by_label=False):
        """Select (selected true) or deselect the named list item."""
        action = int(bool(selected))
        return self._set_selected_state(name, action, by_label)

    def _set_selected_state(self, name, action, by_label):
        """Clear, set or toggle the selected state of one list item.

        name: item name
        action:
         0: clear
         1: set
         2: toggle
        by_label: if true, name is an item label, converted to an item name
         with self._value_from_label

        Raises TypeError if name is not string-like, AttributeError if the
        control is disabled or readonly or if a disabled item would become
        selected, and ItemNotFoundError if there is no such item.

        """
        if not isstringlike(name):
            raise TypeError("item name must be string-like")
        if self.disabled:
            raise AttributeError("control '%s' is disabled" % self.name)
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        if by_label:
            name = self._value_from_label(name)
        try:
            i = self._menu.index(name)
        except ValueError:
            raise ItemNotFoundError("no item named '%s'" % name)

        if self.multiple:
            # _selected is a list of flags parallel to _menu
            if action == 2:
                action = not self._selected[i]
            if action and self._disabled_list[i]:
                raise AttributeError("item '%s' is disabled" % name)
            self._selected[i] = bool(action)
        else:
            # _selected is the selected item's name, or None
            if action == 2:
                if self._selected == name:
                    action = 0
                else:
                    action = 1
            if action == 0 and self._selected == name:
                self._selected = None
            elif action == 1:
                if self._disabled_list[i]:
                    raise AttributeError("item '%s' is disabled" % name)
                self._selected = name

    def toggle_single(self, by_label=False):
        """Toggle the only item of a single-item control."""
        self._set_single_selected_state(2, by_label)
    def set_single(self, selected, by_label=False):
        """Select or deselect the only item of a single-item control."""
        action = int(bool(selected))
        self._set_single_selected_state(action, by_label)

    def _set_single_selected_state(self, action, by_label):
        """Apply action (0 clear, 1 set, 2 toggle) to the control's sole item.

        Raises ItemCountError unless the control has exactly one item.

        by_label is accepted for interface compatibility only.  The sole item
        is found by position, so its *name* is taken directly from the menu;
        it must not be run through the label-to-name mapping (which used to
        be applied, twice, to what was already a name).

        """
        if len(self._menu) != 1:
            raise ItemCountError("'%s' is not a single-item control" %
                                 self.name)

        self._set_selected_state(self._menu[0], action, False)

    def get_item_disabled(self, name, by_label=False):
        """Get disabled state of named list item in a ListControl."""
        if by_label:
            name = self._value_from_label(name)
        try:
            i = self._menu.index(name)
        except ValueError:
            raise ItemNotFoundError("no item named '%s'" % name)
        else:
            return self._disabled_list[i]

    def set_item_disabled(self, disabled, name, by_label=False):
        """Set disabled state of named list item in a ListControl.

        disabled: boolean disabled state

        """
        if by_label:
            name = self._value_from_label(name)
        try:
            i = self._menu.index(name)
        except ValueError:
            raise ItemNotFoundError("no item named '%s'" % name)
        else:
            self._disabled_list[i] = bool(disabled)

    def set_all_items_disabled(self, disabled):
        """Set disabled state of all list items in a ListControl.

        disabled: boolean disabled state

        """
        for i in range(len(self._disabled_list)):
            self._disabled_list[i] = bool(disabled)

    def get_item_attrs(self, name, by_label=False):
        """Return dictionary of HTML attributes for a single ListControl item.

        The HTML element types that describe list items are: OPTION for SELECT
        controls, INPUT for the rest.  These elements have HTML attributes that
        you may occasionally want to know about -- for example, the "alt" HTML
        attribute gives a text string describing the item (graphical browsers
        usually display this as a tooltip).

        The returned dictionary maps HTML attribute names to values.  The names
        and values are taken from the original HTML.

        Note that for SELECT controls, the returned dictionary contains a
        special key "contents" -- see SelectControl.__doc__.

        """
        if by_label:
            name = self._value_from_label(name)
        try:
            i = self._menu.index(name)
        except ValueError:
            raise ItemNotFoundError("no item named '%s'" % name)
        return self._attrs_list[i]

    def add_to_form(self, form):
        """Add this control to form, merging into an existing list control.

        If form already holds a control with the same name and type, this
        control's items are merged into it; otherwise this control is appended
        to the form.

        """
        try:
            control = form.find_control(self.name, self.type)
        except ControlNotFoundError:
            Control.add_to_form(self, form)
        else:
            control.merge_control(self)

    def merge_control(self, control):
        """Append the items (and selection state) of control to this one."""
        assert bool(control.multiple) == bool(self.multiple)
        assert isinstance(control, self.__class__)
        self._menu.extend(control._menu)
        self._attrs_list.extend(control._attrs_list)
        self._disabled_list.extend(control._disabled_list)
        if control.multiple:
            self._selected.extend(control._selected)
        else:
            # single selection: an explicitly selected later item wins
            if control._value_is_set:
                self._selected = control._selected
        if control._value_is_set:
            self._value_is_set = True

    def fixup(self):
        """
        ListControls are built up from component list items (which are also
        ListControls) during parsing.  This method should be called after all
        items have been added.  See ListControl.__doc__ for the reason this is
        required.

        """
        # Need to set default selection where no item was indicated as being
        # selected by the HTML:

        # CHECKBOX:
        #  Nothing should be selected.
        # SELECT/single, SELECT/multiple and RADIO:
        #  RFC 1866 (HTML 2.0): says first item should be selected.
        #  W3C HTML 4.01 Specification: says that client behaviour is
        #   undefined in this case.  For RADIO, exactly one must be selected,
        #   though which one is undefined.
        #  Both Netscape and Microsoft Internet Explorer (IE) choose first
        #   item for SELECT/single.  However, both IE5 and Mozilla (both 1.0
        #   and Firebird 0.6) leave all items unselected for RADIO and
        #   SELECT/multiple.

        # Since both Netscape and IE all choose the first item for
        # SELECT/single, we do the same.  OTOH, both Netscape and IE
        # leave SELECT/multiple with nothing selected, in violation of RFC 1866
        # (but not in violation of the W3C HTML 4 standard); the same is true
        # of RADIO (which *is* in violation of the HTML 4 standard).  We follow
        # RFC 1866 if the select_default attribute is set, and Netscape and IE
        # otherwise.  RFC 1866 and HTML 4 are always violated insofar as you
        # can deselect all items in a RadioControl.

        raise NotImplementedError()

    def __getattr__(self, name):
        """Serve the virtual "value" attribute: the list of selected names.

        Any other name that reaches this hook raises AttributeError.

        """
        if name == "value":
            menu = self._menu
            if self.multiple:
                values = []
                for i in range(len(menu)):
                    if self._selected[i]: values.append(menu[i])
                return values
            else:
                if self._selected is None: return []
                else: return [self._selected]
        else:
            raise AttributeError("%s instance has no attribute '%s'" %
                                 (self.__class__.__name__, name))

    def __setattr__(self, name, value):
        """Assign an attribute, routing "value" through _set_value.

        Raises AttributeError if "value" is assigned while the control is
        disabled or readonly, or if name, type or multiple is assigned to.

        """
        if name == "value":
            if self.disabled:
                raise AttributeError("control '%s' is disabled" % self.name)
            if self.readonly:
                raise AttributeError("control '%s' is readonly" % self.name)
            self._set_value(value)
        elif name in ("name", "type", "multiple"):
            raise AttributeError("%s attribute is readonly" % name)
        else:
            self.__dict__[name] = value

    def _set_value(self, value):
        """Set the selection from a sequence of item names."""
        if self.multiple:
            self._multiple_set_value(value)
        else:
            self._single_set_value(value)

    def _single_set_value(self, value):
        """Set the selection of a single-selection control.

        value: sequence of zero or one item names

        Raises TypeError if value is None or string-like, ItemCountError if
        it holds more than one name, ItemNotFoundError if the name is not an
        item of this control, and AttributeError if the item is disabled.

        """
        if value is None or isstringlike(value):
            raise TypeError("ListControl, must set a sequence")
        nr = len(value)
        if not (0 <= nr <= 1):
            raise ItemCountError("single selection list, must set sequence of "
                                 "length 0 or 1")

        if nr == 0:
            self._selected = None
        else:
            value = value[0]
            try:
                i = self._menu.index(value)
            except ValueError:
                raise ItemNotFoundError("no item named '%s'" %
                                        repr(value))
            if self._disabled_list[i]:
                raise AttributeError("item '%s' is disabled" % value)
            self._selected = value

    def _multiple_set_value(self, value):
        """Set the selection of a multiple-selection control.

        value: sequence of item names; exactly these items end up selected

        Raises TypeError if value is None or string-like, ItemNotFoundError
        if a name is not an item of this control, and AttributeError if a
        named item is disabled.  The current selection is left untouched when
        an exception is raised.

        """
        if value is None or isstringlike(value):
            raise TypeError("ListControl, must set a sequence")

        # Built separately and installed only once every name has checked out.
        selected = [False]*len(self._selected)
        menu = self._menu
        disabled_list = self._disabled_list

        for v in value:
            try:
                i = menu.index(v)
            except ValueError:
                raise ItemNotFoundError("no item named '%s'" % repr(v))
            if disabled_list[i]:
                # report the offending item, not the whole sequence
                raise AttributeError("item '%s' is disabled" % v)
            selected[i] = True
        self._selected = selected

    def set_value_by_label(self, value):
        """Set the selection from item labels; not supported by this class."""
        raise NotImplementedError("control '%s' does not yet support "
                                  "by_label" % self.name)
    def get_value_by_label(self):
        """Return the selected labels; not supported by this class."""
        raise NotImplementedError("control '%s' does not yet support "
                                  "by_label" % self.name)

    def possible_items(self, by_label=False):
        """Return a copy of the list of item names, in HTML order.

        Raises NotImplementedError if by_label is true.

        """
        if by_label:
            raise NotImplementedError(
                "control '%s' does not yet support by_label" % self.name)
        return copy.copy(self._menu)

    def pairs(self):
        """Return (control name, item name) pairs for the selected items.

        Returns [] if the control is disabled.  For a single-selection
        control, also returns [] if it is unnamed or nothing is selected.

        """
        if self.disabled:
            return []

        if not self.multiple:
            name = self.name
            value = self._selected
            if name is None or value is None:
                return []
            return [(name, value)]
        else:
            control_name = self.name  # usually the name HTML attribute
            pairs = []
            for i in range(len(self._menu)):
                item_name = self._menu[i]  # usually the value HTML attribute
                if self._selected[i]:
                    pairs.append((control_name, item_name))
            return pairs

    def _item_str(self, i):
        """Return display text for item i: "*" if selected, "(...)" if disabled."""
        item_name = self._menu[i]
        if self.multiple:
            if self._selected[i]:
                item_name = "*"+item_name
        else:
            if self._selected == item_name:
                item_name = "*"+item_name
        if self._disabled_list[i]:
            item_name = "(%s)" % item_name
        return item_name

    def __str__(self):
        """Return "<ClassName(name=[items]) (flags)>" describing the control."""
        name = self.name
        if name is None: name = "<None>"

        display = []
        for i in range(len(self._menu)):
            s = self._item_str(i)
            display.append(s)

        infos = []
        if self.disabled: infos.append("disabled")
        if self.readonly: infos.append("readonly")
        info = string.join(infos, ", ")
        if info: info = " (%s)" % info

        return "<%s(%s=[%s])%s>" % (self.__class__.__name__,
                                    name, string.join(display, ", "), info)
class RadioControl(ListControl):
    """
    Covers:

    INPUT/RADIO

    """
    def __init__(self, type, name, attrs, select_default=False):
        """Create a single-item RADIO control from one INPUT element.

        The item is named by the "value" HTML attribute ("on" if absent) and
        starts selected only if the "checked" HTML attribute is present.

        """
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True)
        # multiple is read-only through __setattr__
        self.__dict__["multiple"] = False
        item = attrs.get("value", "on")
        self._menu = [item]
        if not attrs.has_key("checked"):
            self._selected = None
        else:
            self._value_is_set = True
            self._selected = item

    def fixup(self):
        """Pick the default selection if the HTML selected no item.

        The first item is selected only when select_default was requested.

        """
        if self._value_is_set:
            return
        # no item explicitly selected
        assert self._selected is None
        if self._select_default:
            self._selected = self._menu[0]
        self._value_is_set = True
class CheckboxControl(ListControl):
    """
    Covers:

    INPUT/CHECKBOX

    """
    def __init__(self, type, name, attrs, select_default=False):
        """Create a single-item CHECKBOX control from one INPUT element.

        The item is named by the "value" HTML attribute ("on" if absent) and
        is selected exactly when the "checked" HTML attribute is present.

        """
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True)
        # multiple is read-only through __setattr__
        self.__dict__["multiple"] = True
        item = attrs.get("value", "on")
        self._menu = [item]
        self._selected = [attrs.has_key("checked")]
        self._value_is_set = True

    def fixup(self):
        """Check the value is already settled; there is no default to pick."""
        # If no items were explicitly checked in HTML, that's how we must
        # leave it, so we have nothing to do here.
        assert self._value_is_set
class SelectControl(ListControl):
    """
    Covers:

    SELECT (and OPTION)

    SELECT control values and labels are subject to some messy defaulting
    rules.  For example, if the HTML representation of the control is:

    <SELECT name=year>
      <OPTION value=0 label="2002">current year</OPTION>
      <OPTION value=1>2001</OPTION>
      <OPTION>2000</OPTION>
    </SELECT>

    The items, in order, have labels "2002", "2001" and "2000", whereas their
    values are "0", "1" and "2000" respectively.  Note that the value of the
    last OPTION in this example defaults to its contents, as specified by RFC
    1866, as do the labels of the second and third OPTIONs.

    The purpose of the by_label methods is that the OPTION labels are
    sometimes much more meaningful than are the OPTION values, which can make
    for more maintainable code.

    Additional read-only public attribute: attrs

    The attrs attribute is a dictionary of the original HTML attributes of the
    SELECT element.  Other ListControls do not have this attribute, because in
    other cases the control as a whole does not correspond to any single HTML
    element.  The get_item_attrs method may be used as usual to get at the
    HTML attributes of the HTML elements corresponding to individual list items
    (for SELECT controls, these are OPTION elements).

    Another special case is that the attributes dictionaries returned by
    get_item_attrs have a special key "contents" which does not correspond to
    any real HTML attribute, but rather contains the contents of the OPTION
    element:

    <OPTION>this bit</OPTION>

    """
    # HTML attributes here are treated slightly differently from other list
    # controls:
    # -The SELECT HTML attributes dictionary is stuffed into the OPTION
    #  HTML attributes dictionary under the "__select" key.
    # -The content of each OPTION element is stored under the special
    #  "contents" key of the dictionary.
    # After all this, the dictionary is passed to the SelectControl constructor
    # as the attrs argument, as usual.  However:
    # -The first SelectControl constructed when building up a SELECT control
    #  has a constructor attrs argument containing only the __select key -- so
    #  this SelectControl represents an empty SELECT control.
    # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and
    #  the __select dictionary containing the SELECT HTML-attributes.
    def __init__(self, type, name, attrs, select_default=False):
        """Create an empty or single-item SELECT control.

        attrs: OPTION HTML attributes, plus the SELECT element's attributes
         under the "__select" key (see the comment above); the caller's
         dictionary is not modified
        select_default: passed on to ListControl.__init__

        """
        # fish out the SELECT HTML attributes from the OPTION HTML attributes
        # dictionary
        self.attrs = attrs["__select"].copy()
        attrs = attrs.copy()
        del attrs["__select"]

        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True)

        self._label_map = None
        # disabled and id come from the SELECT element, not from the OPTION
        self.disabled = self.attrs.has_key("disabled")
        self.id = self.attrs.get("id")

        self._menu = []
        self._value_is_set = False
        multiple = self.attrs.has_key("multiple")
        # multiple is read-only through __setattr__
        self.__dict__["multiple"] = multiple
        if multiple:
            self._selected = []    # flags parallel to _menu
        else:
            self._selected = None  # name of the selected item, or None

        if attrs:  # OPTION item data was provided
            value = attrs["value"]
            self._menu.append(value)
            selected = attrs.has_key("selected")
            if selected:
                self._value_is_set = True
            if multiple:
                self._selected.append(selected)
            elif selected:
                self._selected = value

    def _build_select_label_map(self):
        """Return an ordered mapping of labels to values.

        For example, if the HTML representation of the control is as given in
        SelectControl.__doc__,  this function will return a mapping like:

        {"2002": "0", "2001": "1", "2000": "2000"}

        """
        alist = []
        for val in self._menu:
            attrs = self.get_item_attrs(val)
            alist.append((attrs["label"], val))
        return AList(alist)

    def _value_from_label(self, label):
        """Return the item name for label, or raise ItemNotFoundError."""
        try:
            return self._label_map[label]
        except KeyError:
            raise ItemNotFoundError("no item has label '%s'" % label)

    def fixup(self):
        """Pick the default selection and build the label map.

        If the HTML selected no item and the control has items, a
        single-selection control selects its first item, while a
        multiple-selection control does so only if select_default was
        requested.

        """
        if not self._value_is_set:
            # No item explicitly selected.
            if len(self._menu) > 0:
                if self.multiple:
                    if self._select_default:
                        self._selected[0] = True
                else:
                    assert self._selected is None
                    self._selected = self._menu[0]
            self._value_is_set = True
        self._label_map = self._build_select_label_map()

    def possible_items(self, by_label=False):
        """Return the list of item names (or labels, if by_label), in order."""
        if not by_label:
            return copy.copy(self._menu)

        labels = []
        # Inverted, the map is looked up by item name and yields the label.
        self._label_map.set_inverted(True)
        try:
            for item_name in self._menu:
                labels.append(self._label_map[item_name])
        finally:
            self._label_map.set_inverted(False)
        return labels

    def set_value_by_label(self, value):
        """Set the selection from a sequence of item labels.

        Raises TypeError if value is string-like, AttributeError if the
        control is disabled or readonly, and ItemNotFoundError if a label
        matches no item; otherwise behaves like assigning the corresponding
        item names to the value attribute.

        """
        if isstringlike(value):
            raise TypeError("ListControl, must set a sequence, not a string")
        if self.disabled:
            raise AttributeError("control '%s' is disabled" % self.name)
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)

        names = []
        for label in value:
            names.append(self._value_from_label(label))
        self._set_value(names)

    def get_value_by_label(self):
        """Return the labels of the selected items, in HTML order.

        A single-selection control with nothing selected gives [], just as
        the value attribute does.

        """
        menu = self._menu
        # Inverted, the map is looked up by item name and yields the label.
        self._label_map.set_inverted(True)
        try:
            if self.multiple:
                values = []
                for i in range(len(menu)):
                    if self._selected[i]:
                        values.append(self._label_map[menu[i]])
                return values
            else:
                if self._selected is None:
                    return []
                return [self._label_map[self._selected]]
        finally:
            self._label_map.set_inverted(False)
1898 #---------------------------------------------------
class SubmitControl(ScalarControl):
    """
    Covers:

    INPUT/SUBMIT
    BUTTON/SUBMIT

    """
    def __init__(self, type, name, attrs):
        """Initialise as a readonly ScalarControl with a string value.

        A missing "value" HTML attribute gives the control the value "".

        """
        ScalarControl.__init__(self, type, name, attrs)
        # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it
        # blank, Konqueror 3.1 defaults to "Submit".  HTML spec. doesn't seem
        # to define this.
        if self.value is None:
            # Stored directly: assigning to self.value would raise
            # AttributeError for an element carrying the "disabled" or
            # "readonly" HTML attribute.
            self.__dict__["_value"] = ""
        self.readonly = True

    def _click(self, form, coord, return_type):
        """Submit form as if this control had been clicked at coord.

        The control is marked as clicked only for the duration of the call
        to form._switch_click, whose result is returned.

        """
        self._clicked = coord
        try:
            return form._switch_click(return_type)
        finally:
            # always reset, so a failed click can't leave the control
            # looking clicked in later submissions
            self._clicked = False

    def pairs(self):
        """Return the ScalarControl pairs, but only while being clicked."""
        if not self._clicked:
            return []
        return ScalarControl.pairs(self)
1927 #---------------------------------------------------
class ImageControl(SubmitControl):
    """
    Covers:

    INPUT/IMAGE

    The value attribute of an ImageControl is always None.  Coordinates are
    specified using one of the HTMLForm.click* methods.

    """
    def __init__(self, type, name, attrs):
        """Initialise via ScalarControl.__init__ and fix the value at None."""
        # ScalarControl.__init__ is called directly, so the value defaulting
        # and readonly flag of SubmitControl.__init__ are not applied.
        ScalarControl.__init__(self, type, name, attrs)
        # Stored under "value" itself, so ordinary attribute lookup finds
        # None without consulting __getattr__.
        self.__dict__["value"] = None

    def __setattr__(self, name, value):
        """Assign an attribute; value, name and type are all read-only."""
        if name not in ("value", "name", "type"):
            self.__dict__[name] = value
        else:
            raise AttributeError("%s attribute is readonly" % name)

    def pairs(self):
        """Return the name.x / name.y click coordinate pairs.

        Returns [] if the control is disabled, unnamed, or not being clicked.

        """
        coord = self._clicked
        if self.disabled or not coord:
            return []
        name = self.name
        if name is None:
            return []
        x_pair = ("%s.x" % name, str(coord[0]))
        y_pair = ("%s.y" % name, str(coord[1]))
        return [x_pair, y_pair]
# Aliases: subclasses that add nothing but their own class name, which is
# what str(control) and str(form) display.
class PasswordControl(TextControl):
    """TextControl under a name of its own."""

class HiddenControl(TextControl):
    """TextControl under a name of its own."""

class TextareaControl(TextControl):
    """TextControl under a name of its own."""

class SubmitButtonControl(SubmitControl):
    """SubmitControl under a name of its own."""
def is_listcontrol(control):
    """Return true if control is an instance of ListControl or a subclass."""
    return isinstance(control, ListControl)
1968 class HTMLForm:
1969 """Represents a single HTML <form> ... </form> element.
1971 A form consists of a sequence of controls that usually have names, and
1972 which can take on various values. The values of the various types of
1973 controls represent variously: text, zero-, one- or many-of-many choices,
1974 and files to be uploaded.
1976 Forms can be filled in with data to be returned to the server, and then
1977 submitted, using the click method to generate a request object suitable for
1978 passing to urllib2.urlopen (or the click_request_data or click_pairs
1979 methods if you're not using urllib2).
1981 import ClientForm
1982 forms = ClientForm.ParseFile(html, base_uri)
1983 form = forms[0]
1985 form["query"] = "Python"
1986 form.set("lots", "nr_results")
1988 response = urllib2.urlopen(form.click())
1990 Usually, HTMLForm instances are not created directly. Instead, the
1991 ParseFile or ParseResponse factory functions are used. If you do construct
1992 HTMLForm objects yourself, however, note that an HTMLForm instance is only
1993 properly initialised after the fixup method has been called (ParseFile and
1994 ParseResponse do this for you). See ListControl.__doc__ for the reason
1995 this is required.
1997 Indexing a form (form["control_name"]) returns the named Control's value
1998 attribute. Assignment to a form index (form["control_name"] = something)
1999 is equivalent to assignment to the named Control's value attribute. If you
2000 need to be more specific than just supplying the control's name, use the
2001 set_value and get_value methods.
2003 ListControl values are lists of item names. The list item's name is the
2004 value of the corresponding HTML element's "value" attribute.
2006 Example:
2008 <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT>
2009 <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT>
2011 defines a CHECKBOX control with name "cheeses" which has two items, named
2012 "leicester" and "cheddar".
2014 Another example:
2016 <SELECT name="more_cheeses">
2017 <OPTION>1</OPTION>
2018 <OPTION value="2" label="CHEDDAR">cheddar</OPTION>
2019 </SELECT>
2021 defines a SELECT control with name "more_cheeses" which has two items,
2022 named "1" and "2".
2024 To set, clear or toggle individual list items, use the set and toggle
2025 methods. To set the whole value, do as for any other control: use indexing
2026 or the set_/get_value methods.
2028 Example:
2030 # select *only* the item named "cheddar"
2031 form["cheeses"] = ["cheddar"]
2032 # select "cheddar", leave other items unaffected
2033 form.set(True, "cheddar", "cheeses")
2035 Some controls (RADIO and SELECT without the multiple attribute) can only
2036 have zero or one items selected at a time. Some controls (CHECKBOX and
2037 SELECT with the multiple attribute) can have multiple items selected at a
2038 time. To set the whole value of a multiple-selection ListControl, assign a
2039 sequence to a form index:
2041 form["cheeses"] = ["cheddar", "leicester"]
2043 To check whether a control has an item, or whether an item is selected,
2044 respectively:
2046 "cheddar" in form.possible_items("cheeses")
2047 "cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses"))
2049 Note that some items may be disabled (see below).
2051 Note the following mistake:
2053 form[control_name] = control_value
2054 assert form[control_name] == control_value # not necessarily true
2056 The reason for this is that form[control_name] always gives the list items
2057 in the order they were listed in the HTML.
2059 List items (hence list values, too) can be referred to in terms of list
2060 item labels rather than list item names. Currently, this is only possible
2061 for SELECT controls (this is a bug). To use this feature, use the by_label
2062 arguments to the various HTMLForm methods. Note that it is *item* names
2063 (hence ListControl values also), not *control* names, that can be referred
2064 to by label.
2066 The question of default values of OPTION contents, labels and values is
2067 somewhat complicated: see SelectControl.__doc__ and
2068 ListControl.get_item_attrs.__doc__ if you think you need to know.
2070 Controls can be disabled or readonly. In either case, the control's value
2071 cannot be changed until you clear those flags (using the methods on
2072 HTMLForm). Disabled is the state typically represented by browsers by
2073 `greying out' a control. Disabled controls are not `successful' -- they
2074 don't cause data to get returned to the server. Readonly controls usually
2075 appear in browsers as read-only text boxes. Readonly controls are
2076 successful. List items can also be disabled. Attempts to select disabled
2077 items (with form[name] = value, or using the ListControl.set method, for
2078 example) fail. Attempts to clear disabled items are allowed.
2080 If a lot of controls are readonly, it can be useful to do this:
2082 form.set_all_readonly(False)
2084 When you want to do several things with a single control, or want to do
2085 less common things, like changing which controls and items are disabled,
2086 you can get at a particular control:
2088 control = form.find_control("cheeses")
2089 control.set_item_disabled(False, "gruyere")
2090 control.set(True, "gruyere")
2092 Most methods on HTMLForm just delegate to the contained controls, so see
2093 the docstrings of the various Control classes for further documentation.
2094 Most of these delegating methods take name, type, kind, id and nr arguments
2095 to specify the control to be operated on: see
2096 HTMLForm.find_control.__doc__.
2098 ControlNotFoundError (subclass of ValueError) is raised if the specified
2099 control can't be found. This includes occasions where a non-ListControl
2100 is found, but the method (set, for example) requires a ListControl.
2101 ItemNotFoundError (subclass of ValueError) is raised if a list item can't
2102 be found. ItemCountError (subclass of ValueError) is raised if an attempt
2103 is made to select more than one item and the control doesn't allow that, or
2104 set/get_single are called and the control contains more than one item.
2105 AttributeError is raised if a control or item is readonly or disabled and
2106 an attempt is made to alter its value.
2108 XXX CheckBoxControl and RadioControl don't yet support item access by label
2110 Security note: Remember that any passwords you store in HTMLForm instances
2111 will be saved to disk in the clear if you pickle them (directly or
2112 indirectly). The simplest solution to this is to avoid pickling HTMLForm
2113 objects. You could also pickle before filling in any password, or just set
2114 the password to "" before pickling.
2117 Public attributes:
2119 action: full (absolute URI) form action
2120 method: "GET" or "POST"
2121 enctype: form transfer encoding MIME type
2122 name: name of form (None if no name was specified)
2123 attrs: dictionary mapping original HTML form attributes to their values
2125 controls: list of Control instances; do not alter this list
2126 (instead, call form.new_control to make a Control and add it to the
2127 form, or control.add_to_form if you already have a Control instance)
2131 Methods for form filling:
2132 -------------------------
2134 Most of these methods have very similar arguments. See
2135 HTMLForm.find_control.__doc__ for details of the name, type, kind and nr
2136 arguments. See above for a description of by_label.
2138 def find_control(self,
2139 name=None, type=None, kind=None, id=None, predicate=None,
2140 nr=None)
2142 get_value(name=None, type=None, kind=None, id=None, nr=None,
2143 by_label=False)
2144 set_value(value,
2145 name=None, type=None, kind=None, id=None, nr=None,
2146 by_label=False)
2148 set_all_readonly(readonly)
2151 Methods applying only to ListControls:
2153 possible_items(name=None, type=None, kind=None, id=None, nr=None,
2154 by_label=False)
2156 set(selected, item_name,
2157 name=None, type=None, kind=None, id=None, nr=None,
2158 by_label=False)
2159 toggle(item_name,
2160 name=None, type=None, kind=None, id=None, nr=None,
2161 by_label=False)
2163 set_single(selected,
2164 name=None, type=None, kind=None, id=None, nr=None,
2165 by_label=False)
2166 toggle_single(name=None, type=None, kind=None, id=None, nr=None,
2167 by_label=False)
2170 Method applying only to FileControls:
2172 add_file(file_object,
2173 content_type=None, filename=None,
2174 name=None, id=None, nr=None)
2177 Methods applying only to clickable controls:
2179 click(name=None, type=None, id=None, nr=0, coord=(1,1))
2180 click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1))
2181 click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1))
# Maps a lower-cased control type name to the Control class that
# new_control instantiates for it.
type2class = {
    # free-text controls
    "text": TextControl,
    "password": PasswordControl,
    "hidden": HiddenControl,
    "textarea": TextareaControl,

    "isindex": IsindexControl,

    "file": FileControl,

    # types handled by IgnoreControl
    "button": IgnoreControl,
    "buttonbutton": IgnoreControl,
    "reset": IgnoreControl,
    "resetbutton": IgnoreControl,

    # submit-style controls
    "submit": SubmitControl,
    "submitbutton": SubmitButtonControl,
    "image": ImageControl,

    # list controls
    "radio": RadioControl,
    "checkbox": CheckboxControl,
    "select": SelectControl,
    }
2209 #---------------------------------------------------
2210 # Initialisation. Use ParseResponse / ParseFile instead.
def __init__(self, action, method="GET",
             enctype="application/x-www-form-urlencoded",
             name=None, attrs=None):
    """Create a form that has no controls yet.

    In the usual case, use ParseResponse (or ParseFile) to create new
    HTMLForm objects.

    action: full (absolute URI) form action
    method: "GET" or "POST"
    enctype: form transfer encoding MIME type
    name: name of form
    attrs: dictionary mapping original HTML form attributes to their values
     (the form keeps its own copy; an empty dictionary is used if None)

    """
    self.controls = []
    # Copy, so that the form never shares the caller's dictionary.
    if attrs is None:
        self.attrs = {}
    else:
        self.attrs = attrs.copy()
    self.name = name
    self.enctype = enctype
    self.method = method
    self.action = action
def new_control(self, type, name, attrs,
                ignore_unknown=False, select_default=False):
    """Adds a new control to the form.

    This is usually called by ParseFile and ParseResponse.  Don't call it
    yourself unless you're building your own Control instances.

    Note that controls representing lists of items are built up from
    controls holding only a single list item.  See ListControl.__doc__ for
    further information.

    type: type of control (see Control.__doc__ for a list); it is
     lower-cased before being looked up in type2class
    name: passed through to the Control constructor
    attrs: HTML attributes of control (the control is given a copy)
    ignore_unknown: if true, use a dummy Control instance for controls of
     unknown type; otherwise, raise ValueError
    select_default: for RADIO and multiple-selection SELECT controls, pick
     the first item as the default if no 'selected' HTML attribute is
     present (this defaulting happens when the HTMLForm.fixup method is
     called)

    """
    type = type.lower()
    klass = self.type2class.get(type)
    if klass is None:
        if not ignore_unknown:
            raise ValueError("Unknown control type '%s'" % type)
        klass = IgnoreControl

    attrs_copy = attrs.copy()
    # Only ListControl constructors take the select_default argument.
    if issubclass(klass, ListControl):
        new = klass(type, name, attrs_copy, select_default)
    else:
        new = klass(type, name, attrs_copy)
    new.add_to_form(self)
def fixup(self):
    """Normalise form after all controls have been added.

    This is usually called by ParseFile and ParseResponse.  Don't call it
    yourself unless you're building your own Control instances.

    This method should only be called once, after all controls have been
    added to the form.  It calls the fixup method of every control, in
    order.

    """
    for member in self.controls:
        member.fixup()
2285 #---------------------------------------------------
def __str__(self):
    """Return a multi-line summary of the form.

    The first line holds the method, action and enctype; one further line
    follows for each control, holding a space and then str(control).  The
    whole text is wrapped in angle brackets.

    """
    lines = ["%s %s %s" % (self.method, self.action, self.enctype)]
    lines.extend([" %s" % str(member) for member in self.controls])
    return "<%s>" % "\n".join(lines)
2293 #---------------------------------------------------
2294 # Form-filling methods.
def __getitem__(self, name):
    """Return the value attribute of the control found by name.

    The control is located with find_control(name).

    """
    found = self.find_control(name)
    return found.value
def __setitem__(self, name, value):
    """Assign value to the value attribute of the control found by name.

    The control is located with find_control(name).  An AttributeError
    raised by the assignment is re-raised as a ValueError carrying the same
    message.

    """
    control = self.find_control(name)
    try:
        control.value = value
    except AttributeError, e:
        # Indexed assignment reports failure as ValueError, not
        # AttributeError; the original message is kept.
        raise ValueError(str(e))
def get_value(self,
              name=None, type=None, kind=None, id=None, nr=None,
              by_label=False):
    """Return value of control.

    If only the name argument is supplied, equivalent to

    form[name]

    The control is located with find_control.  If by_label is true, the
    result of the control's get_value_by_label method is returned instead
    of its value attribute; NotImplementedError is raised if the control
    has no such method.

    """
    found = self.find_control(name, type, kind, id, nr=nr)
    if not by_label:
        return found.value
    try:
        getter = found.get_value_by_label
    except AttributeError:
        raise NotImplementedError(
            "control '%s' does not yet support by_label" % found.name)
    return getter()
def set_value(self, value,
              name=None, type=None, kind=None, id=None, nr=None,
              by_label=False):
    """Set value of control.

    If only name and value arguments are supplied, equivalent to

    form[name] = value

    except that an AttributeError raised by the control is not converted
    to ValueError here.

    The control is located with find_control.  If by_label is true, the
    value is handed to the control's set_value_by_label method instead of
    being assigned to its value attribute; NotImplementedError is raised if
    the control has no such method.

    """
    found = self.find_control(name, type, kind, id, nr=nr)
    if not by_label:
        found.value = value
        return
    try:
        setter = found.set_value_by_label
    except AttributeError:
        raise NotImplementedError(
            "control '%s' does not yet support by_label" % found.name)
    setter(value)
def set_all_readonly(self, readonly):
    """Set the readonly attribute of every control to bool(readonly)."""
    for member in self.controls:
        member.readonly = bool(readonly)
2353 #---------------------------------------------------
2354 # Form-filling methods applying only to ListControls.
def possible_items(self,
                   name=None, type=None, kind=None, id=None, nr=None,
                   by_label=False):
    """Return a list of all values that the specified control can take.

    The control is located with _find_list_control and the result comes
    from its possible_items method, which is passed by_label.

    """
    found = self._find_list_control(name, type, kind, id, nr)
    items = found.possible_items(by_label)
    return items
def set(self, selected, item_name,
        name=None, type=None, kind=None, id=None, nr=None,
        by_label=False):
    """Select / deselect named list item.

    selected: boolean selected state
    item_name: the list item to change

    The control is located with _find_list_control; selected, item_name
    and by_label are passed on to its set method.

    """
    found = self._find_list_control(name, type, kind, id, nr)
    found.set(selected, item_name, by_label)
def toggle(self, item_name,
           name=None, type=None, kind=None, id=None, nr=None,
           by_label=False):
    """Toggle selected state of named list item.

    The control is located with _find_list_control; item_name and by_label
    are passed on to its toggle method.

    """
    found = self._find_list_control(name, type, kind, id, nr)
    found.toggle(item_name, by_label)
def set_single(self, selected,
               name=None, type=None, kind=None, id=None, nr=None,
               by_label=False):
    """Select / deselect list item in a control having only one item.

    If the control has multiple list items, ItemCountError is raised.

    This is just a convenience method, so you don't need to know the item's
    name -- the item name in these single-item controls is usually
    something meaningless like "1" or "on".

    For example, if a checkbox has a single item named "on", the following
    two calls are equivalent:

    control.set(True, "on")
    control.set_single(True)

    """
    found = self._find_list_control(name, type, kind, id, nr)
    found.set_single(selected, by_label)
def toggle_single(self, name=None, type=None, kind=None, id=None, nr=None,
                  by_label=False):
    """Toggle selected state of list item in control having only one item.

    The rest is as for HTMLForm.set_single.__doc__.

    """
    found = self._find_list_control(name, type, kind, id, nr)
    found.toggle_single(by_label)
2410 #---------------------------------------------------
2411 # Form-filling method applying only to FileControls.
def add_file(self, file_object, content_type=None, filename=None,
             name=None, id=None, nr=None):
    """Add a file to be uploaded.

    file_object: file-like object (with read method) from which to read
     data to upload
    content_type: MIME content type of data to upload
    filename: filename to pass to server
    name, id, nr: select the control, as for find_control; only controls
     of type "file" are considered

    If filename is None, no filename is sent to the server.

    If content_type is None, the content type is guessed based on the
    filename and the data read from the file object.

    XXX
    At the moment, guessed content type is always application/octet-stream.
    Use sndhdr, imghdr modules.  Should also try to guess HTML, XML, and
    plain text.

    """
    target = self.find_control(name, "file", id=id, nr=nr)
    target.add_file(file_object, content_type, filename)
2436 #---------------------------------------------------
2437 # Form submission methods, applying only to clickable controls.
def click(self, name=None, type=None, id=None, nr=0, coord=(1,1)):
    """Return request that would result from clicking on a control.

    The request object is a urllib2.Request instance, which you can pass to
    urllib2.urlopen (or ClientCookie.urlopen).

    Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
    IMAGEs) can be clicked.

    Will click on the first clickable control, subject to the name, type
    and nr arguments (as for find_control).  If no name, type, id or number
    is specified and there are no clickable controls, a request will be
    returned for the form in its current, un-clicked, state.

    ControlNotFoundError is raised if any of name, type, id or nr is
    specified but no matching control is found.  ValueError is raised if
    the HTMLForm has an enctype attribute that is not recognised.

    You can optionally specify a coordinate to click at, which only makes a
    difference if you clicked on an image.

    """
    wanted = "request"
    return self._click(name, type, id, nr, coord, wanted)
def click_request_data(self,
                       name=None, type=None, id=None, nr=0, coord=(1,1)):
    """As for click method, but return a tuple (url, data, headers).

    You can use this data to send a request to the server.  This is useful
    if you're using httplib or urllib rather than urllib2.  Otherwise, use
    the click method.

    headers is a list of (name, value) pairs.

    # Untested.  Have to subclass to add headers, I think -- so use urllib2
    # instead!
    import urllib
    url, data, hdrs = form.click_request_data()
    r = urllib.urlopen(url, data)

    # Untested.  I don't know of any reason to use httplib -- you can get
    # just as much control with urllib2.
    import httplib, urlparse
    url, data, hdrs = form.click_request_data()
    tup = urlparse.urlparse(url)
    host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
    conn = httplib.HTTPConnection(host)
    if data:
        conn.request("POST", path, data, dict(hdrs))
    else:
        conn.request("GET", path, headers=dict(hdrs))
    r = conn.getresponse()

    """
    wanted = "request_data"
    return self._click(name, type, id, nr, coord, wanted)
def click_pairs(self, name=None, type=None, id=None, nr=0, coord=(1,1)):
    """As for click_request_data, but returns a list of (key, value) pairs.

    You can use this list as an argument to ClientForm.urlencode.  This is
    usually only useful if you're using httplib or urllib rather than
    urllib2 or ClientCookie.  It may also be useful if you want to manually
    tweak the keys and/or values, but this should not be necessary.
    Otherwise, use the click method.

    Note that this method is only useful for forms of MIME type
    x-www-form-urlencoded.  In particular, it does not return the
    information required for file upload.  If you need file upload and are
    not using urllib2, use click_request_data.

    Also note that Python 2.0's urllib.urlencode is slightly broken: it
    only accepts a mapping, not a sequence of pairs, as an argument.  This
    messes up any ordering in the argument.  Use ClientForm.urlencode
    instead.

    """
    wanted = "pairs"
    return self._click(name, type, id, nr, coord, wanted)
2515 #---------------------------------------------------
def find_control(self,
                 name=None, type=None, kind=None, id=None, predicate=None,
                 nr=None):
    """Locate some specific control within the form.

    At least one of the name, type, kind, id, predicate and nr arguments
    must be supplied; otherwise ValueError is raised.  If no matching
    control is found, ControlNotFoundError is raised.

    If name is specified, then the control must have the indicated name.

    If type is specified then the control must have the specified type (in
    addition to the types possible for <input> HTML tags: "text",
    "password", "hidden", "submit", "image", "button", "radio", "checkbox",
    "file" we also have "reset", "buttonbutton", "submitbutton",
    "resetbutton", "textarea", "select" and "isindex").

    If kind is specified, then the control must fall into the specified
    group, each of which satisfies a particular interface.  The kinds are
    "text", "list", "multilist", "singlelist", "clickable" and "file".

    If id is specified, then the control must have the indicated id.

    If predicate is specified, then the control must match that function.
    The predicate function is passed the control as its single argument,
    and should return a boolean value indicating whether the control
    matched.

    nr, if supplied, is the sequence number of the control (where 0 is the
    first).  Note that control 0 is the first control matching all the
    other arguments (if supplied); it is not necessarily the first control
    in the form.  If nr is omitted, 0 is used.

    """
    criteria = (name, type, kind, id, predicate, nr)
    supplied = [value for value in criteria if value is not None]
    if not supplied:
        raise ValueError(
            "at least one argument must be supplied to specify control")
    if nr is None:
        nr = 0

    return self._find_control(name, type, kind, id, predicate, nr)
2559 #---------------------------------------------------
2560 # Private methods.
def _find_list_control(self,
                       name=None, type=None, kind=None, id=None, nr=None):
    """Locate a ListControl; as for find_control, minus the predicate.

    is_listcontrol is used as the predicate, so controls that are not
    ListControl instances never match.  ValueError is raised if every
    argument is None.  If nr is omitted, 0 is used.

    """
    criteria = (name, type, kind, id, nr)
    supplied = [value for value in criteria if value is not None]
    if not supplied:
        raise ValueError(
            "at least one argument must be supplied to specify control")
    if nr is None:
        nr = 0

    return self._find_control(name, type, kind, id, is_listcontrol, nr)
def _find_control(self, name, type, kind, id, predicate, nr):
    """Return the nr'th control matching every criterion that is not None.

    name, type, kind, id: string-like criteria, or None to ignore
    predicate: callable passed each candidate control, or None to ignore
    nr: number of matching controls to skip before returning one (0 means
     the first match)

    TypeError is raised if a criterion has the wrong type, ValueError if nr
    is negative, and ControlNotFoundError if fewer than nr+1 controls
    match.

    """
    if (name is not None) and not isstringlike(name):
        raise TypeError("control name must be string-like")
    if (type is not None) and not isstringlike(type):
        raise TypeError("control type must be string-like")
    if (kind is not None) and not isstringlike(kind):
        raise TypeError("control kind must be string-like")
    if (id is not None) and not isstringlike(id):
        raise TypeError("control id must be string-like")
    if (predicate is not None) and not callable(predicate):
        raise TypeError("control predicate must be callable")
    # Zero is a valid control number, so only negative values are refused.
    if nr < 0:
        raise ValueError("control number must be a non-negative integer")

    to_skip = nr

    for control in self.controls:
        if name is not None and name != control.name:
            continue
        if type is not None and type != control.type:
            continue
        if (kind is not None and
            not self._is_control_in_kind(control, kind)):
            continue
        if id is not None and id != control.id:
            continue
        # Compare against None, as for the other criteria: a callable
        # object may itself evaluate false and must still be consulted.
        if predicate is not None and not predicate(control):
            continue
        if to_skip:
            to_skip = to_skip - 1
            continue
        return control

    # Nothing matched: describe what was asked for.
    description = []
    if name is not None: description.append("name '%s'" % name)
    if type is not None: description.append("type '%s'" % type)
    if kind is not None: description.append("kind '%s'" % kind)
    if id is not None: description.append("id '%s'" % id)
    if predicate is not None:
        description.append("matching predicate %s" % predicate)
    if nr: description.append("nr %d" % nr)
    description = ", ".join(description)
    raise ControlNotFoundError("no control with "+description)
def _is_control_in_kind(self, control, kind):
    """Return whether control belongs to the named kind.

    kind is one of "list", "multilist", "singlelist", "file", "text" and
    "clickable"; any other value raises ValueError.

    """
    # XXX not OO
    if kind == "list":
        return isinstance(control, ListControl)
    if kind == "multilist":
        return bool(isinstance(control, ListControl) and control.multiple)
    if kind == "singlelist":
        return bool(isinstance(control, ListControl) and
                    not control.multiple)
    if kind == "file":
        return isinstance(control, FileControl)
    if kind == "text":
        return isinstance(control, TextControl)
    if kind == "clickable":
        return (isinstance(control, SubmitControl) or
                isinstance(control, IsindexControl))
    raise ValueError("no such control kind '%s'" % kind)
def _click(self, name, type, id, nr, coord, return_type):
    """Click the matching clickable control and return the result.

    The control is located with _find_control, restricted to kind
    "clickable", and the result of its _click method is returned.  If no
    control is found, ControlNotFoundError propagates when any of name,
    type or id was given or nr is not 0; otherwise the form's un-clicked
    state is returned via _switch_click.

    """
    try:
        target = self._find_control(name, type, "clickable", id, None, nr)
    except ControlNotFoundError:
        nothing_requested = (name is None and type is None and
                             id is None and nr == 0)
        if not nothing_requested:
            raise
        # no clickable controls, but no control was explicitly requested,
        # so return state without clicking any control
        return self._switch_click(return_type)
    return target._click(self, coord, return_type)
def _pairs(self):
    """Return sequence of (key, value) pairs suitable for urlencoding.

    The list is the concatenation, in control order, of whatever each
    control's pairs method returns.

    """
    collected = []
    for member in self.controls:
        for pair in member.pairs():
            collected.append(pair)
    return collected
def _request_data(self):
    """Return a tuple (url, data, headers).

    The form's method is compared case-insensitively.

    GET: url is the action followed by "?" and the urlencoded pairs; data
    is None and headers is an empty list.

    POST, application/x-www-form-urlencoded: url is the action, data is
    the urlencoded pairs and headers holds a single Content-type pair.

    POST, multipart/form-data: url is the action and data is the text
    written through a MimeWriter, to which every control contributes via
    its _write_mime_data method.  headers is the list handed to the
    MimeWriter (presumably filled in by it, since add_to_http_hdrs is
    set -- confirm against MimeWriter).

    ValueError is raised for any other method, or for an enctype that is
    not recognised for the method in use.

    """
    method = self.method.upper()
    if method == "GET":
        if self.enctype != "application/x-www-form-urlencoded":
            raise ValueError(
                "unknown GET form encoding type '%s'" % self.enctype)
        uri = "%s?%s" % (self.action, urlencode(self._pairs()))
        return uri, None, []
    elif method == "POST":
        if self.enctype == "application/x-www-form-urlencoded":
            return (self.action, urlencode(self._pairs()),
                    [("Content-type", self.enctype)])
        elif self.enctype == "multipart/form-data":
            data = StringIO()
            http_hdrs = []
            mw = MimeWriter(data, http_hdrs)
            # The return value is not needed: all output is collected in
            # data.
            mw.startmultipartbody("form-data", add_to_http_hdrs=True,
                                  prefix=0)
            for control in self.controls:
                control._write_mime_data(mw)
            mw.lastpart()
            return self.action, data.getvalue(), http_hdrs
        else:
            raise ValueError(
                "unknown POST form encoding type '%s'" % self.enctype)
    else:
        raise ValueError("Unknown method '%s'" % method)
def _switch_click(self, return_type):
    """Return the form's current state in the form named by return_type.

    "pairs" gives the result of _pairs, "request_data" the result of
    _request_data; any other value gives a urllib2.Request built from the
    request data, with each header pair added to it.

    """
    # This is called by HTMLForm and clickable Controls to hide switching
    # on return_type.
    # XXX
    #  not OO
    #  duplicated in IsindexControl._click
    if return_type == "pairs":
        return self._pairs()
    if return_type == "request_data":
        return self._request_data()

    req_data = self._request_data()
    request = urllib2.Request(req_data[0], req_data[1])
    for header_name, header_value in req_data[2]:
        request.add_header(header_name, header_value)
    return request