1 """HTML form handling for web clients.
3 ClientForm is a Python module for handling HTML forms on the client
4 side, useful for parsing HTML forms, filling them in and returning the
5 completed forms to the server. It has developed from a port of Gisle
6 Aas' Perl module HTML::Form, from the libwww-perl library, but the
7 interface is not the same.
9 The most useful docstring is the one for HTMLForm.
12 RFC 1867: Form-based File Upload in HTML
13 RFC 2388: Returning Values from Forms: multipart/form-data
14 HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
15 HTML 4.01 Specification, W3C Recommendation 24 December 1999
18 Copyright 2002-2007 John J. Lee <jjl@pobox.com>
19 Copyright 2005 Gary Poster
20 Copyright 2005 Zope Corporation
21 Copyright 1998-2000 Gisle Aas.
23 This code is free software; you can redistribute it and/or modify it
24 under the terms of the BSD or ZPL 2.1 licenses (see the file
25 COPYING.txt included with the distribution).
30 # Remove parser testing hack
31 # safeUrl()-ize action
32 # Switch to unicode throughout (would be 0.3.x)
33 # See Wichert Akkerman's 2004-01-22 message to c.l.py.
34 # Add charset parameter to Content-type headers? How to find value??
35 # Add some more functional tests
36 # Especially single and multiple file upload on the internet.
37 # Does file upload work when name is missing? Sourceforge tracker form
38 # doesn't like it. Check standards, and test with Apache. Test
39 # binary upload with Apache.
40 # mailto submission & enctype text/plain
41 # I'm not going to fix this unless somebody tells me what real servers
42 # that want this encoding actually expect: If enctype is
43 # application/x-www-form-urlencoded and there's a FILE control present.
44 # Strictly, it should be 'name=data' (see HTML 4.01 spec., section
45 # 17.13.2), but I send "name=" ATM. What about multiple file upload??
47 # Would be nice, but I'm not going to do it myself:
48 # -------------------------------------------------
50 # Replace by_label etc. with moniker / selector concept. Allows, eg.,
51 # a choice between selection by value / id / label / element
52 # contents. Or choice between matching labels exactly or by
54 # Remove deprecated methods.
57 # XForms? Don't know if there's a need here.
59 __all__
= ['AmbiguityError', 'CheckboxControl', 'Control',
60 'ControlNotFoundError', 'FileControl', 'FormParser', 'HTMLForm',
61 'HiddenControl', 'IgnoreControl', 'ImageControl', 'IsindexControl',
62 'Item', 'ItemCountError', 'ItemNotFoundError', 'Label',
63 'ListControl', 'LocateError', 'Missing', 'NestingRobustFormParser',
64 'ParseError', 'ParseFile', 'ParseFileEx', 'ParseResponse',
65 'ParseResponseEx', 'PasswordControl', 'RadioControl',
66 'RobustFormParser', 'ScalarControl', 'SelectControl',
67 'SubmitButtonControl', 'SubmitControl', 'TextControl',
68 'TextareaControl', 'XHTMLCompatibleFormParser']
85 def debug(msg
, *args
, **kwds
):
88 _logger
= logging
.getLogger("ClientForm")
89 OPTIMIZATION_HACK
= True
91 def debug(msg
, *args
, **kwds
):
95 caller_name
= inspect
.stack()[1][3]
96 extended_msg
= '%%s %s' % msg
97 extended_args
= (caller_name
,)+args
98 debug
= _logger
.debug(extended_msg
, *extended_args
, **kwds
)
def _show_debug_messages():
    """Switch on DEBUG-level logging for this module and echo it to stdout.

    Clears the module-level OPTIMIZATION_HACK flag, sets _logger to DEBUG
    and attaches a DEBUG-level StreamHandler that writes to sys.stdout.
    """
    global OPTIMIZATION_HACK
    debug_level = logging.DEBUG
    OPTIMIZATION_HACK = False
    _logger.setLevel(debug_level)
    # NOTE: a fresh handler is attached on every call.
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(debug_level)
    _logger.addHandler(stdout_handler)
108 import sys
, urllib
, urllib2
, types
, mimetools
, copy
, urlparse
, \
109 htmlentitydefs
, re
, random
110 from cStringIO
import StringIO
113 # monkeypatch to fix http://www.python.org/sf/803422 :-(
114 sgmllib
.charref
= re
.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
116 # HTMLParser.HTMLParser is recent, so live without it if it's not available
117 # (also, sgmllib.SGMLParser is much more tolerant of bad HTML)
121 HAVE_MODULE_HTMLPARSER
= False
123 HAVE_MODULE_HTMLPARSER
= True
128 def deprecation(message
, stack_offset
=0):
131 def deprecation(message
, stack_offset
=0):
132 warnings
.warn(message
, DeprecationWarning, stacklevel
=3+stack_offset
)
136 CHUNK
= 1024 # size of chunks fed to parser, in bytes
138 DEFAULT_ENCODING
= "latin-1"
# Matches any run of whitespace characters.
_compress_re = re.compile(r"\s+")


def compress_text(text):
    """Return text with surrounding whitespace stripped and every internal
    run of whitespace replaced by a single space."""
    trimmed = text.strip()
    return _compress_re.sub(" ", trimmed)
def normalize_line_endings(text):
    """Return text with every lone LF or lone CR replaced by CRLF.

    Existing CRLF pairs are left as they are.
    """
    # Either a "\n" not preceded by "\r", or a "\r" not followed by "\n".
    lone_line_break = r"(?:(?<!\r)\n)|(?:\r(?!\n))"
    return re.sub(lone_line_break, "\r\n", text)
149 # This version of urlencode is from my Python 1.5.2 back-port of the
150 # Python 2.1 CVS maintenance branch of urllib. It will accept a sequence
151 # of pairs instead of a mapping -- the 2.0 version only accepts a mapping.
152 def urlencode(query
,doseq
=False,):
153 """Encode a sequence of two-element tuples or dictionary into a URL query \
156 If any values in the query arg are sequences and doseq is true, each
157 sequence element is converted to a separate parameter.
159 If the query arg is a sequence of two-element tuples, the order of the
160 parameters in the output will match the order of parameters in the
164 if hasattr(query
,"items"):
166 query
= query
.items()
168 # it's a bother at times that strings and string-like objects are
171 # non-sequence items should not work with len()
173 # non-empty strings will fail this
174 if len(query
) and type(query
[0]) != types
.TupleType
:
176 # zero-length sequences of all types will get here and succeed,
177 # but that's a minor nit - since the original implementation
178 # allowed empty dicts that type of behavior probably should be
179 # preserved for consistency
181 ty
,va
,tb
= sys
.exc_info()
182 raise TypeError("not a valid non-string sequence or mapping "
187 # preserve old behavior
189 k
= urllib
.quote_plus(str(k
))
190 v
= urllib
.quote_plus(str(v
))
191 l
.append(k
+ '=' + v
)
194 k
= urllib
.quote_plus(str(k
))
195 if type(v
) == types
.StringType
:
196 v
= urllib
.quote_plus(v
)
197 l
.append(k
+ '=' + v
)
198 elif type(v
) == types
.UnicodeType
:
199 # is there a reasonable way to convert to ASCII?
200 # encode generates a string, but "replace" or "ignore"
201 # lose information and "strict" can raise UnicodeError
202 v
= urllib
.quote_plus(v
.encode("ASCII","replace"))
203 l
.append(k
+ '=' + v
)
206 # is this a sufficient test for sequence-ness?
210 v
= urllib
.quote_plus(str(v
))
211 l
.append(k
+ '=' + v
)
213 # loop over the sequence
215 l
.append(k
+ '=' + urllib
.quote_plus(str(elt
)))
218 def unescape(data
, entities
, encoding
=DEFAULT_ENCODING
):
219 if data
is None or "&" not in data
:
222 def replace_entities(match
, entities
=entities
, encoding
=encoding
):
225 return unescape_charref(ent
[2:-1], encoding
)
227 repl
= entities
.get(ent
)
229 if type(repl
) != type(""):
231 repl
= repl
.encode(encoding
)
239 return re
.sub(r
"&#?[A-Za-z0-9]+?;", replace_entities
, data
)
241 def unescape_charref(data
, encoding
):
242 name
, base
= data
, 10
243 if name
.startswith("x"):
244 name
, base
= name
[1:], 16
245 uc
= unichr(int(name
, base
))
250 repl
= uc
.encode(encoding
)
252 repl
= "&#%s;" % data
255 def get_entitydefs():
256 import htmlentitydefs
257 from codecs
import latin_1_decode
260 htmlentitydefs
.name2codepoint
261 except AttributeError:
263 for name
, char
in htmlentitydefs
.entitydefs
.items():
264 uc
= latin_1_decode(char
)[0]
265 if uc
.startswith("&#") and uc
.endswith(";"):
266 uc
= unescape_charref(uc
[2:-1], None)
267 entitydefs
["&%s;" % name
] = uc
269 for name
, codepoint
in htmlentitydefs
.name2codepoint
.items():
270 entitydefs
["&%s;" % name
] = unichr(codepoint
)
277 except (TypeError, KeyError):
def choose_boundary():
    """Return a string usable as a multipart boundary.

    The result is 27 hyphens followed by the decimal digits of three
    random integers drawn from [0, sys.maxint - 1].
    """
    # follow IE and firefox
    digit_groups = []
    for _ in range(3):
        digit_groups.append(str(random.randint(0, sys.maxint - 1)))
    return "-" * 27 + "".join(digit_groups)
295 # This cut-n-pasted MimeWriter from standard library is here so can add
296 # to HTTP headers rather than message body when appropriate. It also uses
297 # \r\n in place of \n. This is a bit nasty.
300 """Generic MIME writer.
312 A MIME writer is much more primitive than a MIME parser. It
313 doesn't seek around on the output file, and it doesn't use large
314 amounts of buffer space, so you have to write the parts in the
315 order they should occur on the output file. It does buffer the
316 headers you add, allowing you to rearrange their order.
320 f = <open the output file>
322 ...call w.addheader(key, value) 0 or more times...
326 f = w.startbody(content_type)
327 ...call f.write(data) for body data...
331 w.startmultipartbody(subtype)
333 subwriter = w.nextpart()
334 ...use the subwriter's methods to create the subpart...
337 The subwriter is another MimeWriter instance, and should be
338 treated in the same way as the toplevel MimeWriter. This way,
339 writing recursive body parts is easy.
341 Warning: don't forget to call lastpart()!
343 XXX There should be more state so calls made in the wrong order
348 - startbody() just returns the file passed to the constructor;
349 but don't use this knowledge, as it may be changed.
351 - startmultipartbody() actually returns a file as well;
352 this can be used to write the initial 'if you can read this your
353 mailer is not MIME-aware' message.
355 - If you call flushheaders(), the headers accumulated so far are
356 written out (and forgotten); this is useful if you don't need a
357 body part at all, e.g. for a subpart of type message/rfc822
358 that's (mis)used to store some header-like information.
360 - Passing a keyword argument 'prefix=<flag>' to addheader(),
361 start*body() affects where the header is inserted; 0 means
362 append at the end, 1 means insert at the start; default is
363 append for addheader(), but insert for start*body(), which use
364 it to determine where the Content-type header goes.
368 def __init__(self
, fp
, http_hdrs
=None):
369 self
._http
_hdrs
= http_hdrs
373 self
._first
_part
= True
375 def addheader(self
, key
, value
, prefix
=0,
378 prefix is ignored if add_to_http_hdrs is true.
380 lines
= value
.split("\r\n")
381 while lines
and not lines
[-1]: del lines
[-1]
382 while lines
and not lines
[0]: del lines
[0]
384 value
= "".join(lines
)
385 self
._http
_hdrs
.append((key
, value
))
387 for i
in range(1, len(lines
)):
388 lines
[i
] = " " + lines
[i
].strip()
389 value
= "\r\n".join(lines
) + "\r\n"
390 line
= key
+ ": " + value
392 self
._headers
.insert(0, line
)
394 self
._headers
.append(line
)
396 def flushheaders(self
):
397 self
._fp
.writelines(self
._headers
)
400 def startbody(self
, ctype
=None, plist
=[], prefix
=1,
401 add_to_http_hdrs
=0, content_type
=1):
403 prefix is ignored if add_to_http_hdrs is true.
405 if content_type
and ctype
:
406 for name
, value
in plist
:
407 ctype
= ctype
+ ';\r\n %s=%s' % (name
, value
)
408 self
.addheader("Content-type", ctype
, prefix
=prefix
,
409 add_to_http_hdrs
=add_to_http_hdrs
)
411 if not add_to_http_hdrs
: self
._fp
.write("\r\n")
412 self
._first
_part
= True
415 def startmultipartbody(self
, subtype
, boundary
=None, plist
=[], prefix
=1,
416 add_to_http_hdrs
=0, content_type
=1):
417 boundary
= boundary
or choose_boundary()
418 self
._boundary
.append(boundary
)
419 return self
.startbody("multipart/" + subtype
,
420 [("boundary", boundary
)] + plist
,
422 add_to_http_hdrs
=add_to_http_hdrs
,
423 content_type
=content_type
)
426 boundary
= self
._boundary
[-1]
428 self
._first
_part
= False
430 self
._fp
.write("\r\n")
431 self
._fp
.write("--" + boundary
+ "\r\n")
432 return self
.__class
__(self
._fp
)
437 boundary
= self
._boundary
.pop()
438 self
._fp
.write("\r\n--" + boundary
+ "--\r\n")
class LocateError(ValueError):
    """Common base class of AmbiguityError, ControlNotFoundError and
    ItemNotFoundError."""


class AmbiguityError(LocateError):
    """LocateError subclass; adds no behaviour of its own."""


class ControlNotFoundError(LocateError):
    """LocateError subclass; adds no behaviour of its own."""


class ItemNotFoundError(LocateError):
    """LocateError subclass; adds no behaviour of its own."""
class ItemCountError(ValueError):
    """ValueError subclass; adds no behaviour of its own."""
448 # for backwards compatibility, ParseError derives from exceptions that were
449 # raised by versions of ClientForm <= 0.2.5
450 if HAVE_MODULE_HTMLPARSER
:
451 SGMLLIB_PARSEERROR
= sgmllib
.SGMLParseError
452 class ParseError(sgmllib
.SGMLParseError
,
453 HTMLParser
.HTMLParseError
,
457 if hasattr(sgmllib
, "SGMLParseError"):
458 SGMLLIB_PARSEERROR
= sgmllib
.SGMLParseError
459 class ParseError(sgmllib
.SGMLParseError
):
462 SGMLLIB_PARSEERROR
= RuntimeError
463 class ParseError(RuntimeError):
467 class _AbstractFormParser
:
468 """forms attribute contains HTMLForm instances on completion."""
469 # thanks to Moshe Zadka for an example of sgmllib/htmllib usage
470 def __init__(self
, entitydefs
=None, encoding
=DEFAULT_ENCODING
):
471 if entitydefs
is None:
472 entitydefs
= get_entitydefs()
473 self
._entitydefs
= entitydefs
474 self
._encoding
= encoding
479 self
._current
_label
= None
480 self
._current
_form
= None
482 self
._optgroup
= None
484 self
._textarea
= None
486 # forms[0] will contain all controls that are outside of any form
487 # self._global_form is an alias for self.forms[0]
488 self
._global
_form
= None
491 self
._current
_form
= self
._global
_form
= self
.forms
[0]
493 def do_base(self
, attrs
):
495 for key
, value
in attrs
:
497 self
.base
= self
.unescape_attr_if_required(value
)
501 if self
._current
_label
is not None:
503 if self
._current
_form
is not self
._global
_form
:
506 def start_form(self
, attrs
):
508 if self
._current
_form
is not self
._global
_form
:
509 raise ParseError("nested FORMs")
512 enctype
= "application/x-www-form-urlencoded"
515 for key
, value
in attrs
:
517 name
= self
.unescape_attr_if_required(value
)
518 elif key
== "action":
519 action
= self
.unescape_attr_if_required(value
)
520 elif key
== "method":
521 method
= self
.unescape_attr_if_required(value
.upper())
522 elif key
== "enctype":
523 enctype
= self
.unescape_attr_if_required(value
.lower())
524 d
[key
] = self
.unescape_attr_if_required(value
)
526 self
._current
_form
= (name
, action
, method
, enctype
), d
, controls
530 if self
._current
_label
is not None:
532 if self
._current
_form
is self
._global
_form
:
533 raise ParseError("end of FORM before start")
534 self
.forms
.append(self
._current
_form
)
535 self
._current
_form
= self
._global
_form
537 def start_select(self
, attrs
):
539 if self
._select
is not None:
540 raise ParseError("nested SELECTs")
541 if self
._textarea
is not None:
542 raise ParseError("SELECT inside TEXTAREA")
544 for key
, val
in attrs
:
545 d
[key
] = self
.unescape_attr_if_required(val
)
550 self
._append
_select
_control
({"__select": d
})
552 def end_select(self
):
554 if self
._select
is None:
555 raise ParseError("end of SELECT before start")
557 if self
._option
is not None:
562 def start_optgroup(self
, attrs
):
564 if self
._select
is None:
565 raise ParseError("OPTGROUP outside of SELECT")
567 for key
, val
in attrs
:
568 d
[key
] = self
.unescape_attr_if_required(val
)
572 def end_optgroup(self
):
574 if self
._optgroup
is None:
575 raise ParseError("end of OPTGROUP before start")
576 self
._optgroup
= None
578 def _start_option(self
, attrs
):
580 if self
._select
is None:
581 raise ParseError("OPTION outside of SELECT")
582 if self
._option
is not None:
586 for key
, val
in attrs
:
587 d
[key
] = self
.unescape_attr_if_required(val
)
590 self
._option
.update(d
)
591 if (self
._optgroup
and self
._optgroup
.has_key("disabled") and
592 not self
._option
.has_key("disabled")):
593 self
._option
["disabled"] = None
595 def _end_option(self
):
597 if self
._option
is None:
598 raise ParseError("end of OPTION before start")
600 contents
= self
._option
.get("contents", "").strip()
601 self
._option
["contents"] = contents
602 if not self
._option
.has_key("value"):
603 self
._option
["value"] = contents
604 if not self
._option
.has_key("label"):
605 self
._option
["label"] = contents
606 # stuff dict of SELECT HTML attrs into a special private key
607 # (gets deleted again later)
608 self
._option
["__select"] = self
._select
609 self
._append
_select
_control
(self
._option
)
612 def _append_select_control(self
, attrs
):
614 controls
= self
._current
_form
[2]
615 name
= self
._select
.get("name")
616 controls
.append(("select", name
, attrs
))
618 def start_textarea(self
, attrs
):
620 if self
._textarea
is not None:
621 raise ParseError("nested TEXTAREAs")
622 if self
._select
is not None:
623 raise ParseError("TEXTAREA inside SELECT")
625 for key
, val
in attrs
:
626 d
[key
] = self
.unescape_attr_if_required(val
)
631 def end_textarea(self
):
633 if self
._textarea
is None:
634 raise ParseError("end of TEXTAREA before start")
635 controls
= self
._current
_form
[2]
636 name
= self
._textarea
.get("name")
637 controls
.append(("textarea", name
, self
._textarea
))
638 self
._textarea
= None
640 def start_label(self
, attrs
):
642 if self
._current
_label
:
645 for key
, val
in attrs
:
646 d
[key
] = self
.unescape_attr_if_required(val
)
647 taken
= bool(d
.get("for")) # empty id is invalid
651 self
.labels
.append(d
)
652 self
._current
_label
= d
656 label
= self
._current
_label
658 # something is ugly in the HTML, but we're ignoring it
660 self
._current
_label
= None
661 label
["__text"] = label
["__text"]
662 # if it is staying around, it is True in all cases
665 def _add_label(self
, d
):
667 if self
._current
_label
is not None:
668 if self
._current
_label
["__taken"]:
669 self
.end_label() # be fuzzy
671 self
._current
_label
["__taken"] = True
672 d
["__label"] = self
._current
_label
674 def handle_data(self
, data
):
677 # according to http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.1
678 # line break immediately after start tags or immediately before end
679 # tags must be ignored, but real browsers only ignore a line break
680 # after a start tag, so we'll do that.
681 if data
[0:2] == "\r\n":
683 if data
[0:1] in ["\n", "\r"]:
686 if self
._option
is not None:
687 # self._option is a dictionary of the OPTION element's HTML
688 # attributes, but it has two special keys, one of which is the
689 # special "contents" key contains text between OPTION tags (the
690 # other is the "__select" key: see the end_option method)
693 elif self
._textarea
is not None:
696 data
= normalize_line_endings(data
)
697 # not if within option or textarea
698 elif self
._current
_label
is not None:
699 map = self
._current
_label
704 if not map.has_key(key
):
707 map[key
] = map[key
] + data
709 def do_button(self
, attrs
):
712 d
["type"] = "submit" # default
713 for key
, val
in attrs
:
714 d
[key
] = self
.unescape_attr_if_required(val
)
715 controls
= self
._current
_form
[2]
719 # we don't want to lose information, so use a type string that
720 # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
721 # e.g. type for BUTTON/RESET is "resetbutton"
722 # (type for INPUT/RESET is "reset")
725 controls
.append((type, name
, d
))
727 def do_input(self
, attrs
):
730 d
["type"] = "text" # default
731 for key
, val
in attrs
:
732 d
[key
] = self
.unescape_attr_if_required(val
)
733 controls
= self
._current
_form
[2]
738 controls
.append((type, name
, d
))
740 def do_isindex(self
, attrs
):
743 for key
, val
in attrs
:
744 d
[key
] = self
.unescape_attr_if_required(val
)
745 controls
= self
._current
_form
[2]
748 # isindex doesn't have type or name HTML attributes
749 controls
.append(("isindex", None, d
))
751 def handle_entityref(self
, name
):
753 self
.handle_data(unescape(
754 '&%s;' % name
, self
._entitydefs
, self
._encoding
))
756 def handle_charref(self
, name
):
758 self
.handle_data(unescape_charref(name
, self
._encoding
))
760 def unescape_attr(self
, name
):
762 return unescape(name
, self
._entitydefs
, self
._encoding
)
764 def unescape_attrs(self
, attrs
):
767 for key
, val
in attrs
.items():
770 except AttributeError:
771 escaped_attrs
[key
] = self
.unescape_attr(val
)
773 # e.g. "__select" -- yuck!
774 escaped_attrs
[key
] = self
.unescape_attrs(val
)
def unknown_entityref(self, ref):
    """Hand the entity reference `ref` to handle_data() as the literal
    text "&ref;"."""
    literal_text = "&%s;" % ref
    self.handle_data(literal_text)
def unknown_charref(self, ref):
    """Hand the character reference `ref` to handle_data() as the literal
    text "&#ref;"."""
    literal_text = "&#%s;" % ref
    self.handle_data(literal_text)
781 if not HAVE_MODULE_HTMLPARSER
:
782 class XHTMLCompatibleFormParser
:
783 def __init__(self
, entitydefs
=None, encoding
=DEFAULT_ENCODING
):
784 raise ValueError("HTMLParser could not be imported")
786 class XHTMLCompatibleFormParser(_AbstractFormParser
, HTMLParser
.HTMLParser
):
787 """Good for XHTML, bad for tolerance of incorrect HTML."""
788 # thanks to Michael Howitz for this!
789 def __init__(self
, entitydefs
=None, encoding
=DEFAULT_ENCODING
):
790 HTMLParser
.HTMLParser
.__init
__(self
)
791 _AbstractFormParser
.__init
__(self
, entitydefs
, encoding
)
793 def feed(self
, data
):
795 HTMLParser
.HTMLParser
.feed(self
, data
)
796 except HTMLParser
.HTMLParseError
, exc
:
797 raise ParseError(exc
)
799 def start_option(self
, attrs
):
800 _AbstractFormParser
._start
_option
(self
, attrs
)
802 def end_option(self
):
803 _AbstractFormParser
._end
_option
(self
)
805 def handle_starttag(self
, tag
, attrs
):
807 method
= getattr(self
, "start_" + tag
)
808 except AttributeError:
810 method
= getattr(self
, "do_" + tag
)
811 except AttributeError:
818 def handle_endtag(self
, tag
):
820 method
= getattr(self
, "end_" + tag
)
821 except AttributeError:
826 def unescape(self
, name
):
827 # Use the entitydefs passed into constructor, not
828 # HTMLParser.HTMLParser's entitydefs.
829 return self
.unescape_attr(name
)
831 def unescape_attr_if_required(self
, name
):
832 return name
# HTMLParser.HTMLParser already did it
833 def unescape_attrs_if_required(self
, attrs
):
837 class _AbstractSgmllibParser(_AbstractFormParser
):
839 def do_option(self
, attrs
):
840 _AbstractFormParser
._start
_option
(self
, attrs
)
842 if sys
.version_info
[:2] >= (2,5):
843 # we override this attr to decode hex charrefs
844 entity_or_charref
= re
.compile(
845 '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)')
846 def convert_entityref(self
, name
):
847 return unescape("&%s;" % name
, self
._entitydefs
, self
._encoding
)
848 def convert_charref(self
, name
):
849 return unescape_charref("%s" % name
, self
._encoding
)
850 def unescape_attr_if_required(self
, name
):
851 return name
# sgmllib already did it
852 def unescape_attrs_if_required(self
, attrs
):
855 def unescape_attr_if_required(self
, name
):
856 return self
.unescape_attr(name
)
857 def unescape_attrs_if_required(self
, attrs
):
858 return self
.unescape_attrs(attrs
)
861 class FormParser(_AbstractSgmllibParser
, sgmllib
.SGMLParser
):
862 """Good for tolerance of incorrect HTML, bad for XHTML."""
863 def __init__(self
, entitydefs
=None, encoding
=DEFAULT_ENCODING
):
864 sgmllib
.SGMLParser
.__init
__(self
)
865 _AbstractFormParser
.__init
__(self
, entitydefs
, encoding
)
867 def feed(self
, data
):
869 sgmllib
.SGMLParser
.feed(self
, data
)
870 except SGMLLIB_PARSEERROR
, exc
:
871 raise ParseError(exc
)
875 # sigh, must support mechanize by allowing dynamic creation of classes based on
876 # its bundled copy of BeautifulSoup (which was necessary because of dependency
879 def _create_bs_classes(bs
,
882 class _AbstractBSFormParser(_AbstractSgmllibParser
):
884 def __init__(self
, entitydefs
=None, encoding
=DEFAULT_ENCODING
):
885 _AbstractFormParser
.__init
__(self
, entitydefs
, encoding
)
886 self
.bs_base_class
.__init
__(self
)
887 def handle_data(self
, data
):
888 _AbstractFormParser
.handle_data(self
, data
)
889 self
.bs_base_class
.handle_data(self
, data
)
890 def feed(self
, data
):
892 self
.bs_base_class
.feed(self
, data
)
893 except SGMLLIB_PARSEERROR
, exc
:
894 raise ParseError(exc
)
897 class RobustFormParser(_AbstractBSFormParser
, bs
):
898 """Tries to be highly tolerant of incorrect HTML."""
900 RobustFormParser
.bs_base_class
= bs
901 class NestingRobustFormParser(_AbstractBSFormParser
, icbinbs
):
902 """Tries to be highly tolerant of incorrect HTML.
904 Different from RobustFormParser in that it more often guesses nesting
905 above missing end tags (see BeautifulSoup docs).
909 NestingRobustFormParser
.bs_base_class
= icbinbs
911 return RobustFormParser
, NestingRobustFormParser
914 if sys
.version_info
[:2] < (2, 2):
915 raise ImportError # BeautifulSoup uses generators
920 RobustFormParser
, NestingRobustFormParser
= _create_bs_classes(
921 BeautifulSoup
.BeautifulSoup
, BeautifulSoup
.ICantBelieveItsBeautifulSoup
925 #FormParser = XHTMLCompatibleFormParser # testing hack
926 #FormParser = RobustFormParser # testing hack
929 def ParseResponseEx(response
,
930 select_default
=False,
931 form_parser_class
=FormParser
,
932 request_class
=urllib2
.Request
,
934 encoding
=DEFAULT_ENCODING
,
937 _urljoin
=urlparse
.urljoin
,
938 _urlparse
=urlparse
.urlparse
,
939 _urlunparse
=urlparse
.urlunparse
,
941 """Identical to ParseResponse, except that:
943 1. The returned list contains an extra item. The first form in the list
944 contains all controls not contained in any FORM element.
946 2. The arguments ignore_errors and backwards_compat have been removed.
948 3. Backwards-compatibility mode (backwards_compat=True) is not available.
950 return _ParseFileEx(response
, response
.geturl(),
960 _urlunparse
=_urlunparse
,
963 def ParseFileEx(file, base_uri
,
964 select_default
=False,
965 form_parser_class
=FormParser
,
966 request_class
=urllib2
.Request
,
968 encoding
=DEFAULT_ENCODING
,
971 _urljoin
=urlparse
.urljoin
,
972 _urlparse
=urlparse
.urlparse
,
973 _urlunparse
=urlparse
.urlunparse
,
975 """Identical to ParseFile, except that:
977 1. The returned list contains an extra item. The first form in the list
978 contains all controls not contained in any FORM element.
980 2. The arguments ignore_errors and backwards_compat have been removed.
982 3. Backwards-compatibility mode (backwards_compat=True) is not available.
984 return _ParseFileEx(file, base_uri
,
994 _urlunparse
=_urlunparse
,
997 def ParseResponse(response
, *args
, **kwds
):
998 """Parse HTTP response and return a list of HTMLForm instances.
1000 The return value of urllib2.urlopen can be conveniently passed to this
1001 function as the response parameter.
1003 ClientForm.ParseError is raised on parse errors.
1005 response: file-like object (supporting read() method) with a method
1006 geturl(), returning the URI of the HTTP response
1007 select_default: for multiple-selection SELECT controls and RADIO controls,
1008 pick the first item as the default if none are selected in the HTML
1009 form_parser_class: class to instantiate and use to pass
1010 request_class: class to return from .click() method (default is
1012 entitydefs: mapping like {"&": "&", ...} containing HTML entity
1013 definitions (a sensible default is used)
1014 encoding: character encoding used for encoding numeric character references
1015 when matching link text. ClientForm does not attempt to find the encoding
1016 in a META HTTP-EQUIV attribute in the document itself (mechanize, for
1017 example, does do that and will pass the correct value to ClientForm using
1020 backwards_compat: boolean that determines whether the returned HTMLForm
1021 objects are backwards-compatible with old code. If backwards_compat is
1024 - ClientForm 0.1 code will continue to work as before.
1026 - Label searches that do not specify a nr (number or count) will always
1027 get the first match, even if other controls match. If
1028 backwards_compat is False, label searches that have ambiguous results
1029 will raise an AmbiguityError.
1031 - Item label matching is done by strict string comparison rather than
1034 - De-selecting individual list items is allowed even if the Item is
1037 The backwards_compat argument will be deprecated in a future release.
1039 Pass a true value for select_default if you want the behaviour specified by
1040 RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
1041 RADIO or multiple-selection SELECT control if none were selected in the
1042 HTML. Most browsers (including Microsoft Internet Explorer (IE) and
1043 Netscape Navigator) instead leave all items unselected in these cases. The
1044 W3C HTML 4.0 standard leaves this behaviour undefined in the case of
1045 multiple-selection SELECT controls, but insists that at least one RADIO
1046 button should be checked at all times, in contradiction to browser
1049 There is a choice of parsers. ClientForm.XHTMLCompatibleFormParser (uses
1050 HTMLParser.HTMLParser) works best for XHTML, ClientForm.FormParser (uses
1051 sgmllib.SGMLParser) (the default) works better for ordinary grubby HTML.
1052 Note that HTMLParser is only available in Python 2.2 and later. You can
1053 pass your own class in here as a hack to work around bad HTML, but at your
1054 own risk: there is no well-defined interface.
1057 return _ParseFileEx(response
, response
.geturl(), *args
, **kwds
)[1:]
1059 def ParseFile(file, base_uri
, *args
, **kwds
):
1060 """Parse HTML and return a list of HTMLForm instances.
1062 ClientForm.ParseError is raised on parse errors.
1064 file: file-like object (supporting read() method) containing HTML with zero
1065 or more forms to be parsed
1066 base_uri: the URI of the document (note that the base URI used to submit
1067 the form will be that given in the BASE element if present, not that of
1070 For the other arguments and further details, see ParseResponse.__doc__.
1073 return _ParseFileEx(file, base_uri
, *args
, **kwds
)[1:]
1075 def _ParseFileEx(file, base_uri
,
1076 select_default
=False,
1077 ignore_errors
=False,
1078 form_parser_class
=FormParser
,
1079 request_class
=urllib2
.Request
,
1081 backwards_compat
=True,
1082 encoding
=DEFAULT_ENCODING
,
1083 _urljoin
=urlparse
.urljoin
,
1084 _urlparse
=urlparse
.urlparse
,
1085 _urlunparse
=urlparse
.urlunparse
,
1087 if backwards_compat
:
1088 deprecation("operating in backwards-compatibility mode", 1)
1089 fp
= form_parser_class(entitydefs
, encoding
)
1091 data
= file.read(CHUNK
)
1094 except ParseError
, e
:
1095 e
.base_uri
= base_uri
1097 if len(data
) != CHUNK
: break
1098 if fp
.base
is not None:
1099 # HTML BASE element takes precedence over document URI
1101 labels
= [] # Label(label) for label in fp.labels]
1105 labels
.append(label
)
1107 coll
= id_to_labels
.get(for_id
)
1109 id_to_labels
[for_id
] = [label
]
1113 for (name
, action
, method
, enctype
), attrs
, controls
in fp
.forms
:
1117 action
= _urljoin(base_uri
, action
)
1118 # would be nice to make HTMLForm class (form builder) pluggable
1120 action
, method
, enctype
, name
, attrs
, request_class
,
1121 forms
, labels
, id_to_labels
, backwards_compat
)
1122 form
._urlparse
= _urlparse
1123 form
._urlunparse
= _urlunparse
1124 for ii
in range(len(controls
)):
1125 type, name
, attrs
= controls
[ii
]
1126 # index=ii*10 allows ImageControl to return multiple ordered pairs
1128 type, name
, attrs
, select_default
=select_default
, index
=ii
*10)
1136 def __init__(self
, attrs
):
1137 self
.id = attrs
.get("for")
1138 self
._text
= attrs
.get("__text").strip()
1139 self
._ctext
= compress_text(self
._text
)
1141 self
._backwards
_compat
= False # maintained by HTMLForm
1143 def __getattr__(self
, name
):
1145 if self
._backwards
_compat
:
1149 return getattr(Label
, name
)
1151 def __setattr__(self
, name
, value
):
1153 # don't see any need for this, so make it read-only
1154 raise AttributeError("text attribute is read-only")
1155 self
.__dict
__[name
] = value
1158 return "<Label(id=%r, text=%r)>" % (self
.id, self
.text
)
1161 def _get_label(attrs
):
1162 text
= attrs
.get("__label")
1163 if text
is not None:
1169 """An HTML form control.
1171 An HTMLForm contains a sequence of Controls. The Controls in an HTMLForm
1172 are accessed using the HTMLForm.find_control method or the
1173 HTMLForm.controls attribute.
1175 Control instances are usually constructed using the ParseFile /
1176 ParseResponse functions. If you use those functions, you can ignore the
1177 rest of this paragraph. A Control is only properly initialised after the
1178 fixup method has been called. In fact, this is only strictly necessary for
1179 ListControl instances. This is necessary because ListControls are built up
1180 from ListControls each containing only a single item, and their initial
1181 value(s) can only be known after the sequence is complete.
1183 The types and values that are acceptable for assignment to the value
1184 attribute are defined by subclasses.
1186 If the disabled attribute is true, this represents the state typically
1187 represented by browsers by 'greying out' a control. If the disabled
1188 attribute is true, the Control will raise AttributeError if an attempt is
1189 made to change its value. In addition, the control will not be considered
1190 'successful' as defined by the W3C HTML 4 standard -- ie. it will
1191 contribute no data to the return value of the HTMLForm.click* methods. To
1192 enable a control, set the disabled attribute to a false value.
1194 If the readonly attribute is true, the Control will raise AttributeError if
1195 an attempt is made to change its value. To make a control writable, set
1196 the readonly attribute to a false value.
1198 All controls have the disabled and readonly attributes, not only those that
1199 may have the HTML attributes of the same names.
1201 On assignment to the value attribute, the following exceptions are raised:
1202 TypeError, AttributeError (if the value attribute should not be assigned
1203 to, because the control is disabled, for example) and ValueError.
1205 If the name or value attributes are None, or the value is an empty list, or
1206 if the control is disabled, the control is not successful.
1210 type: string describing type of control (see the keys of the
1211 HTMLForm.type2class dictionary for the allowable values) (readonly)
1212 name: name of control (readonly)
1213 value: current value of control (subclasses may allow a single value, a
1214 sequence of values, or either)
1215 disabled: disabled state
1216 readonly: readonly state
1217 id: value of id HTML attribute
1220 def __init__(self
, type, name
, attrs
, index
=None):
1222 type: string describing type of control (see the keys of the
1223 HTMLForm.type2class dictionary for the allowable values)
1225 attrs: HTML attributes of control's HTML element
1228 raise NotImplementedError()
1230 def add_to_form(self
, form
):
1232 form
.controls
.append(self
)
def is_of_kind(self, kind):
    """Abstract hook: say whether this control is of the given kind.

    kind: string naming a kind of control

    This base implementation always raises NotImplementedError.

    """
    raise NotImplementedError()
1241 raise NotImplementedError()
1243 def __getattr__(self
, name
): raise NotImplementedError()
1244 def __setattr__(self
, name
, value
): raise NotImplementedError()
1247 """Return list of (key, value) pairs suitable for passing to urlencode.
1249 return [(k
, v
) for (i
, k
, v
) in self
._totally
_ordered
_pairs
()]
1251 def _totally_ordered_pairs(self
):
1252 """Return list of (key, value, index) tuples.
1254 Like pairs, but allows preserving correct ordering even where several
1255 controls are involved.
1258 raise NotImplementedError()
1260 def _write_mime_data(self
, mw
, name
, value
):
1261 """Write data for a subitem of this control to a MimeWriter."""
1262 # called by HTMLForm
1264 mw2
.addheader("Content-disposition",
1265 'form-data; name="%s"' % name
, 1)
1266 f
= mw2
.startbody(prefix
=0)
1270 raise NotImplementedError()
1272 def get_labels(self
):
1273 """Return all labels (Label instances) for this control.
1275 If the control was surrounded by a <label> tag, that will be the first
1276 label; all other labels, connected by 'for' and 'id', are in the order
1277 in which they appear in the HTML.
1282 res
.append(self
._label
)
1284 res
.extend(self
._form
._id
_to
_labels
.get(self
.id, ()))
1288 #---------------------------------------------------
1289 class ScalarControl(Control
):
1290 """Control whose value is not restricted to one of a prescribed set.
1292 Some ScalarControls don't accept any value attribute. Otherwise, takes a
1293 single value, which must be string-like.
1295 Additional read-only public attribute:
1297 attrs: dictionary mapping the names of original HTML attributes of the
1298 control to their values
1301 def __init__(self
, type, name
, attrs
, index
=None):
1303 self
._label
= _get_label(attrs
)
1304 self
.__dict
__["type"] = type.lower()
1305 self
.__dict
__["name"] = name
1306 self
._value
= attrs
.get("value")
1307 self
.disabled
= attrs
.has_key("disabled")
1308 self
.readonly
= attrs
.has_key("readonly")
1309 self
.id = attrs
.get("id")
1311 self
.attrs
= attrs
.copy()
1313 self
._clicked
= False
1315 self
._urlparse
= urlparse
.urlparse
1316 self
._urlunparse
= urlparse
.urlunparse
1318 def __getattr__(self
, name
):
1320 return self
.__dict
__["_value"]
1322 raise AttributeError("%s instance has no attribute '%s'" %
1323 (self
.__class
__.__name
__, name
))
1325 def __setattr__(self
, name
, value
):
1327 if not isstringlike(value
):
1328 raise TypeError("must assign a string")
1330 raise AttributeError("control '%s' is readonly" % self
.name
)
1332 raise AttributeError("control '%s' is disabled" % self
.name
)
1333 self
.__dict
__["_value"] = value
1334 elif name
in ("name", "type"):
1335 raise AttributeError("%s attribute is readonly" % name
)
1337 self
.__dict
__[name
] = value
1339 def _totally_ordered_pairs(self
):
1342 if name
is None or value
is None or self
.disabled
:
1344 return [(self
._index
, name
, value
)]
1348 raise AttributeError("control '%s' is readonly" % self
.name
)
1349 self
.__dict
__["_value"] = None
1354 if name
is None: name
= "<None>"
1355 if value
is None: value
= "<None>"
1358 if self
.disabled
: infos
.append("disabled")
1359 if self
.readonly
: infos
.append("readonly")
1360 info
= ", ".join(infos
)
1361 if info
: info
= " (%s)" % info
1363 return "<%s(%s=%s)%s>" % (self
.__class
__.__name
__, name
, value
, info
)
1366 #---------------------------------------------------
1367 class TextControl(ScalarControl
):
1368 """Textual input control.
1378 def __init__(self
, type, name
, attrs
, index
=None):
1379 ScalarControl
.__init
__(self
, type, name
, attrs
, index
)
1380 if self
.type == "hidden": self
.readonly
= True
1381 if self
._value
is None:
def is_of_kind(self, kind):
    """Return True only when kind is the string "text"."""
    is_text = (kind == "text")
    return is_text
1386 #---------------------------------------------------
1387 class FileControl(ScalarControl
):
1388 """File upload with INPUT TYPE=FILE.
1390 The value attribute of a FileControl is always None. Use add_file instead.
1392 Additional public method: add_file
1396 def __init__(self
, type, name
, attrs
, index
=None):
1397 ScalarControl
.__init
__(self
, type, name
, attrs
, index
)
1399 self
._upload
_data
= []
def is_of_kind(self, kind):
    """Return True only when kind is the string "file"."""
    is_file = (kind == "file")
    return is_file
1405 raise AttributeError("control '%s' is readonly" % self
.name
)
1406 self
._upload
_data
= []
1408 def __setattr__(self
, name
, value
):
1409 if name
in ("value", "name", "type"):
1410 raise AttributeError("%s attribute is readonly" % name
)
1412 self
.__dict
__[name
] = value
def add_file(self, file_object, content_type=None, filename=None):
    """Queue a file-like object for upload with this control.

    file_object: object having a read method
    content_type: string-like content type, or None; None is stored as
     "application/octet-stream"
    filename: string-like filename, or None

    Raises TypeError if file_object has no read attribute, or if
    content_type or filename is neither None nor string-like.

    The (file_object, content_type, filename) tuple is appended to
    self._upload_data.

    """
    if not hasattr(file_object, "read"):
        raise TypeError("file-like object must have read method")
    # validate the two optional strings in the same order as before, so the
    # same error wins when both are bad
    for candidate, complaint in (
        (content_type, "content type must be None or string-like"),
        (filename, "filename must be None or string-like"),
        ):
        if candidate is not None and not isstringlike(candidate):
            raise TypeError(complaint)
    mime_type = content_type
    if mime_type is None:
        mime_type = "application/octet-stream"
    upload = (file_object, mime_type, filename)
    self._upload_data.append(upload)
1425 def _totally_ordered_pairs(self
):
1426 # XXX should it be successful even if unnamed?
1427 if self
.name
is None or self
.disabled
:
1429 return [(self
._index
, self
.name
, "")]
1431 def _write_mime_data(self
, mw
, _name
, _value
):
1432 # called by HTMLForm
1433 # assert _name == self.name and _value == ''
1434 if len(self
._upload
_data
) == 1:
1436 file_object
, content_type
, filename
= self
._upload
_data
[0]
1438 fn_part
= filename
and ('; filename="%s"' % filename
) or ""
1439 disp
= 'form-data; name="%s"%s' % (self
.name
, fn_part
)
1440 mw2
.addheader("Content-disposition", disp
, prefix
=1)
1441 fh
= mw2
.startbody(content_type
, prefix
=0)
1442 fh
.write(file_object
.read())
1443 elif len(self
._upload
_data
) != 0:
1446 disp
= 'form-data; name="%s"' % self
.name
1447 mw2
.addheader("Content-disposition", disp
, prefix
=1)
1448 fh
= mw2
.startmultipartbody("mixed", prefix
=0)
1449 for file_object
, content_type
, filename
in self
._upload
_data
:
1450 mw3
= mw2
.nextpart()
1451 fn_part
= filename
and ('; filename="%s"' % filename
) or ""
1452 disp
= "file%s" % fn_part
1453 mw3
.addheader("Content-disposition", disp
, prefix
=1)
1454 fh2
= mw3
.startbody(content_type
, prefix
=0)
1455 fh2
.write(file_object
.read())
1460 if name
is None: name
= "<None>"
1462 if not self
._upload
_data
:
1463 value
= "<No files added>"
1466 for file, ctype
, filename
in self
._upload
_data
:
1467 if filename
is None:
1468 value
.append("<Unnamed file>")
1470 value
.append(filename
)
1471 value
= ", ".join(value
)
1474 if self
.disabled
: info
.append("disabled")
1475 if self
.readonly
: info
.append("readonly")
1476 info
= ", ".join(info
)
1477 if info
: info
= " (%s)" % info
1479 return "<%s(%s=%s)%s>" % (self
.__class
__.__name
__, name
, value
, info
)
1482 #---------------------------------------------------
1483 class IsindexControl(ScalarControl
):
1486 ISINDEX is the odd-one-out of HTML form controls. In fact, it isn't really
1487 part of regular HTML forms at all, and predates it. You're only allowed
1488 one ISINDEX per HTML document. ISINDEX and regular form submission are
1489 mutually exclusive -- either submit a form, or the ISINDEX.
1491 Having said this, since ISINDEX controls may appear in forms (which is
1492 probably bad HTML), ParseFile / ParseResponse will include them in the
1493 HTMLForm instances it returns. You can set the ISINDEX's value, as with
1494 any other control (but note that ISINDEX controls have no name, so you'll
1495 need to use the type argument of set_value!). When you submit the form,
1496 the ISINDEX will not be successful (ie., no data will get returned to the
1497 server as a result of its presence), unless you click on the ISINDEX
1498 control, in which case the ISINDEX gets submitted instead of the form:
1500 form.set_value("my isindex value", type="isindex")
1501 urllib2.urlopen(form.click(type="isindex"))
1503 ISINDEX elements outside of FORMs are ignored. If you want to submit one
1504 by hand, do it like so:
1506 url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value"))
1507 result = urllib2.urlopen(url)
1510 def __init__(self
, type, name
, attrs
, index
=None):
1511 ScalarControl
.__init
__(self
, type, name
, attrs
, index
)
1512 if self
._value
is None:
def is_of_kind(self, kind):
    """Return True when kind is "text" or "clickable", else False."""
    for accepted in ("text", "clickable"):
        if kind == accepted:
            return True
    return False
1517 def _totally_ordered_pairs(self
):
1520 def _click(self
, form
, coord
, return_type
, request_class
=urllib2
.Request
):
1521 # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
1523 # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is
1524 # deprecated in 4.01, but it should still say how to submit it).
1525 # Submission of ISINDEX is explained in the HTML 3.2 spec, though.
1526 parts
= self
._urlparse
(form
.action
)
1527 rest
, (query
, frag
) = parts
[:-2], parts
[-2:]
1528 parts
= rest
+ (urllib
.quote_plus(self
.value
), None)
1529 url
= self
._urlunparse
(parts
)
1530 req_data
= url
, None, []
1532 if return_type
== "pairs":
1534 elif return_type
== "request_data":
1537 return request_class(url
)
1541 if value
is None: value
= "<None>"
1544 if self
.disabled
: infos
.append("disabled")
1545 if self
.readonly
: infos
.append("readonly")
1546 info
= ", ".join(infos
)
1547 if info
: info
= " (%s)" % info
1549 return "<%s(%s)%s>" % (self
.__class
__.__name
__, value
, info
)
1552 #---------------------------------------------------
1553 class IgnoreControl(ScalarControl
):
1554 """Control that we're not interested in.
1563 These controls are always unsuccessful, in the terminology of HTML 4 (ie.
1564 they never require any information to be returned to the server).
1566 BUTTON/BUTTON is used to generate events for script embedded in HTML.
1568 The value attribute of IgnoreControl is always None.
1571 def __init__(self
, type, name
, attrs
, index
=None):
1572 ScalarControl
.__init
__(self
, type, name
, attrs
, index
)
def is_of_kind(self, kind):
    """Return False for every kind: an ignored control matches nothing."""
    return False
1577 def __setattr__(self
, name
, value
):
1579 raise AttributeError(
1580 "control '%s' is ignored, hence read-only" % self
.name
)
1581 elif name
in ("name", "type"):
1582 raise AttributeError("%s attribute is readonly" % name
)
1584 self
.__dict
__[name
] = value
1587 #---------------------------------------------------
1590 # helpers and subsidiary classes
1593 def __init__(self
, control
, attrs
, index
=None):
1594 label
= _get_label(attrs
)
1595 self
.__dict
__.update({
1596 "name": attrs
["value"],
1597 "_labels": label
and [label
] or [],
1599 "_control": control
,
1600 "disabled": attrs
.has_key("disabled"),
1602 "id": attrs
.get("id"),
1605 control
.items
.append(self
)
1607 def get_labels(self
):
1608 """Return all labels (Label instances) for this item.
1610 For items that represent radio buttons or checkboxes, if the item was
1611 surrounded by a <label> tag, that will be the first label; all other
1612 labels, connected by 'for' and 'id', are in the order that appear in
1615 For items that represent select options, if the option had a label
1616 attribute, that will be the first label. If the option has contents
1617 (text within the option tags) and it is not the same as the label
1618 attribute (if any), that will be a label. There is nothing in the
1619 spec to my knowledge that makes an option with an id unable to be the
1620 target of a label's for attribute, so those are included, if any, for
1621 the sake of consistency and completeness.
1625 res
.extend(self
._labels
)
1627 res
.extend(self
._control
._form
._id
_to
_labels
.get(self
.id, ()))
1630 def __getattr__(self
, name
):
1631 if name
=="selected":
1632 return self
._selected
1633 raise AttributeError(name
)
1635 def __setattr__(self
, name
, value
):
1636 if name
== "selected":
1637 self
._control
._set
_selected
_state
(self
, value
)
1638 elif name
== "disabled":
1639 self
.__dict
__["disabled"] = bool(value
)
1641 raise AttributeError(name
)
1652 # XXX appending the attrs without distinguishing them from name and id
1654 attrs
= [("name", self
.name
), ("id", self
.id)]+self
.attrs
.items()
1655 return "<%s %s>" % (
1656 self
.__class
__.__name
__,
1657 " ".join(["%s=%r" % (k
, v
) for k
, v
in attrs
])
1660 def disambiguate(items
, nr
, **kwds
):
1662 for key
, value
in kwds
.items():
1663 msgs
.append("%s=%r" % (key
, value
))
1664 msg
= " ".join(msgs
)
1666 raise ItemNotFoundError(msg
)
1669 raise AmbiguityError(msg
)
1671 if len(items
) <= nr
:
1672 raise ItemNotFoundError(msg
)
1675 class ListControl(Control
):
1676 """Control representing a sequence of items.
1678 The value attribute of a ListControl represents the successful list items
1679 in the control. The successful list items are those that are selected and
1682 ListControl implements both list controls that take a length-1 value
1683 (single-selection) and those that take length >1 values
1684 (multiple-selection).
1686 ListControls accept sequence values only. Some controls only accept
1687 sequences of length 0 or 1 (RADIO, and single-selection SELECT).
1688 In those cases, ItemCountError is raised if len(sequence) > 1. CHECKBOXes
1689 and multiple-selection SELECTs (those having the "multiple" HTML attribute)
1690 accept sequences of any length.
1692 Note the following mistake:
1694 control.value = some_value
1695 assert control.value == some_value # not necessarily true
1697 The reason for this is that the value attribute always gives the list items
1698 in the order they were listed in the HTML.
1700 ListControl items can also be referred to by their labels instead of names.
1701 Use the label argument to .get(), and the .set_value_by_label(),
1702 .get_value_by_label() methods.
1704 Note that, rather confusingly, though SELECT controls are represented in
1705 HTML by SELECT elements (which contain OPTION elements, representing
1706 individual list items), CHECKBOXes and RADIOs are not represented by *any*
1707 element. Instead, those controls are represented by a collection of INPUT
1708 elements. For example, this is a SELECT control, named "control1":
1710 <select name="control1">
1711 <option>foo</option>
1712 <option value="1">bar</option>
1715 and this is a CHECKBOX control, named "control2":
1717 <input type="checkbox" name="control2" value="foo" id="cbe1">
1718 <input type="checkbox" name="control2" value="bar" id="cbe2">
1720 The id attribute of a CHECKBOX or RADIO ListControl is always that of its
1721 first element (for example, "cbe1" above).
1724 Additional read-only public attribute: multiple.
1728 # ListControls are built up by the parser from their component items by
1729 # creating one ListControl per item, consolidating them into a single
1730 # master ListControl held by the HTMLForm:
1732 # -User calls form.new_control(...)
1733 # -Form creates Control, and calls control.add_to_form(self).
1734 # -Control looks for a Control with the same name and type in the form,
1735 # and if it finds one, merges itself with that control by calling
1736 # control.merge_control(self). The first Control added to the form, of
1737 # a particular name and type, is the only one that survives in the
1739 # -Form calls control.fixup for all its controls. ListControls in the
1740 # form know they can now safely pick their default values.
1742 # To create a ListControl without an HTMLForm, use:
1744 # control.merge_control(new_control)
1746 # (actually, it's much easier just to use ParseFile)
1750 def __init__(self
, type, name
, attrs
={}, select_default
=False,
1751 called_as_base_class
=False, index
=None):
1753 select_default: for RADIO and multiple-selection SELECT controls, pick
1754 the first item as the default if no 'selected' HTML attribute is
1758 if not called_as_base_class
:
1759 raise NotImplementedError()
1761 self
.__dict
__["type"] = type.lower()
1762 self
.__dict
__["name"] = name
1763 self
._value
= attrs
.get("value")
1764 self
.disabled
= False
1765 self
.readonly
= False
1766 self
.id = attrs
.get("id")
1767 self
._closed
= False
1769 # As Controls are merged in with .merge_control(), self.attrs will
1770 # refer to each Control in turn -- always the most recently merged
1771 # control. Each merged-in Control instance corresponds to a single
1772 # list item: see ListControl.__doc__.
1776 self
._select
_default
= select_default
1777 self
._clicked
= False
1782 def is_of_kind(self
, kind
):
1785 elif kind
== "multilist":
1786 return bool(self
.multiple
)
1787 elif kind
== "singlelist":
1788 return not self
.multiple
1792 def get_items(self
, name
=None, label
=None, id=None,
1793 exclude_disabled
=False):
1794 """Return matching items by name or label.
1796 For argument docs, see the docstring for .get()
1799 if name
is not None and not isstringlike(name
):
1800 raise TypeError("item name must be string-like")
1801 if label
is not None and not isstringlike(label
):
1802 raise TypeError("item label must be string-like")
1803 if id is not None and not isstringlike(id):
1804 raise TypeError("item id must be string-like")
1805 items
= [] # order is important
1806 compat
= self
._form
.backwards_compat
1807 for o
in self
.items
:
1808 if exclude_disabled
and o
.disabled
:
1810 if name
is not None and o
.name
!= name
:
1812 if label
is not None:
1813 for l
in o
.get_labels():
1814 if ((compat
and l
.text
== label
) or
1815 (not compat
and l
.text
.find(label
) > -1)):
1819 if id is not None and o
.id != id:
1824 def get(self
, name
=None, label
=None, id=None, nr
=None,
1825 exclude_disabled
=False):
1826 """Return item by name or label, disambiguating if necessary with nr.
1828 All arguments must be passed by name, with the exception of 'name',
1829 which may be used as a positional argument.
1831 If name is specified, then the item must have the indicated name.
1833 If label is specified, then the item must have a label whose
1834 whitespace-compressed, stripped, text substring-matches the indicated
1835 label string (eg. label="please choose" will match
1836 " Do please choose an item ").
1838 If id is specified, then the item must have the indicated id.
1840 nr is an optional 0-based index of the items matching the query.
1842 If nr is the default None value and more than one item is found, raises
1843 AmbiguityError (unless the HTMLForm instance's backwards_compat
1846 If no item is found, or if items are found but nr is specified and not
1847 found, raises ItemNotFoundError.
1849 Optionally excludes disabled items.
1852 if nr
is None and self
._form
.backwards_compat
:
1854 items
= self
.get_items(name
, label
, id, exclude_disabled
)
1855 return disambiguate(items
, nr
, name
=name
, label
=label
, id=id)
1857 def _get(self
, name
, by_label
=False, nr
=None, exclude_disabled
=False):
1858 # strictly for use by deprecated methods
1860 name
, label
= None, name
1862 name
, label
= name
, None
1863 return self
.get(name
, label
, nr
, exclude_disabled
)
1865 def toggle(self
, name
, by_label
=False, nr
=None):
1866 """Deprecated: given a name or label and optional disambiguating index
1867 nr, toggle the matching item's selection.
1869 Selecting items follows the behavior described in the docstring of the
1872 if the item is disabled, or this control is disabled or readonly,
1873 raise AttributeError.
1877 "item = control.get(...); item.selected = not item.selected")
1878 o
= self
._get
(name
, by_label
, nr
)
1879 self
._set
_selected
_state
(o
, not o
.selected
)
1881 def set(self
, selected
, name
, by_label
=False, nr
=None):
1882 """Deprecated: given a name or label and optional disambiguating index
1883 nr, set the matching item's selection to the bool value of selected.
1885 Selecting items follows the behavior described in the docstring of the
1888 if the item is disabled, or this control is disabled or readonly,
1889 raise AttributeError.
1893 "control.get(...).selected = <boolean>")
1894 self
._set
_selected
_state
(self
._get
(name
, by_label
, nr
), selected
)
1896 def _set_selected_state(self
, item
, action
):
1901 raise AttributeError("control '%s' is disabled" % self
.name
)
1903 raise AttributeError("control '%s' is readonly" % self
.name
)
1904 action
== bool(action
)
1905 compat
= self
._form
.backwards_compat
1906 if not compat
and item
.disabled
:
1907 raise AttributeError("item is disabled")
1909 if compat
and item
.disabled
and action
:
1910 raise AttributeError("item is disabled")
1912 item
.__dict
__["_selected"] = action
1915 item
.__dict
__["_selected"] = False
1917 for o
in self
.items
:
1918 o
.__dict
__["_selected"] = False
1919 item
.__dict
__["_selected"] = True
1921 def toggle_single(self
, by_label
=None):
1922 """Deprecated: toggle the selection of the single item in this control.
1924 Raises ItemCountError if the control does not contain only one item.
1926 by_label argument is ignored, and included only for backwards
1931 "control.items[0].selected = not control.items[0].selected")
1932 if len(self
.items
) != 1:
1933 raise ItemCountError(
1934 "'%s' is not a single-item control" % self
.name
)
1935 item
= self
.items
[0]
1936 self
._set
_selected
_state
(item
, not item
.selected
)
1938 def set_single(self
, selected
, by_label
=None):
1939 """Deprecated: set the selection of the single item in this control.
1941 Raises ItemCountError if the control does not contain only one item.
1943 by_label argument is ignored, and included only for backwards
1948 "control.items[0].selected = <boolean>")
1949 if len(self
.items
) != 1:
1950 raise ItemCountError(
1951 "'%s' is not a single-item control" % self
.name
)
1952 self
._set
_selected
_state
(self
.items
[0], selected
)
1954 def get_item_disabled(self
, name
, by_label
=False, nr
=None):
1955 """Get disabled state of named list item in a ListControl."""
1957 "control.get(...).disabled")
1958 return self
._get
(name
, by_label
, nr
).disabled
1960 def set_item_disabled(self
, disabled
, name
, by_label
=False, nr
=None):
1961 """Set disabled state of named list item in a ListControl.
1963 disabled: boolean disabled state
1967 "control.get(...).disabled = <boolean>")
1968 self
._get
(name
, by_label
, nr
).disabled
= disabled
def set_all_items_disabled(self, disabled):
    """Give every item of this ListControl the same disabled state.

    disabled: boolean disabled state, assigned to the disabled attribute
     of each item in self.items

    """
    for entry in self.items:
        setattr(entry, "disabled", disabled)
1979 def get_item_attrs(self
, name
, by_label
=False, nr
=None):
1980 """Return dictionary of HTML attributes for a single ListControl item.
1982 The HTML element types that describe list items are: OPTION for SELECT
1983 controls, INPUT for the rest. These elements have HTML attributes that
1984 you may occasionally want to know about -- for example, the "alt" HTML
1985 attribute gives a text string describing the item (graphical browsers
1986 usually display this as a tooltip).
1988 The returned dictionary maps HTML attribute names to values. The names
1989 and values are taken from the original HTML.
1993 "control.get(...).attrs")
1994 return self
._get
(name
, by_label
, nr
).attrs
1996 def close_control(self
):
1999 def add_to_form(self
, form
):
2000 assert self
._form
is None or form
== self
._form
, (
2001 "can't add control to more than one form")
2003 if self
.name
is None:
2004 # always count nameless elements as separate controls
2005 Control
.add_to_form(self
, form
)
2007 for ii
in range(len(form
.controls
)-1, -1, -1):
2008 control
= form
.controls
[ii
]
2009 if control
.name
== self
.name
and control
.type == self
.type:
2011 Control
.add_to_form(self
, form
)
2013 control
.merge_control(self
)
2016 Control
.add_to_form(self
, form
)
def merge_control(self, control):
    """Absorb the items of another control into this one.

    control: control whose items are appended, in order, to self.items

    Both controls must agree on whether they are multiple-selection; this
    is checked with an assert.

    """
    theirs = bool(control.multiple)
    mine = bool(self.multiple)
    assert theirs == mine
    # usually, isinstance(control, self.__class__)
    own_items = self.items
    own_items.extend(control.items)
2025 ListControls are built up from component list items (which are also
2026 ListControls) during parsing. This method should be called after all
2027 items have been added. See ListControl.__doc__ for the reason this is
2031 # Need to set default selection where no item was indicated as being
2032 # selected by the HTML:
2035 # Nothing should be selected.
2036 # SELECT/single, SELECT/multiple and RADIO:
2037 # RFC 1866 (HTML 2.0): says first item should be selected.
2038 # W3C HTML 4.01 Specification: says that client behaviour is
2039 # undefined in this case. For RADIO, exactly one must be selected,
2040 # though which one is undefined.
2041 # Both Netscape and Microsoft Internet Explorer (IE) choose first
2042 # item for SELECT/single. However, both IE5 and Mozilla (both 1.0
2043 # and Firebird 0.6) leave all items unselected for RADIO and
2046 # Since both Netscape and IE choose the first item for
2047 # SELECT/single, we do the same. OTOH, both Netscape and IE
2048 # leave SELECT/multiple with nothing selected, in violation of RFC 1866
2049 # (but not in violation of the W3C HTML 4 standard); the same is true
2050 # of RADIO (which *is* in violation of the HTML 4 standard). We follow
2051 # RFC 1866 if the _select_default attribute is set, and Netscape and IE
2052 # otherwise. RFC 1866 and HTML 4 are always violated insofar as you
2053 # can deselect all items in a RadioControl.
2055 for o
in self
.items
:
2056 # set items' controls to self, now that we've merged
2057 o
.__dict
__["_control"] = self
2059 def __getattr__(self
, name
):
2061 compat
= self
._form
.backwards_compat
2062 if self
.name
is None:
2064 return [o
.name
for o
in self
.items
if o
.selected
and
2065 (not o
.disabled
or compat
)]
2067 raise AttributeError("%s instance has no attribute '%s'" %
2068 (self
.__class
__.__name
__, name
))
2070 def __setattr__(self
, name
, value
):
2073 raise AttributeError("control '%s' is disabled" % self
.name
)
2075 raise AttributeError("control '%s' is readonly" % self
.name
)
2076 self
._set
_value
(value
)
2077 elif name
in ("name", "type", "multiple"):
2078 raise AttributeError("%s attribute is readonly" % name
)
2080 self
.__dict
__[name
] = value
2082 def _set_value(self
, value
):
2083 if value
is None or isstringlike(value
):
2084 raise TypeError("ListControl, must set a sequence")
2086 compat
= self
._form
.backwards_compat
2087 for o
in self
.items
:
2088 if not o
.disabled
or compat
:
2091 self
._multiple
_set
_value
(value
)
2092 elif len(value
) > 1:
2093 raise ItemCountError(
2094 "single selection list, must set sequence of "
2097 self
._single
_set
_value
(value
)
2099 def _get_items(self
, name
, target
=1):
2100 all_items
= self
.get_items(name
)
2101 items
= [o
for o
in all_items
if not o
.disabled
]
2102 if len(items
) < target
:
2103 if len(all_items
) < target
:
2104 raise ItemNotFoundError(
2105 "insufficient items with name %r" % name
)
2107 raise AttributeError(
2108 "insufficient non-disabled items with name %s" % name
)
2118 def _single_set_value(self
, value
):
2119 assert len(value
) == 1
2120 on
, off
= self
._get
_items
(value
[0])
2123 off
[0].selected
= True
2125 def _multiple_set_value(self
, value
):
2126 compat
= self
._form
.backwards_compat
2127 turn_on
= [] # transactional-ish
2128 turn_off
= [item
for item
in self
.items
if
2129 item
.selected
and (not item
.disabled
or compat
)]
2132 if nn
in names
.keys():
2136 for name
, count
in names
.items():
2137 on
, off
= self
._get
_items
(name
, count
)
2138 for i
in range(count
):
2142 del turn_off
[turn_off
.index(item
)]
2146 turn_on
.append(item
)
2147 for item
in turn_off
:
2148 item
.selected
= False
2149 for item
in turn_on
:
2150 item
.selected
= True
2152 def set_value_by_label(self
, value
):
2153 """Set the value of control by item labels.
2155 value is expected to be an iterable of strings that are substrings of
2156 the item labels that should be selected. Before substring matching is
2157 performed, the original label text is whitespace-compressed
2158 (consecutive whitespace characters are converted to a single space
2159 character) and leading and trailing whitespace is stripped. Ambiguous
2160 labels are accepted without complaint if the form's backwards_compat is
2161 True; otherwise, it will not complain as long as all ambiguous labels
2162 share the same item name (e.g. OPTION value).
2165 if isstringlike(value
):
2166 raise TypeError(value
)
2167 if not self
.multiple
and len(value
) > 1:
2168 raise ItemCountError(
2169 "single selection list, must set sequence of "
2173 found
= self
.get_items(label
=nn
)
2175 if not self
._form
.backwards_compat
:
2176 # ambiguous labels are fine as long as item names (e.g.
2177 # OPTION values) are same
2178 opt_name
= found
[0].name
2179 if [o
for o
in found
[1:] if o
.name
!= opt_name
]:
2180 raise AmbiguityError(nn
)
2182 # OK, we'll guess :-( Assume first available item.
2185 # For the multiple-item case, we could try to be smarter,
2186 # saving them up and trying to resolve, but that's too much.
2187 if self
._form
.backwards_compat
or o
not in items
:
2190 else: # all of them are used
2191 raise ItemNotFoundError(nn
)
2192 # now we have all the items that should be on
2193 # let's just turn everything off and then back on.
2198 def get_value_by_label(self
):
2199 """Return the value of the control as given by normalized labels."""
2201 compat
= self
._form
.backwards_compat
2202 for o
in self
.items
:
2203 if (not o
.disabled
or compat
) and o
.selected
:
2204 for l
in o
.get_labels():
2212 def possible_items(self
, by_label
=False):
2213 """Deprecated: return the names or labels of all possible items.
2215 Includes disabled items, which may be misleading for some use cases.
2219 "[item.name for item in self.items]")
2222 for o
in self
.items
:
2223 for l
in o
.get_labels():
2230 return [o
.name
for o
in self
.items
]
2232 def _totally_ordered_pairs(self
):
2233 if self
.disabled
or self
.name
is None:
2236 return [(o
._index
, self
.name
, o
.name
) for o
in self
.items
2237 if o
.selected
and not o
.disabled
]
2241 if name
is None: name
= "<None>"
2243 display
= [str(o
) for o
in self
.items
]
2246 if self
.disabled
: infos
.append("disabled")
2247 if self
.readonly
: infos
.append("readonly")
2248 info
= ", ".join(infos
)
2249 if info
: info
= " (%s)" % info
2251 return "<%s(%s=[%s])%s>" % (self
.__class
__.__name
__,
2252 name
, ", ".join(display
), info
)
2255 class RadioControl(ListControl
):
def __init__(self, type, name, attrs, select_default=False, index=None):
    """Build a RADIO control from the attributes of one INPUT element.

    type, name: control type and name, passed through to ListControl
    attrs: HTML attributes of the INPUT element; note that this mapping is
     modified in place so that a missing "value" attribute becomes "on"
    select_default: passed through to ListControl.__init__
    index: passed through to ListControl.__init__ and to the new Item

    """
    attrs.setdefault("value", "on")
    ListControl.__init__(self, type, name, attrs, select_default,
                         called_as_base_class=True, index=index)
    # Stored directly in the instance dictionary rather than by ordinary
    # attribute assignment (presumably to bypass any __setattr__ hook --
    # confirm against ListControl).
    self.__dict__["multiple"] = False
    # NOTE(review): the Item is not stored anywhere here; presumably the
    # Item constructor registers itself with this control -- confirm.
    o = Item(self, attrs, index)
    # CHECKED is a boolean HTML attribute: only its presence matters.
    # Use the "in" operator rather than the deprecated dict.has_key().
    o.__dict__["_selected"] = "checked" in attrs
2271 ListControl
.fixup(self
)
2272 found
= [o
for o
in self
.items
if o
.selected
and not o
.disabled
]
2274 if self
._select
_default
:
2275 for o
in self
.items
:
2280 # Ensure only one item selected. Choose the last one,
2281 # following IE and Firefox.
2282 for o
in found
[:-1]:
2285 def get_labels(self
):
2288 class CheckboxControl(ListControl
):
def __init__(self, type, name, attrs, select_default=False, index=None):
    """Build a CHECKBOX control from the attributes of one INPUT element.

    type, name: control type and name, passed through to ListControl
    attrs: HTML attributes of the INPUT element; note that this mapping is
     modified in place so that a missing "value" attribute becomes "on"
    select_default: passed through to ListControl.__init__
    index: passed through to ListControl.__init__ and to the new Item

    """
    attrs.setdefault("value", "on")
    ListControl.__init__(self, type, name, attrs, select_default,
                         called_as_base_class=True, index=index)
    # Stored directly in the instance dictionary rather than by ordinary
    # attribute assignment (presumably to bypass any __setattr__ hook --
    # confirm against ListControl).
    self.__dict__["multiple"] = True
    # NOTE(review): the Item is not stored anywhere here; presumably the
    # Item constructor registers itself with this control -- confirm.
    o = Item(self, attrs, index)
    # CHECKED is a boolean HTML attribute: only its presence matters.
    # Use the "in" operator rather than the deprecated dict.has_key().
    o.__dict__["_selected"] = "checked" in attrs
2303 def get_labels(self
):
2307 class SelectControl(ListControl
):
2314 OPTION 'values', in HTML parlance, are Item 'names' in ClientForm parlance.
2316 SELECT control values and labels are subject to some messy defaulting
2317 rules. For example, if the HTML representation of the control is:
2320 <OPTION value=0 label="2002">current year</OPTION>
2321 <OPTION value=1>2001</OPTION>
2322 <OPTION>2000</OPTION>
2325 The items, in order, have labels "2002", "2001" and "2000", whereas their
2326 names (the OPTION values) are "0", "1" and "2000" respectively. Note that
2327 the value of the last OPTION in this example defaults to its contents, as
2328 specified by RFC 1866, as do the labels of the second and third OPTIONs.
2330 The OPTION labels are sometimes more meaningful than the OPTION values,
2331 which can make for more maintainable code.
2333 Additional read-only public attribute: attrs
2335 The attrs attribute is a dictionary of the original HTML attributes of the
2336 SELECT element. Other ListControls do not have this attribute, because in
2337 other cases the control as a whole does not correspond to any single HTML
2338 element. control.get(...).attrs may be used as usual to get at the HTML
2339 attributes of the HTML elements corresponding to individual list items (for
2340 SELECT controls, these are OPTION elements).
2342 Another special case is that the Item.attrs dictionaries have a special key
2343 "contents" which does not correspond to any real HTML attribute, but rather
2344 contains the contents of the OPTION element:
2346 <OPTION>this bit</OPTION>
2349 # HTML attributes here are treated slightly differently from other list
2351 # -The SELECT HTML attributes dictionary is stuffed into the OPTION
2352 # HTML attributes dictionary under the "__select" key.
2353 # -The content of each OPTION element is stored under the special
2354 # "contents" key of the dictionary.
2355 # After all this, the dictionary is passed to the SelectControl constructor
2356 # as the attrs argument, as usual. However:
2357 # -The first SelectControl constructed when building up a SELECT control
2358 # has a constructor attrs argument containing only the __select key -- so
2359 # this SelectControl represents an empty SELECT control.
2360 # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and
2361 # the __select dictionary containing the SELECT HTML-attributes.
2363 def __init__(self
, type, name
, attrs
, select_default
=False, index
=None):
2364 # fish out the SELECT HTML attributes from the OPTION HTML attributes
2366 self
.attrs
= attrs
["__select"].copy()
2367 self
.__dict
__["_label"] = _get_label(self
.attrs
)
2368 self
.__dict
__["id"] = self
.attrs
.get("id")
2369 self
.__dict
__["multiple"] = self
.attrs
.has_key("multiple")
2370 # the majority of the contents, label, and value dance already happened
2371 contents
= attrs
.get("contents")
2372 attrs
= attrs
.copy()
2373 del attrs
["__select"]
2375 ListControl
.__init
__(self
, type, name
, self
.attrs
, select_default
,
2376 called_as_base_class
=True, index
=index
)
2377 self
.disabled
= self
.attrs
.has_key("disabled")
2378 self
.readonly
= self
.attrs
.has_key("readonly")
2379 if attrs
.has_key("value"):
2380 # otherwise it is a marker 'select started' token
2381 o
= Item(self
, attrs
, index
)
2382 o
.__dict
__["_selected"] = attrs
.has_key("selected")
2383 # add 'label' label and contents label, if different. If both are
2384 # provided, the 'label' label is used for display in HTML
2385 # 4.0-compliant browsers (and any lower spec? not sure) while the
2386 # contents are used for display in older or less-compliant
2387 # browsers. We make label objects for both, if the values are
2389 label
= attrs
.get("label")
2391 o
._labels
.append(Label({"__text": label
}))
2392 if contents
and contents
!= label
:
2393 o
._labels
.append(Label({"__text": contents
}))
2395 o
._labels
.append(Label({"__text": contents
}))
2398 ListControl
.fixup(self
)
2399 # Firefox doesn't exclude disabled items from those considered here
2400 # (i.e. from 'found', for both branches of the if below). Note that
2401 # IE6 doesn't support the disabled attribute on OPTIONs at all.
2402 found
= [o
for o
in self
.items
if o
.selected
]
2404 if not self
.multiple
or self
._select
_default
:
2405 for o
in self
.items
:
2407 was_disabled
= self
.disabled
2408 self
.disabled
= False
2412 o
.disabled
= was_disabled
2414 elif not self
.multiple
:
2415 # Ensure only one item selected. Choose the last one,
2416 # following IE and Firefox.
2417 for o
in found
[:-1]:
2421 #---------------------------------------------------
2422 class SubmitControl(ScalarControl
):
def __init__(self, type, name, attrs, index=None):
    """Initialise as a ScalarControl, then apply SUBMIT-specific defaults.

    All arguments are passed straight through to ScalarControl.__init__.

    """
    ScalarControl.__init__(self, type, name, attrs, index)
    # Browsers disagree on the default SUBMIT value: IE5 uses
    # "Submit Query", Firebird 0.6 leaves it blank and Konqueror 3.1 uses
    # "Submit".  We use the empty string when no value was supplied.
    if self.value is None:
        self.value = ""
    # The default is applied before the control is marked readonly.
    self.readonly = True
2438 def get_labels(self
):
2441 res
.append(Label({"__text": self
.value
}))
2442 res
.extend(ScalarControl
.get_labels(self
))
def is_of_kind(self, kind):
    """Return True if kind is "clickable", the only kind matched here."""
    matches = (kind == "clickable")
    return matches
2447 def _click(self
, form
, coord
, return_type
, request_class
=urllib2
.Request
):
2448 self
._clicked
= coord
2449 r
= form
._switch
_click
(return_type
, request_class
)
2450 self
._clicked
= False
2453 def _totally_ordered_pairs(self
):
2454 if not self
._clicked
:
2456 return ScalarControl
._totally
_ordered
_pairs
(self
)
2459 #---------------------------------------------------
2460 class ImageControl(SubmitControl
):
2466 Coordinates are specified using one of the HTMLForm.click* methods.
def __init__(self, type, name, attrs, index=None):
    """Initialise as a SubmitControl, then clear the readonly flag.

    All arguments are passed straight through to SubmitControl.__init__.

    """
    SubmitControl.__init__(self, type, name, attrs, index)
    # SubmitControl.__init__ sets readonly to True; undo that here.
    self.readonly = False
2473 def _totally_ordered_pairs(self
):
2474 clicked
= self
._clicked
2475 if self
.disabled
or not clicked
:
2478 if name
is None: return []
2480 (self
._index
, "%s.x" % name
, str(clicked
[0])),
2481 (self
._index
+1, "%s.y" % name
, str(clicked
[1])),
2485 pairs
.append((self
._index
+2, name
, value
))
2488 get_labels
= ScalarControl
.get_labels
2490 # aliases, just to make str(control) and str(form) clearer
class PasswordControl(TextControl):
    # Used for the "password" control type; adds nothing to TextControl.
    pass


class HiddenControl(TextControl):
    # Used for the "hidden" control type; adds nothing to TextControl.
    pass


class TextareaControl(TextControl):
    # Used for the "textarea" control type; adds nothing to TextControl.
    pass


class SubmitButtonControl(SubmitControl):
    # Used for the "submitbutton" control type; adds nothing to
    # SubmitControl.
    pass
def is_listcontrol(control):
    """Return the result of asking control whether it is of kind "list"."""
    wanted_kind = "list"
    return control.is_of_kind(wanted_kind)
2501 """Represents a single HTML <form> ... </form> element.
2503 A form consists of a sequence of controls that usually have names, and
2504 which can take on various values. The values of the various types of
2505 controls represent variously: text, zero-or-one-of-many or many-of-many
2506 choices, and files to be uploaded. Some controls can be clicked on to
2507 submit the form, and clickable controls' values sometimes include the
2508 coordinates of the click.
2510 Forms can be filled in with data to be returned to the server, and then
2511 submitted, using the click method to generate a request object suitable for
2512 passing to urllib2.urlopen (or the click_request_data or click_pairs
2513 methods if you're not using urllib2).
2516 forms = ClientForm.ParseFile(html, base_uri)
2519 form["query"] = "Python"
2520 form.find_control("nr_results").get("lots").selected = True
2522 response = urllib2.urlopen(form.click())
2524 Usually, HTMLForm instances are not created directly. Instead, the
2525 ParseFile or ParseResponse factory functions are used. If you do construct
2526 HTMLForm objects yourself, however, note that an HTMLForm instance is only
2527 properly initialised after the fixup method has been called (ParseFile and
2528 ParseResponse do this for you). See ListControl.__doc__ for the reason
2531 Indexing a form (form["control_name"]) returns the named Control's value
2532 attribute. Assignment to a form index (form["control_name"] = something)
2533 is equivalent to assignment to the named Control's value attribute. If you
2534 need to be more specific than just supplying the control's name, use the
2535 set_value and get_value methods.
2537 ListControl values are lists of item names (specifically, the names of the
2538 items that are selected and not disabled, and hence are "successful" -- ie.
2539 cause data to be returned to the server). The list item's name is the
2540 value of the corresponding HTML element's "value" attribute.
2544 <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT>
2545 <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT>
2547 defines a CHECKBOX control with name "cheeses" which has two items, named
2548 "leicester" and "cheddar".
2552 <SELECT name="more_cheeses">
2554 <OPTION value="2" label="CHEDDAR">cheddar</OPTION>
2557 defines a SELECT control with name "more_cheeses" which has two items,
2558 named "1" and "2" (because the OPTION element's value HTML attribute
2559 defaults to the element contents -- see SelectControl.__doc__ for more on
2560 these defaulting rules).
2562 To select, deselect or otherwise manipulate individual list items, use the
2563 HTMLForm.find_control() and ListControl.get() methods. To set the whole
2564 value, do as for any other control: use indexing or the set_/get_value
2569 # select *only* the item named "cheddar"
2570 form["cheeses"] = ["cheddar"]
2571 # select "cheddar", leave other items unaffected
2572 form.find_control("cheeses").get("cheddar").selected = True
2574 Some controls (RADIO and SELECT without the multiple attribute) can only
2575 have zero or one items selected at a time. Some controls (CHECKBOX and
2576 SELECT with the multiple attribute) can have multiple items selected at a
2577 time. To set the whole value of a ListControl, assign a sequence to a form
2580 form["cheeses"] = ["cheddar", "leicester"]
2582 If the ListControl is not multiple-selection, the assigned list must be of
2585 To check if a control has an item, if an item is selected, or if an item is
2586 successful (selected and not disabled), respectively:
2588 "cheddar" in [item.name for item in form.find_control("cheeses").items]
2589 "cheddar" in [item.name for item in form.find_control("cheeses").items and
2591 "cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses"))
2593 Note that some list items may be disabled (see below).
2595 Note the following mistake:
2597 form[control_name] = control_value
2598 assert form[control_name] == control_value # not necessarily true
2600 The reason for this is that form[control_name] always gives the list items
2601 in the order they were listed in the HTML.
2603 List items (hence list values, too) can be referred to in terms of list
2604 item labels rather than list item names using the appropriate label
2605 arguments. Note that each item may have several labels.
2607 The question of default values of OPTION contents, labels and values is
2608 somewhat complicated: see SelectControl.__doc__ and
2609 ListControl.get_item_attrs.__doc__ if you think you need to know.
2611 Controls can be disabled or readonly. In either case, the control's value
2612 cannot be changed until you clear those flags (see example below).
2613 Disabled is the state typically represented by browsers by 'greying out' a
2614 control. Disabled controls are not 'successful' -- they don't cause data
2615 to get returned to the server. Readonly controls usually appear in
2616 browsers as read-only text boxes. Readonly controls are successful. List
2617 items can also be disabled. Attempts to select or deselect disabled items
2618 fail with AttributeError.
2620 If a lot of controls are readonly, it can be useful to do this:
2622 form.set_all_readonly(False)
2624 To clear a control's value attribute, so that it is not successful (until a
2625 value is subsequently set):
2627 form.clear("cheeses")
2631 control = form.find_control("cheeses")
2632 control.disabled = False
2633 control.readonly = False
2634 control.get("gruyere").disabled = True
2635 control.items[0].selected = True
2637 See the various Control classes for further documentation. Many methods
2638 take name, type, kind, id, label and nr arguments to specify the control to
2639 be operated on: see HTMLForm.find_control.__doc__.
2641 ControlNotFoundError (subclass of ValueError) is raised if the specified
2642 control can't be found. This includes occasions where a non-ListControl
2643 is found, but the method (set, for example) requires a ListControl.
2644 ItemNotFoundError (subclass of ValueError) is raised if a list item can't
2645 be found. ItemCountError (subclass of ValueError) is raised if an attempt
2646 is made to select more than one item and the control doesn't allow that, or
2647 set/get_single are called and the control contains more than one item.
2648 AttributeError is raised if a control or item is readonly or disabled and
2649 an attempt is made to alter its value.
2651 Security note: Remember that any passwords you store in HTMLForm instances
2652 will be saved to disk in the clear if you pickle them (directly or
2653 indirectly). The simplest solution to this is to avoid pickling HTMLForm
2654 objects. You could also pickle before filling in any password, or just set
2655 the password to "" before pickling.
2660 action: full (absolute URI) form action
2661 method: "GET" or "POST"
2662 enctype: form transfer encoding MIME type
2663 name: name of form (None if no name was specified)
2664 attrs: dictionary mapping original HTML form attributes to their values
2666 controls: list of Control instances; do not alter this list
2667 (instead, call form.new_control to make a Control and add it to the
2668 form, or control.add_to_form if you already have a Control instance)
2672 Methods for form filling:
2673 -------------------------
2675 Most of these methods have very similar arguments. See
2676 HTMLForm.find_control.__doc__ for details of the name, type, kind, label
2679 def find_control(self,
2680 name=None, type=None, kind=None, id=None, predicate=None,
2681 nr=None, label=None)
2683 get_value(name=None, type=None, kind=None, id=None, nr=None,
2684 by_label=False, # by_label is deprecated
2687 name=None, type=None, kind=None, id=None, nr=None,
2688 by_label=False, # by_label is deprecated
2692 clear(name=None, type=None, kind=None, id=None, nr=None, label=None)
2694 set_all_readonly(readonly)
2697 Method applying only to FileControls:
2699 add_file(file_object,
2700 content_type="application/octet-stream", filename=None,
2701 name=None, id=None, nr=None, label=None)
2704 Methods applying only to clickable controls:
2706 click(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
2707 click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1),
2709 click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
2714 "text": TextControl
,
2715 "password": PasswordControl
,
2716 "hidden": HiddenControl
,
2717 "textarea": TextareaControl
,
2719 "isindex": IsindexControl
,
2721 "file": FileControl
,
2723 "button": IgnoreControl
,
2724 "buttonbutton": IgnoreControl
,
2725 "reset": IgnoreControl
,
2726 "resetbutton": IgnoreControl
,
2728 "submit": SubmitControl
,
2729 "submitbutton": SubmitButtonControl
,
2730 "image": ImageControl
,
2732 "radio": RadioControl
,
2733 "checkbox": CheckboxControl
,
2734 "select": SelectControl
,
2737 #---------------------------------------------------
2738 # Initialisation. Use ParseResponse / ParseFile instead.
2740 def __init__(self
, action
, method
="GET",
2741 enctype
="application/x-www-form-urlencoded",
2742 name
=None, attrs
=None,
2743 request_class
=urllib2
.Request
,
2744 forms
=None, labels
=None, id_to_labels
=None,
2745 backwards_compat
=True):
2747 In the usual case, use ParseResponse (or ParseFile) to create new
2750 action: full (absolute URI) form action
2751 method: "GET" or "POST"
2752 enctype: form transfer encoding MIME type
2754 attrs: dictionary mapping original HTML form attributes to their values
2757 self
.action
= action
2758 self
.method
= method
2759 self
.enctype
= enctype
2761 if attrs
is not None:
2762 self
.attrs
= attrs
.copy()
2766 self
._request
_class
= request_class
2768 # these attributes are used by zope.testbrowser
2769 self
._forms
= forms
# this is a semi-public API!
2770 self
._labels
= labels
# this is a semi-public API!
2771 self
._id
_to
_labels
= id_to_labels
# this is a semi-public API!
2773 self
.backwards_compat
= backwards_compat
# note __setattr__
2775 self
._urlunparse
= urlparse
.urlunparse
2776 self
._urlparse
= urlparse
.urlparse
def __getattr__(self, name):
    """Attribute lookup fallback, used when normal lookup fails.

    "backwards_compat" is served from the _backwards_compat attribute;
    every other name is looked up on the HTMLForm class itself.

    """
    if name != "backwards_compat":
        return getattr(HTMLForm, name)
    return self._backwards_compat
2783 def __setattr__(self
, name
, value
):
2785 if name
== "backwards_compat":
2786 name
= "_backwards_compat"
2788 for cc
in self
.controls
:
2791 except AttributeError:
2795 for ll
in ii
.get_labels():
2796 ll
._backwards
_compat
= value
2797 self
.__dict
__[name
] = value
2799 def new_control(self
, type, name
, attrs
,
2800 ignore_unknown
=False, select_default
=False, index
=None):
2801 """Adds a new control to the form.
2803 This is usually called by ParseFile and ParseResponse. Don't call it
2804 yourself unless you're building your own Control instances.
2806 Note that controls representing lists of items are built up from
2807 controls holding only a single list item. See ListControl.__doc__ for
2808 further information.
2810 type: type of control (see Control.__doc__ for a list)
2811 attrs: HTML attributes of control
2812 ignore_unknown: if true, use a dummy Control instance for controls of
2813 unknown type; otherwise, use a TextControl
2814 select_default: for RADIO and multiple-selection SELECT controls, pick
2815 the first item as the default if no 'selected' HTML attribute is
2816 present (this defaulting happens when the HTMLForm.fixup method is
2818 index: index of corresponding element in HTML (see
2819 MoreFormTests.test_interspersed_controls for motivation)
2823 klass
= self
.type2class
.get(type)
2826 klass
= IgnoreControl
2831 if issubclass(klass
, ListControl
):
2832 control
= klass(type, name
, a
, select_default
, index
)
2834 control
= klass(type, name
, a
, index
)
2836 if type == "select" and len(attrs
) == 1:
2837 for ii
in range(len(self
.controls
)-1, -1, -1):
2838 ctl
= self
.controls
[ii
]
2839 if ctl
.type == "select":
2843 control
.add_to_form(self
)
2844 control
._urlparse
= self
._urlparse
2845 control
._urlunparse
= self
._urlunparse
2848 """Normalise form after all controls have been added.
2850 This is usually called by ParseFile and ParseResponse. Don't call it
2851 yourself unless you're building your own Control instances.
2853 This method should only be called once, after all controls have been
2857 for control
in self
.controls
:
2859 self
.backwards_compat
= self
._backwards
_compat
2861 #---------------------------------------------------
2863 header
= "%s%s %s %s" % (
2864 (self
.name
and self
.name
+" " or ""),
2865 self
.method
, self
.action
, self
.enctype
)
2867 for control
in self
.controls
:
2868 rep
.append(" %s" % str(control
))
2869 return "<%s>" % "\n".join(rep
)
2871 #---------------------------------------------------
2872 # Form-filling methods.
def __getitem__(self, name):
    """Return the value attribute of the control found by name."""
    control = self.find_control(name)
    return control.value
def __contains__(self, name):
    """Return true if the form has a control with the given name.

    find_control raises ControlNotFoundError when nothing matches, so a
    plain bool(self.find_control(name)) made "name in form" raise for a
    missing control instead of evaluating to False.  That exception is
    now translated into False; any other exception from find_control is
    still propagated unchanged.

    """
    try:
        control = self.find_control(name)
    except ControlNotFoundError:
        return False
    return bool(control)
2878 def __setitem__(self
, name
, value
):
2879 control
= self
.find_control(name
)
2881 control
.value
= value
2882 except AttributeError, e
:
2883 raise ValueError(str(e
))
2886 name
=None, type=None, kind
=None, id=None, nr
=None,
2887 by_label
=False, # by_label is deprecated
2889 """Return value of control.
2891 If only name and value arguments are supplied, equivalent to
2897 deprecation("form.get_value_by_label(...)")
2898 c
= self
.find_control(name
, type, kind
, id, label
=label
, nr
=nr
)
2901 meth
= c
.get_value_by_label
2902 except AttributeError:
2903 raise NotImplementedError(
2904 "control '%s' does not yet support by_label" % c
.name
)
2909 def set_value(self
, value
,
2910 name
=None, type=None, kind
=None, id=None, nr
=None,
2911 by_label
=False, # by_label is deprecated
2913 """Set value of control.
2915 If only name and value arguments are supplied, equivalent to
2921 deprecation("form.get_value_by_label(...)")
2922 c
= self
.find_control(name
, type, kind
, id, label
=label
, nr
=nr
)
2925 meth
= c
.set_value_by_label
2926 except AttributeError:
2927 raise NotImplementedError(
2928 "control '%s' does not yet support by_label" % c
.name
)
def get_value_by_label(
    self, name=None, type=None, kind=None, id=None, label=None, nr=None):
    """Return the located control's value as given by its labels.

    The control is located with find_control, and the result of its
    get_value_by_label method is returned.

    All arguments should be passed by name.

    """
    control = self.find_control(name, type, kind, id, label=label, nr=nr)
    value = control.get_value_by_label()
    return value
2943 def set_value_by_label(
2945 name
=None, type=None, kind
=None, id=None, label
=None, nr
=None):
2948 All arguments should be passed by name.
2951 c
= self
.find_control(name
, type, kind
, id, label
=label
, nr
=nr
)
2952 c
.set_value_by_label(value
)
def set_all_readonly(self, readonly):
    """Set the readonly attribute of every control in the form.

    readonly: any object; its truth value is stored as a real boolean

    """
    flag = bool(readonly)
    for ctl in self.controls:
        ctl.readonly = flag
2958 def clear_all(self
):
2959 """Clear the value attributes of all controls in the form.
2961 See HTMLForm.clear.__doc__.
2964 for control
in self
.controls
:
2968 name
=None, type=None, kind
=None, id=None, nr
=None, label
=None):
2969 """Clear the value attribute of a control.
2971 As a result, the affected control will not be successful until a value
2972 is subsequently set. AttributeError is raised on readonly controls.
2975 c
= self
.find_control(name
, type, kind
, id, label
=label
, nr
=nr
)
2979 #---------------------------------------------------
2980 # Form-filling methods applying only to ListControls.
def possible_items(self,  # deprecated
                   name=None, type=None, kind=None, id=None,
                   nr=None, by_label=False, label=None):
    """Return the list of all values the specified list control can take.

    The control is located using name, type, kind, id, label and nr;
    by_label is handed on to the control's own possible_items method.

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    items = list_control.possible_items(by_label)
    return items
def set(self, selected, item_name,  # deprecated
        name=None, type=None, kind=None, id=None, nr=None,
        by_label=False, label=None):
    """Select / deselect named list item.

    selected: boolean selected state
    item_name, by_label: handed on to the list control's set method

    The list control is located using name, type, kind, id, label and nr.

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.set(selected, item_name, by_label)
def toggle(self, item_name,  # deprecated
           name=None, type=None, kind=None, id=None, nr=None,
           by_label=False, label=None):
    """Toggle selected state of named list item.

    The list control is located using name, type, kind, id, label and nr;
    item_name and by_label are handed on to its toggle method.

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.toggle(item_name, by_label)
def set_single(self, selected,  # deprecated
               name=None, type=None, kind=None, id=None,
               nr=None, by_label=None, label=None):
    """Select / deselect list item in a control having only one item.

    If the control has multiple list items, ItemCountError is raised.

    This is just a convenience method, so you don't need to know the item's
    name -- the item name in these single-item controls is usually
    something meaningless like "1" or "on".

    The list control is located using name, type, kind, id, label and nr.

    """  # by_label ignored and deprecated
    list_control = self._find_list_control(
        name, type, kind, id, label, nr)
    list_control.set_single(selected)
def toggle_single(self, name=None, type=None, kind=None, id=None,
                  nr=None, by_label=None, label=None):  # deprecated
    """Toggle selected state of list item in control having only one item.

    The rest is as for HTMLForm.set_single.__doc__.

    """  # by_label ignored and deprecated
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.toggle_single()
3035 #---------------------------------------------------
3036 # Form-filling method applying only to FileControls.
def add_file(self, file_object, content_type=None, filename=None,
             name=None, id=None, nr=None, label=None):
    """Add a file to be uploaded.

    file_object: file-like object (with read method) from which to read
     the data
    content_type: MIME content type of data to upload
    filename: filename to pass to server

    If filename is None, no filename is sent to the server.

    If content_type is None, the content type is guessed based on the
    filename and the data read from the file object.

    XXX
    At the moment, guessed content type is always application/octet-stream.
    Use sndhdr, imghdr modules.  Should also try to guess HTML and XML.

    Note the following useful HTML attributes of file upload controls (see
    HTML 4.01 spec, section 17):

    accept: comma-separated list of content types that the server will
     handle correctly; you can use this to filter out non-conforming files
    size: XXX IIRC, this is indicative of whether form wants multiple or
     single files
    maxlength: XXX hint of max content length in bytes?

    The control is located with find_control, using name, id, label and
    nr and the fixed control type "file".

    """
    file_control = self.find_control(name, "file", id=id, label=label, nr=nr)
    file_control.add_file(file_object, content_type, filename)
3070 #---------------------------------------------------
3071 # Form submission methods, applying only to clickable controls.
3073 def click(self
, name
=None, type=None, id=None, nr
=0, coord
=(1,1),
3074 request_class
=urllib2
.Request
,
3076 """Return request that would result from clicking on a control.
3078 The request object is a urllib2.Request instance, which you can pass to
3079 urllib2.urlopen (or ClientCookie.urlopen).
3081 Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
3082 IMAGEs) can be clicked.
3084 Will click on the first clickable control, subject to the name, type
3085 and nr arguments (as for find_control). If no name, type, id or number
3086 is specified and there are no clickable controls, a request will be
3087 returned for the form in its current, un-clicked, state.
3089 IndexError is raised if any of name, type, id or nr is specified but no
3090 matching control is found. ValueError is raised if the HTMLForm has an
3091 enctype attribute that is not recognised.
3093 You can optionally specify a coordinate to click at, which only makes a
3094 difference if you clicked on an image.
3097 return self
._click
(name
, type, id, label
, nr
, coord
, "request",
3098 self
._request
_class
)
3100 def click_request_data(self
,
3101 name
=None, type=None, id=None,
3103 request_class
=urllib2
.Request
,
3105 """As for click method, but return a tuple (url, data, headers).
3107 You can use this data to send a request to the server. This is useful
3108 if you're using httplib or urllib rather than urllib2. Otherwise, use
3111 # Untested. Have to subclass to add headers, I think -- so use urllib2
3114 url, data, hdrs = form.click_request_data()
3115 r = urllib.urlopen(url, data)
3117 # Untested. I don't know of any reason to use httplib -- you can get
3118 # just as much control with urllib2.
3119 import httplib, urlparse
3120 url, data, hdrs = form.click_request_data()
3122 host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
3123 conn = httplib.HTTPConnection(host)
3125 httplib.request("POST", path, data, hdrs)
3127 httplib.request("GET", path, headers=hdrs)
3128 r = conn.getresponse()
3131 return self
._click
(name
, type, id, label
, nr
, coord
, "request_data",
3132 self
._request
_class
)
3134 def click_pairs(self
, name
=None, type=None, id=None,
3137 """As for click_request_data, but returns a list of (key, value) pairs.
3139 You can use this list as an argument to ClientForm.urlencode. This is
3140 usually only useful if you're using httplib or urllib rather than
3141 urllib2 or ClientCookie. It may also be useful if you want to manually
3142 tweak the keys and/or values, but this should not be necessary.
3143 Otherwise, use the click method.
3145 Note that this method is only useful for forms of MIME type
3146 x-www-form-urlencoded. In particular, it does not return the
3147 information required for file upload. If you need file upload and are
3148 not using urllib2, use click_request_data.
3150 Also note that Python 2.0's urllib.urlencode is slightly broken: it
3151 only accepts a mapping, not a sequence of pairs, as an argument. This
3152 messes up any ordering in the argument. Use ClientForm.urlencode
3156 return self
._click
(name
, type, id, label
, nr
, coord
, "pairs",
3157 self
._request
_class
)
3159 #---------------------------------------------------
def find_control(self,
                 name=None, type=None, kind=None, id=None,
                 predicate=None, nr=None,
                 label=None):
    """Locate and return some specific control within the form.

    At least one of the name, type, kind, id, label, predicate and nr
    arguments must be supplied, otherwise ValueError is raised.  If no
    matching control is found, ControlNotFoundError is raised.

    If name is specified, then the control must have the indicated name.

    If type is specified then the control must have the specified type (in
    addition to the types possible for <input> HTML tags: "text",
    "password", "hidden", "submit", "image", "button", "radio", "checkbox",
    "file" we also have "reset", "buttonbutton", "submitbutton",
    "resetbutton", "textarea", "select" and "isindex").

    If kind is specified, then the control must fall into the specified
    group, each of which satisfies a particular interface.  The kinds are
    "text", "list", "multilist", "singlelist", "clickable" and "file".

    If id is specified, then the control must have the indicated id.

    If predicate is specified, then the control must match that function.
    The predicate function is passed the control as its single argument,
    and should return a boolean value indicating whether the control
    matched.

    nr, if supplied, is the sequence number of the control (where 0 is the
    first).  Note that control 0 is the first control matching all the
    other arguments (if supplied); it is not necessarily the first control
    in the form.  If no nr is supplied, AmbiguityError is raised if
    multiple controls match the other arguments (unless the
    .backwards_compat attribute is true).

    If label is specified, then the text of one of the control's labels
    must contain it.  Note that radio controls and checkboxes never have
    labels: their items do.

    """
    criteria = (name, type, kind, id, label, predicate, nr)
    # Reject a completely unconstrained search up front.
    if all(criterion is None for criterion in criteria):
        raise ValueError(
            "at least one argument must be supplied to specify control")
    # Note the argument order of the private helper differs from this
    # method's signature: label comes before predicate and nr.
    return self._find_control(name, type, kind, id, label, predicate, nr)
3208 #---------------------------------------------------
3211 def _find_list_control(self
,
3212 name
=None, type=None, kind
=None, id=None,
3213 label
=None, nr
=None):
3214 if ((name
is None) and (type is None) and (kind
is None) and
3215 (id is None) and (label
is None) and (nr
is None)):
3217 "at least one argument must be supplied to specify control")
3219 return self
._find
_control
(name
, type, kind
, id, label
,
3222 def _find_control(self
, name
, type, kind
, id, label
, predicate
, nr
):
3223 if ((name
is not None) and (name
is not Missing
) and
3224 not isstringlike(name
)):
3225 raise TypeError("control name must be string-like")
3226 if (type is not None) and not isstringlike(type):
3227 raise TypeError("control type must be string-like")
3228 if (kind
is not None) and not isstringlike(kind
):
3229 raise TypeError("control kind must be string-like")
3230 if (id is not None) and not isstringlike(id):
3231 raise TypeError("control id must be string-like")
3232 if (label
is not None) and not isstringlike(label
):
3233 raise TypeError("control label must be string-like")
3234 if (predicate
is not None) and not callable(predicate
):
3235 raise TypeError("control predicate must be callable")
3236 if (nr
is not None) and nr
< 0:
3237 raise ValueError("control number must be a positive integer")
3242 if nr
is None and self
.backwards_compat
:
3245 for control
in self
.controls
:
3246 if ((name
is not None and name
!= control
.name
) and
3247 (name
is not Missing
or control
.name
is not None)):
3249 if type is not None and type != control
.type:
3251 if kind
is not None and not control
.is_of_kind(kind
):
3253 if id is not None and id != control
.id:
3255 if predicate
and not predicate(control
):
3258 for l
in control
.get_labels():
3259 if l
.text
.find(label
) > -1:
3265 return control
# early exit: unambiguous due to nr
3273 if found
and not ambiguous
:
3277 if name
is not None: description
.append("name %s" % repr(name
))
3278 if type is not None: description
.append("type '%s'" % type)
3279 if kind
is not None: description
.append("kind '%s'" % kind
)
3280 if id is not None: description
.append("id '%s'" % id)
3281 if label
is not None: description
.append("label '%s'" % label
)
3282 if predicate
is not None:
3283 description
.append("predicate %s" % predicate
)
3284 if orig_nr
: description
.append("nr %d" % orig_nr
)
3285 description
= ", ".join(description
)
3288 raise AmbiguityError("more than one control matching "+description
)
3290 raise ControlNotFoundError("no control matching "+description
)
3293 def _click(self
, name
, type, id, label
, nr
, coord
, return_type
,
3294 request_class
=urllib2
.Request
):
3296 control
= self
._find
_control
(
3297 name
, type, "clickable", id, label
, None, nr
)
3298 except ControlNotFoundError
:
3299 if ((name
is not None) or (type is not None) or (id is not None) or
3302 # no clickable controls, but no control was explicitly requested,
3303 # so return state without clicking any control
3304 return self
._switch
_click
(return_type
, request_class
)
3306 return control
._click
(self
, coord
, return_type
, request_class
)
3309 """Return sequence of (key, value) pairs suitable for urlencoding."""
3310 return [(k
, v
) for (i
, k
, v
, c_i
) in self
._pairs
_and
_controls
()]
3313 def _pairs_and_controls(self
):
3314 """Return sequence of (index, key, value, control_index)
3315 of totally ordered pairs suitable for urlencoding.
3317 control_index is the index of the control in self.controls
3320 for control_index
in range(len(self
.controls
)):
3321 control
= self
.controls
[control_index
]
3322 for ii
, key
, val
in control
._totally
_ordered
_pairs
():
3323 pairs
.append((ii
, key
, val
, control_index
))
3325 # stable sort by ONLY first item in tuple
3330 def _request_data(self
):
3331 """Return a tuple (url, data, headers)."""
3332 method
= self
.method
.upper()
3333 #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(self.action)
3334 parts
= self
._urlparse
(self
.action
)
3335 rest
, (query
, frag
) = parts
[:-2], parts
[-2:]
3338 if self
.enctype
!= "application/x-www-form-urlencoded":
3340 "unknown GET form encoding type '%s'" % self
.enctype
)
3341 parts
= rest
+ (urlencode(self
._pairs
()), None)
3342 uri
= self
._urlunparse
(parts
)
3343 return uri
, None, []
3344 elif method
== "POST":
3345 parts
= rest
+ (query
, None)
3346 uri
= self
._urlunparse
(parts
)
3347 if self
.enctype
== "application/x-www-form-urlencoded":
3348 return (uri
, urlencode(self
._pairs
()),
3349 [("Content-type", self
.enctype
)])
3350 elif self
.enctype
== "multipart/form-data":
3353 mw
= MimeWriter(data
, http_hdrs
)
3354 f
= mw
.startmultipartbody("form-data", add_to_http_hdrs
=True,
3356 for ii
, k
, v
, control_index
in self
._pairs
_and
_controls
():
3357 self
.controls
[control_index
]._write
_mime
_data
(mw
, k
, v
)
3359 return uri
, data
.getvalue(), http_hdrs
3362 "unknown POST form encoding type '%s'" % self
.enctype
)
3364 raise ValueError("Unknown method '%s'" % method
)
3366 def _switch_click(self
, return_type
, request_class
=urllib2
.Request
):
3367 # This is called by HTMLForm and clickable Controls to hide switching
3369 if return_type
== "pairs":
3370 return self
._pairs
()
3371 elif return_type
== "request_data":
3372 return self
._request
_data
()
3374 req_data
= self
._request
_data
()
3375 req
= request_class(req_data
[0], req_data
[1])
3376 for key
, val
in req_data
[2]:
3377 add_hdr
= req
.add_header
3378 if key
.lower() == "content-type":
3380 add_hdr
= req
.add_unredirected_header
3381 except AttributeError:
3382 # pre-2.4 and not using ClientCookie