1 """HTML form handling for web clients.
3 ClientForm is a Python module for handling HTML forms on the client
4 side, useful for parsing HTML forms, filling them in and returning the
5 completed forms to the server. It has developed from a port of Gisle
6 Aas' Perl module HTML::Form, from the libwww-perl library, but the
7 interface is not the same.
9 The most useful docstring is the one for HTMLForm.
12 RFC 1867: Form-based File Upload in HTML
13 RFC 2388: Returning Values from Forms: multipart/form-data
14 HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
15 HTML 4.01 Specification, W3C Recommendation 24 December 1999
18 Copyright 2002-2003 John J. Lee <jjl@pobox.com>
19 Copyright 1998-2000 Gisle Aas.
21 This code is free software; you can redistribute it and/or modify it
22 under the terms of the BSD License (see the file COPYING included with
28 # Treat unknown controls as text controls? (this was a recent LWP
29 # HTML::Form change) I guess this is INPUT with no TYPE? Check LWP
30 # source and browser behaviour.
31 # Support for list item ids. How to handle missing ids? (How do I deal
32 # with duplicate OPTION labels ATM? Can't remember...)
33 # Arrange things so can automatically PyPI-register with categories
34 # without messing up 1.5.2 compatibility.
36 # Test single and multiple file upload some more on the web.
37 # Does file upload work when name is missing? Sourceforge tracker form
38 # doesn't like it. Check standards, and test with Apache. Test binary
40 # Add label support for CHECKBOX and RADIO.
42 # Deal with character sets properly. Not sure what the issues are here.
43 # I don't *think* any encoding of control names, filenames or data is
44 # necessary -- HTML spec. doesn't require it, and Mozilla Firebird 0.6
45 # doesn't seem to do it.
46 # Add charset parameter to Content-type headers? How to find value??
47 # Get rid of MapBase, AList and MimeWriter.
48 # I'm not going to fix this unless somebody tells me what real servers
49 # that want this encoding actually expect: If enctype is
50 # application/x-www-form-urlencoded and there's a FILE control present.
51 # Strictly, it should be 'name=data' (see HTML 4.01 spec., section
52 # 17.13.2), but I send "name=" ATM. What about multiple file upload??
53 # Get rid of the two type-switches (for kind and click*).
54 # Remove single-selection code: can be special case of multi-selection,
55 # with a few variations, I think.
56 # Factor out multiple-selection list code? May not be easy. Maybe like
61 # | MultipleListControlMixin
71 # Maybe a 0.2.x, cleaned up a bit and with id support for list items?
72 # Not sure it's worth it, really.
73 # Remove toggle methods.
74 # Replace by_label with choice between value / id / label /
75 # element contents (see discussion with Gisle about labels on
79 # XForms? Don't know if there's a need here.
93 import sys
, urllib
, urllib2
, types
, string
, mimetools
, copy
94 from urlparse
import urljoin
95 from cStringIO
import StringIO
105 CHUNK
= 1024 # size of chunks fed to parser, in bytes
107 # This version of urlencode is from my Python 1.5.2 back-port of the
108 # Python 2.1 CVS maintenance branch of urllib. It will accept a sequence
109 # of pairs instead of a mapping -- the 2.0 version only accepts a mapping.
110 def urlencode(query
,doseq
=False,):
111 """Encode a sequence of two-element tuples or dictionary into a URL query \
114 If any values in the query arg are sequences and doseq is true, each
115 sequence element is converted to a separate parameter.
117 If the query arg is a sequence of two-element tuples, the order of the
118 parameters in the output will match the order of parameters in the
122 if hasattr(query
,"items"):
124 query
= query
.items()
126 # it's a bother at times that strings and string-like objects are
129 # non-sequence items should not work with len()
131 # non-empty strings will fail this
132 if len(query
) and type(query
[0]) != types
.TupleType
:
134 # zero-length sequences of all types will get here and succeed,
135 # but that's a minor nit - since the original implementation
136 # allowed empty dicts that type of behavior probably should be
137 # preserved for consistency
139 ty
,va
,tb
= sys
.exc_info()
140 raise TypeError("not a valid non-string sequence or mapping "
145 # preserve old behavior
147 k
= urllib
.quote_plus(str(k
))
148 v
= urllib
.quote_plus(str(v
))
149 l
.append(k
+ '=' + v
)
152 k
= urllib
.quote_plus(str(k
))
153 if type(v
) == types
.StringType
:
154 v
= urllib
.quote_plus(v
)
155 l
.append(k
+ '=' + v
)
156 elif UNICODE
and type(v
) == types
.UnicodeType
:
157 # is there a reasonable way to convert to ASCII?
158 # encode generates a string, but "replace" or "ignore"
159 # lose information and "strict" can raise UnicodeError
160 v
= urllib
.quote_plus(v
.encode("ASCII","replace"))
161 l
.append(k
+ '=' + v
)
164 # is this a sufficient test for sequence-ness?
168 v
= urllib
.quote_plus(str(v
))
169 l
.append(k
+ '=' + v
)
171 # loop over the sequence
173 l
.append(k
+ '=' + urllib
.quote_plus(str(elt
)))
174 return string
.join(l
, '&')
def startswith(string, initial):
    """Return true if `string` begins with `initial`.

    Works by slicing, so any pair of sequences that support len(), slicing
    and == can be compared, not only strings. An empty `initial` matches
    everything.

    Note that the `string` parameter hides the string module inside this
    function.
    """
    prefix_len = len(initial)
    if prefix_len <= len(string):
        return string[:prefix_len] == initial
    # initial is longer than string, so it cannot be a prefix
    return False
183 except (TypeError, KeyError):
195 # XXX don't really want to drag this along (MapBase, AList, MimeWriter)
198 """Mapping designed to be easily derived from.
200 Subclass it and override __init__, __setitem__, __getitem__, __delitem__
201 and keys. Nothing else should need to be overridden, unlike UserDict.
202 This significantly simplifies dictionary-like classes.
204 Also different from UserDict in that it has a readonly flag, and can be
205 updated (and initialised) with a sequence of pairs (key, value).
208 def __init__(self
, init
=None):
210 self
.readonly
= False
211 if init
is not None: self
.update(init
)
213 def __getitem__(self
, key
):
214 return self
._data
[key
]
216 def __setitem__(self
, key
, item
):
217 if not self
.readonly
:
218 self
._data
[key
] = item
220 raise TypeError("object doesn't support item assignment")
222 def __delitem__(self
, key
):
223 if not self
.readonly
:
226 raise TypeError("object doesn't support item deletion")
229 return self
._data
.keys()
231 # now the internal workings, there should be no need to override these:
234 for k
in self
.keys():
239 for k
, v
in self
.items():
240 rep
.append("%s: %s" % (repr(k
), repr(v
)))
241 return self
.__class
__.__name
__+"{"+(string
.join(rep
, ", "))+"}"
244 return copy
.copy(self
)
246 def __cmp__(self
, dict):
247 # note: return value is *not* boolean
248 for k
, v
in self
.items():
249 if not (dict.has_key(k
) and dict[k
] == v
):
254 return len(self
.keys())
258 for k
in self
.keys():
267 r
.append((keys
[i
], vals
[i
]))
def has_key(self, key):
    """Return true if `key` is among the keys reported by self.keys()."""
    known_keys = self.keys()
    return key in known_keys
273 def update(self
, map):
274 if issequence(map) and not isstringlike(map):
279 if not isinstance(tup
, TupleType
):
281 "MapBase.update requires a map or a sequence of pairs")
285 def get(self
, key
, failobj
=None):
286 if key
in self
.keys():
291 def setdefault(self
, key
, failobj
=None):
292 if not self
.has_key(key
):
297 class AList(MapBase
):
298 """Read-only ordered mapping."""
299 def __init__(self
, seq
=[]):
301 self
._inverted
= False
302 self
._data
= list(seq
[:])
305 for key
, value
in seq
:
306 self
._keys
.append(key
)
307 self
._values
.append(value
)
def set_inverted(self, inverted):
    """Select the normal or the inverted orientation of the mapping.

    inverted: only its truth value matters

    If the requested orientation differs from the current one, the _keys
    and _values lists trade places. The _inverted flag is then stored as
    True or False, whatever object was passed in.
    """
    if inverted:
        swap_needed, new_state = not self._inverted, True
    else:
        swap_needed, new_state = self._inverted, False
    if swap_needed:
        self._values, self._keys = self._keys, self._values
    self._inverted = new_state
316 def __getitem__(self
, key
):
318 i
= self
._keys
.index(key
)
321 return self
._values
[i
]
323 def __delitem__(self
, key
):
325 i
= self
._keys
.index
[key
]
def keys(self):
    """Return a new list with the current contents of self._keys.

    The caller gets its own list, so changing it does not affect the
    mapping.
    """
    snapshot = self._keys[:]
    return list(snapshot)
def values(self):
    """Return a new list with the current contents of self._values.

    The caller gets its own list, so changing it does not affect the
    mapping.
    """
    snapshot = self._values[:]
    return list(snapshot)
334 if not self
._inverted
:
339 newdata
.append((v
, k
))
343 # This cut-n-pasted MimeWriter from standard library is here so can add
344 # to HTTP headers rather than message body when appropriate. It also uses
345 # \r\n in place of \n. This is nasty.
348 """Generic MIME writer.
360 A MIME writer is much more primitive than a MIME parser. It
361 doesn't seek around on the output file, and it doesn't use large
362 amounts of buffer space, so you have to write the parts in the
363 order they should occur on the output file. It does buffer the
364 headers you add, allowing you to rearrange their order.
368 f = <open the output file>
370 ...call w.addheader(key, value) 0 or more times...
374 f = w.startbody(content_type)
375 ...call f.write(data) for body data...
379 w.startmultipartbody(subtype)
381 subwriter = w.nextpart()
382 ...use the subwriter's methods to create the subpart...
385 The subwriter is another MimeWriter instance, and should be
386 treated in the same way as the toplevel MimeWriter. This way,
387 writing recursive body parts is easy.
389 Warning: don't forget to call lastpart()!
391 XXX There should be more state so calls made in the wrong order
396 - startbody() just returns the file passed to the constructor;
397 but don't use this knowledge, as it may be changed.
399 - startmultipartbody() actually returns a file as well;
400 this can be used to write the initial 'if you can read this your
401 mailer is not MIME-aware' message.
403 - If you call flushheaders(), the headers accumulated so far are
404 written out (and forgotten); this is useful if you don't need a
405 body part at all, e.g. for a subpart of type message/rfc822
406 that's (mis)used to store some header-like information.
408 - Passing a keyword argument 'prefix=<flag>' to addheader(),
409 start*body() affects where the header is inserted; 0 means
410 append at the end, 1 means insert at the start; default is
411 append for addheader(), but insert for start*body(), which use
412 it to determine where the Content-type header goes.
416 def __init__(self
, fp
, http_hdrs
=None):
417 self
._http
_hdrs
= http_hdrs
421 self
._first
_part
= True
423 def addheader(self
, key
, value
, prefix
=0,
426 prefix is ignored if add_to_http_hdrs is true.
428 lines
= string
.split(value
, "\r\n")
429 while lines
and not lines
[-1]: del lines
[-1]
430 while lines
and not lines
[0]: del lines
[0]
432 value
= string
.join(lines
, "")
433 self
._http
_hdrs
.append((key
, value
))
435 for i
in range(1, len(lines
)):
436 lines
[i
] = " " + string
.strip(lines
[i
])
437 value
= string
.join(lines
, "\r\n") + "\r\n"
438 line
= key
+ ": " + value
440 self
._headers
.insert(0, line
)
442 self
._headers
.append(line
)
def flushheaders(self):
    """Write the buffered header lines to the output file and forget them.

    The buffer is emptied after writing, so a later flushheaders() call
    cannot emit the same header lines a second time.
    """
    self._fp.writelines(self._headers)
    self._headers = []
448 def startbody(self
, ctype
=None, plist
=[], prefix
=1,
449 add_to_http_hdrs
=0, content_type
=1):
451 prefix is ignored if add_to_http_hdrs is true.
453 if content_type
and ctype
:
454 for name
, value
in plist
:
455 ctype
= ctype
+ ';\r\n %s=\"%s\"' % (name
, value
)
456 self
.addheader("Content-type", ctype
, prefix
=prefix
,
457 add_to_http_hdrs
=add_to_http_hdrs
)
459 if not add_to_http_hdrs
: self
._fp
.write("\r\n")
460 self
._first
_part
= True
def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1,
                       add_to_http_hdrs=0, content_type=1):
    """Begin a multipart body; return whatever startbody() returns.

    subtype: appended to "multipart/" to form the content type
    boundary: boundary string; when false, mimetools.choose_boundary()
     supplies one
    plist: list of extra (name, value) content-type parameters, placed
     after the boundary parameter
    prefix, add_to_http_hdrs, content_type: passed on to startbody()
     unchanged

    The boundary in use is pushed onto self._boundary.
    """
    boundary = boundary or mimetools.choose_boundary()
    self._boundary.append(boundary)
    # plist is only read here (concatenation builds a new list), so the
    # shared default list is never modified.
    params = [("boundary", boundary)] + plist
    return self.startbody("multipart/" + subtype, params,
                          prefix=prefix,
                          add_to_http_hdrs=add_to_http_hdrs,
                          content_type=content_type)
474 boundary
= self
._boundary
[-1]
476 self
._first
_part
= False
478 self
._fp
.write("\r\n")
479 self
._fp
.write("--" + boundary
+ "\r\n")
480 return self
.__class
__(self
._fp
)
485 boundary
= self
._boundary
.pop()
486 self
._fp
.write("\r\n--" + boundary
+ "--\r\n")
class ControlNotFoundError(ValueError):
    # NOTE(review): presumably raised when a control lookup fails --
    # confirm against the code that raises it.
    pass


class ItemNotFoundError(ValueError):
    # NOTE(review): presumably raised when a list item lookup fails --
    # confirm against the code that raises it.
    pass


class ItemCountError(ValueError):
    # NOTE(review): presumably raised for a wrong number of list items --
    # confirm against the code that raises it.
    pass


class ParseError(Exception):
    """Raised on HTML parse errors, unless the parser ignores errors."""
def ParseResponse(response, select_default=False, ignore_errors=False):
    """Parse HTTP response and return a list of HTMLForm instances.

    The return value of urllib2.urlopen can be conveniently passed to this
    function as the response parameter.

    ClientForm.ParseError is raised on parse errors.

    response: file-like object (supporting read() method) with a method
     geturl(), returning the base URI of the HTTP response
    select_default: for multiple-selection SELECT controls and RADIO controls,
     pick the first item as the default if none are selected in the HTML
    ignore_errors: don't raise ParseError, and carry on regardless when a
     parse error is found

    Pass a true value for select_default if you want the behaviour specified by
    RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
    RADIO or multiple-selection SELECT control if none were selected in the
    HTML.  Most browsers (including Microsoft Internet Explorer (IE) and
    Netscape Navigator) instead leave all items unselected in these cases.  The
    W3C HTML 4.0 standard leaves this behaviour undefined in the case of
    multiple-selection SELECT controls, but insists that at least one RADIO
    button should be checked at all times, in contradiction to browser
    behaviour.

    Precisely what ignore_errors does isn't well-defined yet, so don't rely too
    much on the current behaviour -- if you want robustness, you're better off
    fixing the HTML before passing it to this function.

    """
    # ignore_errors has to be handed on as well: ParseFile gives it to the
    # parser, which raises ParseError unless the flag is true.
    return ParseFile(response, response.geturl(), select_default,
                     ignore_errors)
528 def ParseFile(file, base_uri
, select_default
=False, ignore_errors
=False):
529 """Parse HTML and return a list of HTMLForm instances.
531 ClientForm.ParseError is raised on parse errors.
533 file: file-like object (supporting read() method) containing HTML with zero
534 or more forms to be parsed
535 base_uri: the base URI of the document
537 For the other arguments and further details, see ParseResponse.__doc__.
540 fp
= _FORM_PARSER_CLASS(ignore_errors
)
542 data
= file.read(CHUNK
)
544 if len(data
) != CHUNK
: break
546 for (name
, action
, method
, enctype
), attrs
, controls
in fp
.forms
:
550 action
= urljoin(base_uri
, action
)
551 form
= HTMLForm(action
, method
, enctype
, name
, attrs
)
552 for type, name
, attr
in controls
:
553 form
.new_control(type, name
, attr
, select_default
=select_default
)
560 class _AbstractFormParser
:
561 """forms attribute contains HTMLForm instances on completion."""
562 # pinched (and modified) from Moshe Zadka
563 def __init__(self
, ignore_errors
, entitydefs
=None):
564 if entitydefs
is not None:
565 self
.entitydefs
= entitydefs
566 self
._ignore
_errors
= ignore_errors
568 self
._current
_form
= None
570 self
._optgroup
= None
572 self
._textarea
= None
def error(self, error):
    """Report a parse problem.

    error: exception instance describing the problem

    Raises `error`, unless self._ignore_errors is true, in which case the
    problem is dropped and None is returned.
    """
    if self._ignore_errors:
        return
    raise error
577 def start_form(self
, attrs
):
578 if self
._current
_form
is not None:
579 self
.error(ParseError("nested FORMs"))
582 enctype
= "application/x-www-form-urlencoded"
585 for key
, value
in attrs
:
588 elif key
== "action":
590 elif key
== "method":
591 method
= string
.upper(value
)
592 elif key
== "enctype":
593 enctype
= string
.lower(value
)
597 self
._current
_form
= (name
, action
, method
, enctype
), d
, controls
600 if self
._current
_form
is None:
601 self
.error(ParseError("end of FORM before start"))
602 self
.forms
.append(self
._current
_form
)
603 self
._current
_form
= None
605 def start_select(self
, attrs
):
606 if self
._current
_form
is None:
607 self
.error(ParseError("start of SELECT before start of FORM"))
608 if self
._select
is not None:
609 self
.error(ParseError("nested SELECTs"))
610 if self
._textarea
is not None:
611 self
.error(ParseError("SELECT inside TEXTAREA"))
613 for key
, val
in attrs
:
618 self
._append
_select
_control
({"__select": d
})
620 def end_select(self
):
621 if self
._current
_form
is None:
622 self
.error(ParseError("end of SELECT before start of FORM"))
623 if self
._select
is None:
624 self
.error(ParseError("end of SELECT before start"))
626 if self
._option
is not None:
631 def start_optgroup(self
, attrs
):
632 if self
._select
is None:
633 self
.error(ParseError("OPTGROUP outside of SELECT"))
635 for key
, val
in attrs
:
def end_optgroup(self):
    """Handle a closing OPTGROUP tag.

    Reports a ParseError through self.error() when no OPTGROUP is open,
    then clears self._optgroup in every case.
    """
    no_group_open = self._optgroup is None
    if no_group_open:
        problem = ParseError("end of OPTGROUP before start")
        self.error(problem)
    self._optgroup = None
645 def _start_option(self
, attrs
):
646 if self
._select
is None:
647 self
.error(ParseError("OPTION outside of SELECT"))
648 if self
._option
is not None:
652 for key
, val
in attrs
:
656 self
._option
.update(d
)
657 if (self
._optgroup
and self
._optgroup
.has_key("disabled") and
658 not self
._option
.has_key("disabled")):
659 self
._option
["disabled"] = None
661 def _end_option(self
):
662 if self
._option
is None:
663 self
.error(ParseError("end of OPTION before start"))
665 contents
= string
.strip(self
._option
.get("contents", ""))
666 #contents = string.strip(self._option["contents"])
667 self
._option
["contents"] = contents
668 if not self
._option
.has_key("value"):
669 self
._option
["value"] = contents
670 if not self
._option
.has_key("label"):
671 self
._option
["label"] = contents
672 # stuff dict of SELECT HTML attrs into a special private key
673 # (gets deleted again later)
674 self
._option
["__select"] = self
._select
675 self
._append
_select
_control
(self
._option
)
678 def _append_select_control(self
, attrs
):
679 controls
= self
._current
_form
[2]
680 name
= self
._select
.get("name")
681 controls
.append(("select", name
, attrs
))
683 ## def do_option(self, attrs):
684 ## if self._select is None:
685 ## self.error(ParseError("OPTION outside of SELECT"))
687 ## for key, val in attrs:
691 ## self._option.update(d)
692 ## if (self._optgroup and self._optgroup.has_key("disabled") and
693 ## not self._option.has_key("disabled")):
694 ## self._option["disabled"] = None
696 def start_textarea(self
, attrs
):
697 if self
._current
_form
is None:
698 self
.error(ParseError("start of TEXTAREA before start of FORM"))
699 if self
._textarea
is not None:
700 self
.error(ParseError("nested TEXTAREAs"))
701 if self
._select
is not None:
702 self
.error(ParseError("TEXTAREA inside SELECT"))
704 for key
, val
in attrs
:
def end_textarea(self):
    """Handle a closing TEXTAREA tag.

    Reports a ParseError through self.error() when no FORM or no TEXTAREA
    is open. Otherwise appends ("textarea", name, attributes) to the
    controls list of the current form (index 2 of self._current_form),
    where name is the "name" value of the attribute dictionary, and then
    clears self._textarea.
    """
    if self._current_form is None:
        self.error(ParseError("end of TEXTAREA before start of FORM"))
    if self._textarea is None:
        self.error(ParseError("end of TEXTAREA before start"))
    form_controls = self._current_form[2]
    textarea = self._textarea
    form_controls.append(("textarea", textarea.get("name"), textarea))
    self._textarea = None
719 def handle_data(self
, data
):
720 if self
._option
is not None:
721 # self._option is a dictionary of the OPTION element's HTML
722 # attributes, but it has two special keys, one of which is the
723 # special "contents" key, which holds the text between OPTION tags (the
724 # other is the "__select" key: see the _end_option method)
727 elif self
._textarea
is not None:
733 if not map.has_key(key
):
736 map[key
] = map[key
] + data
738 ## def handle_data(self, data):
739 ## if self._option is not None:
740 ## contents = string.strip(data)
741 ## controls = self._current_form[2]
742 ## if not self._option.has_key("value"):
743 ## self._option["value"] = contents
744 ## if not self._option.has_key("label"):
745 ## self._option["label"] = contents
746 ## # self._option is a dictionary of the OPTION element's HTML
747 ## # attributes, but it has two special keys:
748 ## # 1. special "contents" key contains text between OPTION tags
749 ## self._option["contents"] = contents
750 ## # 2. stuff dict of SELECT HTML attrs into a special private key
751 ## # (gets deleted again later)
752 ## self._option["__select"] = self._select
753 ## self._append_select_control(self._option)
754 ## self._option = None
755 ## elif self._textarea is not None:
756 ## #self._textarea["value"] = data
757 ## if self._textarea.get("value") is None:
758 ## self._textarea["value"] = data
760 ## self._textarea["value"] = self._textarea["value"] + data
762 def do_button(self
, attrs
):
763 if self
._current
_form
is None:
764 self
.error(ParseError("start of BUTTON before start of FORM"))
766 d
["type"] = "submit" # default
767 for key
, val
in attrs
:
769 controls
= self
._current
_form
[2]
773 # we don't want to lose information, so use a type string that
774 # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
775 # eg. type for BUTTON/RESET is "resetbutton"
776 # (type for INPUT/RESET is "reset")
778 controls
.append((type, name
, d
))
780 def do_input(self
, attrs
):
781 if self
._current
_form
is None:
782 self
.error(ParseError("start of INPUT before start of FORM"))
784 d
["type"] = "text" # default
785 for key
, val
in attrs
:
787 controls
= self
._current
_form
[2]
791 controls
.append((type, name
, d
))
793 def do_isindex(self
, attrs
):
794 if self
._current
_form
is None:
795 self
.error(ParseError("start of ISINDEX before start of FORM"))
797 for key
, val
in attrs
:
799 controls
= self
._current
_form
[2]
801 # isindex doesn't have type or name HTML attributes
802 controls
.append(("isindex", None, d
))
804 # use HTMLParser if we have it (it does XHTML), htmllib otherwise
808 import htmllib
, formatter
class _FormParser(_AbstractFormParser, htmllib.HTMLParser):
    """Form parser built on htmllib.HTMLParser.

    This is still here for compatibility with Python 1.5.2.  It doesn't do
    the right thing with XHTML.
    """

    def __init__(self, ignore_errors, entitydefs=None):
        """Initialise both base classes.

        htmllib.HTMLParser gets a NullFormatter; ignore_errors and
        entitydefs go to _AbstractFormParser.
        """
        htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
        _AbstractFormParser.__init__(self, ignore_errors, entitydefs)

    def do_option(self, attrs):
        """Handle an OPTION tag by delegating to _start_option."""
        _AbstractFormParser._start_option(self, attrs)
819 _FORM_PARSER_CLASS
= _FormParser
821 class _XHTMLCompatibleFormParser(_AbstractFormParser
, HTMLParser
.HTMLParser
):
822 # thanks to Michael Howitz for this!
def __init__(self, ignore_errors, entitydefs=None):
    """Initialise both base classes.

    HTMLParser.HTMLParser takes no extra arguments; ignore_errors and
    entitydefs go to _AbstractFormParser.
    """
    HTMLParser.HTMLParser.__init__(self)
    _AbstractFormParser.__init__(self, ignore_errors, entitydefs)
def start_option(self, attrs):
    """Handle an opening OPTION tag by delegating to _start_option.

    attrs: the tag's attributes, passed through unchanged
    """
    _AbstractFormParser._start_option(self, attrs)
def end_option(self):
    """Handle a closing OPTION tag by delegating to _end_option."""
    _AbstractFormParser._end_option(self)
833 def handle_starttag(self
, tag
, attrs
):
835 method
= getattr(self
, 'start_' + tag
)
836 except AttributeError:
838 method
= getattr(self
, 'do_' + tag
)
839 except AttributeError:
846 def handle_endtag(self
, tag
):
848 method
= getattr(self
, 'end_' + tag
)
849 except AttributeError:
854 # handle_charref, handle_entityref and default entitydefs are taken
856 def handle_charref(self
, name
):
860 self
.unknown_charref(name
)
862 if not 0 <= n
<= 255:
863 self
.unknown_charref(name
)
865 self
.handle_data(chr(n
))
867 # Definition of entities -- derived classes may override
869 {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
871 def handle_entityref(self
, name
):
872 table
= self
.entitydefs
874 self
.handle_data(table
[name
])
876 self
.unknown_entityref(name
)
879 # These methods would have passed through the ref intact if I'd thought
880 # of it earlier, but since the old parser silently swallows unknown
881 # refs, so does this new parser.
def unknown_entityref(self, ref):
    """Swallow an unknown entity reference: nothing is done with `ref`."""
    return None
def unknown_charref(self, ref):
    """Swallow an unknown character reference: nothing is done with `ref`."""
    return None
885 _FORM_PARSER_CLASS
= _XHTMLCompatibleFormParser
889 """An HTML form control.
891 An HTMLForm contains a sequence of Controls. HTMLForm delegates lots of
892 things to Control objects, and most of Control's methods are, in effect,
893 documented by the HTMLForm docstrings.
895 The Controls in an HTMLForm can be got at via the HTMLForm.find_control
896 method or the HTMLForm.controls attribute.
898 Control instances are usually constructed using the ParseFile /
899 ParseResponse functions, so you can probably ignore the rest of this
900 paragraph. A Control is only properly initialised after the fixup method
901 has been called. In fact, this is only strictly necessary for ListControl
902 instances. This is necessary because ListControls are built up from
903 ListControls each containing only a single item, and their initial value(s)
904 can only be known after the sequence is complete.
906 The types and values that are acceptable for assignment to the value
907 attribute are defined by subclasses.
909 If the disabled attribute is true, this represents the state typically
910 represented by browsers by `greying out' a control. If the disabled
911 attribute is true, the Control will raise AttributeError if an attempt is
912 made to change its value. In addition, the control will not be considered
913 `successful' as defined by the W3C HTML 4 standard -- ie. it will
914 contribute no data to the return value of the HTMLForm.click* methods. To
915 enable a control, set the disabled attribute to a false value.
917 If the readonly attribute is true, the Control will raise AttributeError if
918 an attempt is made to change its value. To make a control writable, set
919 the readonly attribute to a false value.
921 All controls have the disabled and readonly attributes, not only those that
922 may have the HTML attributes of the same names.
924 On assignment to the value attribute, the following exceptions are raised:
925 TypeError, AttributeError (if the value attribute should not be assigned
926 to, because the control is disabled, for example) and ValueError.
928 If the name or value attributes are None, or the value is an empty list, or
929 if the control is disabled, the control is not successful.
933 type: string describing type of control (see the keys of the
934 HTMLForm.type2class dictionary for the allowable values) (readonly)
935 name: name of control (readonly)
936 value: current value of control (subclasses may allow a single value, a
937 sequence of values, or either)
938 disabled: disabled state
939 readonly: readonly state
940 id: value of id HTML attribute
943 def __init__(self
, type, name
, attrs
):
945 type: string describing type of control (see the keys of the
946 HTMLForm.type2class dictionary for the allowable values)
948 attrs: HTML attributes of control's HTML element
951 raise NotImplementedError()
def add_to_form(self, form):
    """Append this control to the end of form.controls.

    form: object with a `controls` list
    """
    form_controls = form.controls
    form_controls.append(self)
959 def __getattr__(self
, name
): raise NotImplementedError()
960 def __setattr__(self
, name
, value
): raise NotImplementedError()
963 """Return list of (key, value) pairs suitable for passing to urlencode.
965 raise NotImplementedError()
967 def _write_mime_data(self
, mw
):
968 """Write data for this control to a MimeWriter."""
970 for name
, value
in self
.pairs():
972 mw2
.addheader("Content-disposition",
973 'form-data; name="%s"' % name
, 1)
974 f
= mw2
.startbody(prefix
=0)
978 raise NotImplementedError()
981 #---------------------------------------------------
982 class ScalarControl(Control
):
983 """Control whose value is not restricted to one of a prescribed set.
985 Some ScalarControls don't accept any value attribute. Otherwise, takes a
986 single value, which must be string-like.
988 Additional read-only public attribute:
990 attrs: dictionary mapping the names of original HTML attributes of the
991 control to their values
def __init__(self, type, name, attrs):
    """Set up the control from its HTML attributes.

    type: string describing the type of control; stored lower-cased
    name: name of the control
    attrs: mapping of the element's HTML attributes.  "value" and "id"
     are read from it, the mere presence of "disabled" / "readonly"
     switches the matching flag on, and a copy is kept as self.attrs.
    """
    # type and name are written straight into the instance dictionary:
    # __setattr__ refuses to assign them ("... attribute is readonly").
    own = self.__dict__
    own["type"] = string.lower(type)
    own["name"] = name

    # Initial value: the HTML "value" attribute, or None if absent.
    self._value = attrs.get("value")
    self.disabled = attrs.has_key("disabled")
    self.readonly = attrs.has_key("readonly")
    self.id = attrs.get("id")

    # Keep a private copy of the original attributes.
    self.attrs = attrs.copy()

    self._clicked = False
1006 def __getattr__(self
, name
):
1008 return self
.__dict
__["_value"]
1010 raise AttributeError("%s instance has no attribute '%s'" %
1011 (self
.__class
__.__name
__, name
))
1013 def __setattr__(self
, name
, value
):
1015 if not isstringlike(value
):
1016 raise TypeError("must assign a string")
1018 raise AttributeError("control '%s' is readonly" % self
.name
)
1020 raise AttributeError("control '%s' is disabled" % self
.name
)
1021 self
.__dict
__["_value"] = value
1022 elif name
in ("name", "type"):
1023 raise AttributeError("%s attribute is readonly" % name
)
1025 self
.__dict
__[name
] = value
1030 if name
is None or value
is None or self
.disabled
:
1032 return [(name
, value
)]
1037 if name
is None: name
= "<None>"
1038 if value
is None: value
= "<None>"
1041 if self
.disabled
: infos
.append("disabled")
1042 if self
.readonly
: infos
.append("readonly")
1043 info
= string
.join(infos
, ", ")
1044 if info
: info
= " (%s)" % info
1046 return "<%s(%s=%s)%s>" % (self
.__class
__.__name
__, name
, value
, info
)
1049 #---------------------------------------------------
1050 class TextControl(ScalarControl
):
1051 """Textual input control.
1062 def __init__(self
, type, name
, attrs
):
1063 ScalarControl
.__init
__(self
, type, name
, attrs
)
1064 if self
.type == "hidden": self
.readonly
= True
1065 if self
._value
is None:
1069 #---------------------------------------------------
1070 class FileControl(ScalarControl
):
1071 """File upload with INPUT TYPE=FILE.
1073 The value attribute of a FileControl is always None.
1075 Additional public method: add_file
1078 def __init__(self
, type, name
, attrs
):
1079 ScalarControl
.__init
__(self
, type, name
, attrs
)
1081 self
._upload
_data
= []
1083 def __setattr__(self
, name
, value
):
1084 if name
in ("value", "name", "type"):
1085 raise AttributeError("%s attribute is readonly" % name
)
1087 self
.__dict
__[name
] = value
def add_file(self, file_object, content_type=None, filename=None):
    """Queue a file for upload through this control.

    file_object: object with a read attribute
    content_type: None or string-like; None means
     "application/octet-stream"
    filename: None or string-like

    Raises TypeError if any argument fails these checks.  Otherwise the
    tuple (file_object, content_type, filename) is appended to
    self._upload_data.
    """
    if not hasattr(file_object, "read"):
        raise TypeError("file-like object must have read method")
    # Same rule for both optional arguments, checked in signature order.
    optional_args = (
        (content_type, "content type must be None or string-like"),
        (filename, "filename must be None or string-like"),
    )
    for candidate, complaint in optional_args:
        if not (candidate is None or isstringlike(candidate)):
            raise TypeError(complaint)
    if content_type is None:
        content_type = "application/octet-stream"
    self._upload_data.append((file_object, content_type, filename))
1101 # XXX should it be successful even if unnamed?
1102 if self
.name
is None or self
.disabled
:
1104 return [(self
.name
, "")]
1106 def _write_mime_data(self
, mw
):
1107 # called by HTMLForm
1108 if len(self
._upload
_data
) == 1:
1110 file_object
, content_type
, filename
= self
._upload
_data
[0]
1112 fn_part
= filename
and ('; filename="%s"' % filename
) or ''
1113 disp
= 'form-data; name="%s"%s' % (self
.name
, fn_part
)
1114 mw2
.addheader("Content-disposition", disp
, prefix
=1)
1115 fh
= mw2
.startbody(content_type
, prefix
=0)
1116 fh
.write(file_object
.read())
1117 elif len(self
._upload
_data
) != 0:
1120 disp
= 'form-data; name="%s"' % self
.name
1121 mw2
.addheader("Content-disposition", disp
, prefix
=1)
1122 fh
= mw2
.startmultipartbody("mixed", prefix
=0)
1123 for file_object
, content_type
, filename
in self
._upload
_data
:
1124 mw3
= mw2
.nextpart()
1125 fn_part
= filename
and ('; filename="%s"' % filename
) or ''
1126 disp
= 'file%s' % fn_part
1127 mw3
.addheader("Content-disposition", disp
, prefix
=1)
1128 fh2
= mw3
.startbody(content_type
, prefix
=0)
1129 fh2
.write(file_object
.read())
1134 if name
is None: name
= "<None>"
1136 if not self
._upload
_data
:
1137 value
= "<No files added>"
1140 for file, ctype
, filename
in self
._upload
_data
:
1141 if filename
is None:
1142 value
.append("<Unnamed file>")
1144 value
.append(filename
)
1145 value
= string
.join(value
, ", ")
1148 if self
.disabled
: info
.append("disabled")
1149 if self
.readonly
: info
.append("readonly")
1150 info
= string
.join(info
, ", ")
1151 if info
: info
= " (%s)" % info
1153 return "<%s(%s=%s)%s>" % (self
.__class
__.__name
__, name
, value
, info
)
1156 #---------------------------------------------------
1157 class IsindexControl(ScalarControl
):
1160 ISINDEX is the odd-one-out of HTML form controls. In fact, it isn't really
1161 part of regular HTML forms at all, and predates it. You're only allowed
1162 one ISINDEX per HTML document. ISINDEX and regular form submission are
1163 mutually exclusive -- either submit a form, or the ISINDEX.
1165 Having said this, since ISINDEX controls may appear in forms (which is
1166 probably bad HTML), ParseFile / ParseResponse will include them in the
1167 HTMLForm instances it returns. You can set the ISINDEX's value, as with
1168 any other control (but note that ISINDEX controls have no name, so you'll
1169 need to use the type argument of set_value!). When you submit the form,
1170 the ISINDEX will not be successful (ie., no data will get returned to the
1171 server as a result of its presence), unless you click on the ISINDEX
1172 control, in which case the ISINDEX gets submitted instead of the form:
1174 form.set_value("my isindex value", type="isindex")
1175 urllib2.urlopen(form.click(type="isindex"))
1177 ISINDEX elements outside of FORMs are ignored. If you want to submit one
1178 by hand, do it like so:
1180 url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value"))
1181 result = urllib2.urlopen(url)
1184 def __init__(self
, type, name
, attrs
):
1185 ScalarControl
.__init
__(self
, type, name
, attrs
)
1186 if self
._value
is None:
1192 def _click(self
, form
, coord
, return_type
):
1193 # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
1195 # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is
1196 # deprecated in 4.01, but it should still say how to submit it).
1197 # Submission of ISINDEX is explained in the HTML 3.2 spec, though.
1198 url
= urljoin(form
.action
, "?"+urllib
.quote_plus(self
.value
))
1199 req_data
= url
, None, []
1201 if return_type
== "pairs":
1203 elif return_type
== "request_data":
1206 return urllib2
.Request(url
)
1210 if value
is None: value
= "<None>"
1213 if self
.disabled
: infos
.append("disabled")
1214 if self
.readonly
: infos
.append("readonly")
1215 info
= string
.join(infos
, ", ")
1216 if info
: info
= " (%s)" % info
1218 return "<%s(%s)%s>" % (self
.__class
__.__name
__, value
, info
)
1221 #---------------------------------------------------
1222 class IgnoreControl(ScalarControl
):
1223 """Control that we're not interested in.
1232 These controls are always unsuccessful, in the terminology of HTML 4 (ie.
1233 they never require any information to be returned to the server).
1235 BUTTON/BUTTON is used to generate events for script embedded in HTML.
1237 The value attribute of IgnoreControl is always None.
1240 def __init__(self
, type, name
, attrs
):
1241 ScalarControl
.__init
__(self
, type, name
, attrs
)
1244 def __setattr__(self
, name
, value
):
1246 raise AttributeError(
1247 "control '%s' is ignored, hence read-only" % self
.name
)
1248 elif name
in ("name", "type"):
1249 raise AttributeError("%s attribute is readonly" % name
)
1251 self
.__dict
__[name
] = value
1254 #---------------------------------------------------
1255 class ListControl(Control
):
1256 """Control representing a sequence of items.
1258 The value attribute of a ListControl represents the selected list items in
1261 ListControl implements both list controls that take a single value and
1262 those that take multiple values.
1264 ListControls accept sequence values only. Some controls only accept
1265 sequences of length 0 or 1 (RADIO, and single-selection SELECT).
1266 In those cases, ItemCountError is raised if len(sequence) > 1. CHECKBOXes
1267 and multiple-selection SELECTs (those having the "multiple" HTML attribute)
1268 accept sequences of any length.
1270 Note the following mistake:
1272 control.value = some_value
1273 assert control.value == some_value # not necessarily true
1275 The reason for this is that the value attribute always gives the list items
1276 in the order they were listed in the HTML.
1278 ListControl items can also be referred to by their labels instead of names.
1279 Use the by_label argument, and the set_value_by_label, get_value_by_label
1282 XXX RadioControl and CheckboxControl don't implement by_label yet.
1284 Note that, rather confusingly, though SELECT controls are represented in
1285 HTML by SELECT elements (which contain OPTION elements, representing
1286 individual list items), CHECKBOXes and RADIOs are not represented by *any*
1287 element. Instead, those controls are represented by a collection of INPUT
1288 elements. For example, this is a SELECT control, named "control1":
1290 <select name="control1">
1291 <option>foo</option>
1292 <option value="1">bar</option>
1295 and this is a CHECKBOX control, named "control2":
1297 <input type="checkbox" name="control2" value="foo" id="cbe1">
1298 <input type="checkbox" name="control2" value="bar" id="cbe2">
1300 The id attribute of a CHECKBOX or RADIO ListControl is always that of its
1301 first element (for example, "cbe1" above).
1304 Additional read-only public attribute: multiple.
1307 ListControls are built up by the parser from their component items by
1308 creating one ListControl per item, consolidating them into a single master
1309 ListControl held by the HTMLForm:
1311 -User calls form.new_control(...)
1312 -Form creates Control, and calls control.add_to_form(self).
1313 -Control looks for a Control with the same name and type in the form, and
1314 if it finds one, merges itself with that control by calling
1315 control.merge_control(self). The first Control added to the form, of a
1316 particular name and type, is the only one that survives in the form.
1317 -Form calls control.fixup for all its controls. ListControls in the form
1318 know they can now safely pick their default values.
1320 To create a ListControl without an HTMLForm, use:
1322 control.merge_control(new_control)
1325 def __init__(self
, type, name
, attrs
={}, select_default
=False,
1326 called_as_base_class
=False):
1328 select_default: for RADIO and multiple-selection SELECT controls, pick
1329 the first item as the default if no 'selected' HTML attribute is
1333 if not called_as_base_class
:
1334 raise NotImplementedError()
1336 self
.__dict
__["type"] = string
.lower(type)
1337 self
.__dict
__["name"] = name
1338 self
._value
= attrs
.get("value")
1339 self
.disabled
= False
1340 self
.readonly
= False
1341 self
.id = attrs
.get("id")
1343 self
._attrs
= attrs
.copy()
1344 # As Controls are merged in with .merge_control(), self._attrs will
1345 # refer to each Control in turn -- always the most recently merged
1346 # control. Each merged-in Control instance corresponds to a single
1347 # list item: see ListControl.__doc__.
1349 self
._attrs
_list
= [self
._attrs
] # extended by .merge_control()
1350 self
._disabled
_list
= [self
._attrs
.has_key("disabled")] # ditto
1352 self
._attrs
_list
= [] # extended by .merge_control()
1353 self
._disabled
_list
= [] # ditto
1355 self
._select
_default
= select_default
1356 self
._clicked
= False
1357 # Some list controls can have their default set only after all items
1358 # are known. If so, self._value_is_set is false, and the self.fixup
1359 # method, called after all items have been added, sets the default.
1360 self
._value
_is
_set
= False
1362 def _value_from_label(self
, label
):
1363 raise NotImplementedError("control '%s' does not yet support "
1364 "by_label" % self
.name
)
1366 def toggle(self
, name
, by_label
=False):
1367 return self
._set
_selected
_state
(name
, 2, by_label
)
1368 def set(self
, selected
, name
, by_label
=False):
1369 action
= int(bool(selected
))
1370 return self
._set
_selected
_state
(name
, action
, by_label
)
1372 def _set_selected_state(self
, name
, action
, by_label
):
1381 if not isstringlike(name
):
1382 raise TypeError("item name must be string-like")
1384 raise AttributeError("control '%s' is disabled" % self
.name
)
1386 raise AttributeError("control '%s' is readonly" % self
.name
)
1388 name
= self
._value
_from
_label
(name
)
1390 i
= self
._menu
.index(name
)
1392 raise ItemNotFoundError("no item named '%s'" % name
)
1396 action
= not self
._selected
[i
]
1397 if action
and self
._disabled
_list
[i
]:
1398 raise AttributeError("item '%s' is disabled" % name
)
1399 self
._selected
[i
] = bool(action
)
1402 if self
._selected
== name
:
1406 if action
== 0 and self
._selected
== name
:
1407 self
._selected
= None
1409 if self
._disabled
_list
[i
]:
1410 raise AttributeError("item '%s' is disabled" % name
)
1411 self
._selected
= name
1413 def toggle_single(self
, by_label
=False):
1414 self
._set
_single
_selected
_state
(2, by_label
)
1415 def set_single(self
, selected
, by_label
=False):
1416 action
= int(bool(selected
))
1417 self
._set
_single
_selected
_state
(action
, by_label
)
1419 def _set_single_selected_state(self
, action
, by_label
):
1420 if len(self
._menu
) != 1:
1421 raise ItemCountError("'%s' is not a single-item control" %
1424 name
= self
._menu
[0]
1426 name
= self
._value
_from
_label
(name
)
1427 self
._set
_selected
_state
(name
, action
, by_label
)
1429 def get_item_disabled(self
, name
, by_label
=False):
1430 """Get disabled state of named list item in a ListControl."""
1432 name
= self
._value
_from
_label
(name
)
1434 i
= self
._menu
.index(name
)
1436 raise ItemNotFoundError()
1438 return self
._disabled
_list
[i
]
1440 def set_item_disabled(self
, disabled
, name
, by_label
=False):
1441 """Set disabled state of named list item in a ListControl.
1443 disabled: boolean disabled state
1447 name
= self
._value
_from
_label
(name
)
1449 i
= self
._menu
.index(name
)
1451 raise ItemNotFoundError()
1453 self
._disabled
_list
[i
] = bool(disabled
)
1455 def set_all_items_disabled(self
, disabled
):
1456 """Set disabled state of all list items in a ListControl.
1458 disabled: boolean disabled state
1461 for i
in range(len(self
._disabled
_list
)):
1462 self
._disabled
_list
[i
] = bool(disabled
)
1464 def get_item_attrs(self
, name
, by_label
=False):
1465 """Return dictionary of HTML attributes for a single ListControl item.
1467 The HTML element types that describe list items are: OPTION for SELECT
1468 controls, INPUT for the rest. These elements have HTML attributes that
1469 you may occasionally want to know about -- for example, the "alt" HTML
1470 attribute gives a text string describing the item (graphical browsers
1471 usually display this as a tooltip).
1473 The returned dictionary maps HTML attribute names to values. The names
1474 and values are taken from the original HTML.
1476 Note that for SELECT controls, the returned dictionary contains a
1477 special key "contents" -- see SelectControl.__doc__.
1481 name
= self
._value
_from
_label
(name
)
1483 i
= self
._menu
.index(name
)
1485 raise ItemNotFoundError()
1486 return self
._attrs
_list
[i
]
1488 def add_to_form(self
, form
):
1490 control
= form
.find_control(self
.name
, self
.type)
1491 except ControlNotFoundError
:
1492 Control
.add_to_form(self
, form
)
1494 control
.merge_control(self
)
1496 def merge_control(self
, control
):
1497 assert bool(control
.multiple
) == bool(self
.multiple
)
1498 assert isinstance(control
, self
.__class
__)
1499 self
._menu
.extend(control
._menu
)
1500 self
._attrs
_list
.extend(control
._attrs
_list
)
1501 self
._disabled
_list
.extend(control
._disabled
_list
)
1502 if control
.multiple
:
1503 self
._selected
.extend(control
._selected
)
1505 if control
._value
_is
_set
:
1506 self
._selected
= control
._selected
1507 if control
._value
_is
_set
:
1508 self
._value
_is
_set
= True
1512 ListControls are built up from component list items (which are also
1513 ListControls) during parsing. This method should be called after all
1514 items have been added. See ListControl.__doc__ for the reason this is
1518 # Need to set default selection where no item was indicated as being
1519 # selected by the HTML:
1522 # Nothing should be selected.
1523 # SELECT/single, SELECT/multiple and RADIO:
1524 # RFC 1866 (HTML 2.0): says first item should be selected.
1525 # W3C HTML 4.01 Specification: says that client behaviour is
1526 # undefined in this case. For RADIO, exactly one must be selected,
1527 # though which one is undefined.
1528 # Both Netscape and Microsoft Internet Explorer (IE) choose first
1529 # item for SELECT/single. However, both IE5 and Mozilla (both 1.0
1530 # and Firebird 0.6) leave all items unselected for RADIO and
1533 # Since both Netscape and IE all choose the first item for
1534 # SELECT/single, we do the same. OTOH, both Netscape and IE
1535 # leave SELECT/multiple with nothing selected, in violation of RFC 1866
1536 # (but not in violation of the W3C HTML 4 standard); the same is true
1537 # of RADIO (which *is* in violation of the HTML 4 standard). We follow
1538 # RFC 1866 if the select_default attribute is set, and Netscape and IE
1539 # otherwise. RFC 1866 and HTML 4 are always violated insofar as you
1540 # can deselect all items in a RadioControl.
1542 raise NotImplementedError()
1544 def __getattr__(self
, name
):
1549 for i
in range(len(menu
)):
1550 if self
._selected
[i
]: values
.append(menu
[i
])
1553 if self
._selected
is None: return []
1554 else: return [self
._selected
]
1556 raise AttributeError("%s instance has no attribute '%s'" %
1557 (self
.__class
__.__name
__, name
))
1559 def __setattr__(self
, name
, value
):
1562 raise AttributeError("control '%s' is disabled" % self
.name
)
1564 raise AttributeError("control '%s' is readonly" % self
.name
)
1565 self
._set
_value
(value
)
1566 elif name
in ("name", "type", "multiple"):
1567 raise AttributeError("%s attribute is readonly" % name
)
1569 self
.__dict
__[name
] = value
1571 def _set_value(self
, value
):
1573 self
._multiple
_set
_value
(value
)
1575 self
._single
_set
_value
(value
)
1577 def _single_set_value(self
, value
):
1578 if value
is None or isstringlike(value
):
1579 raise TypeError("ListControl, must set a sequence")
1581 if not (0 <= nr
<= 1):
1582 raise ItemCountError("single selection list, must set sequence of "
1586 self
._selected
= None
1590 i
= self
._menu
.index(value
)
1592 raise ItemNotFoundError("no item named '%s'" %
1594 if self
._disabled
_list
[i
]:
1595 raise AttributeError("item '%s' is disabled" % value
)
1596 self
._selected
= value
1598 def _multiple_set_value(self
, value
):
1599 if value
is None or isstringlike(value
):
1600 raise TypeError("ListControl, must set a sequence")
1602 selected
= [False]*len(self
._selected
)
1604 disabled_list
= self
._disabled
_list
1608 for i
in range(len(menu
)):
1611 if disabled_list
[i
]:
1612 raise AttributeError("item '%s' is disabled" % value
)
1617 raise ItemNotFoundError("no item named '%s'" % repr(v
))
1618 self
._selected
= selected
1620 def set_value_by_label(self
, value
):
1621 raise NotImplementedError("control '%s' does not yet support "
1622 "by_label" % self
.name
)
1623 def get_value_by_label(self
):
1624 raise NotImplementedError("control '%s' does not yet support "
1625 "by_label" % self
.name
)
1627 def possible_items(self
, by_label
=False):
1629 raise NotImplementedError(
1630 "control '%s' does not yet support by_label" % self
.name
)
1631 return copy
.copy(self
._menu
)
1637 if not self
.multiple
:
1639 value
= self
._selected
1640 if name
is None or value
is None:
1642 return [(name
, value
)]
1644 control_name
= self
.name
# usually the name HTML attribute
1646 for i
in range(len(self
._menu
)):
1647 item_name
= self
._menu
[i
] # usually the value HTML attribute
1648 if self
._selected
[i
]:
1649 pairs
.append((control_name
, item_name
))
1652 def _item_str(self
, i
):
1653 item_name
= self
._menu
[i
]
1655 if self
._selected
[i
]:
1656 item_name
= "*"+item_name
1658 if self
._selected
== item_name
:
1659 item_name
= "*"+item_name
1660 if self
._disabled
_list
[i
]:
1661 item_name
= "(%s)" % item_name
1666 if name
is None: name
= "<None>"
1669 for i
in range(len(self
._menu
)):
1670 s
= self
._item
_str
(i
)
1674 if self
.disabled
: infos
.append("disabled")
1675 if self
.readonly
: infos
.append("readonly")
1676 info
= string
.join(infos
, ", ")
1677 if info
: info
= " (%s)" % info
1679 return "<%s(%s=[%s])%s>" % (self
.__class
__.__name
__,
1680 name
, string
.join(display
, ", "), info
)
1683 class RadioControl(ListControl
):
1690 def __init__(self
, type, name
, attrs
, select_default
=False):
1691 ListControl
.__init
__(self
, type, name
, attrs
, select_default
,
1692 called_as_base_class
=True)
1693 self
.__dict
__["multiple"] = False
1694 value
= attrs
.get("value", "on")
1695 self
._menu
= [value
]
1696 checked
= attrs
.has_key("checked")
1698 self
._value
_is
_set
= True
1699 self
._selected
= value
1701 self
._selected
= None
1704 if not self
._value
_is
_set
:
1705 # no item explicitly selected
1706 assert self
._selected
is None
1707 if self
._select
_default
:
1708 self
._selected
= self
._menu
[0]
1709 self
._value
_is
_set
= True
1712 class CheckboxControl(ListControl
):
1719 def __init__(self
, type, name
, attrs
, select_default
=False):
1720 ListControl
.__init
__(self
, type, name
, attrs
, select_default
,
1721 called_as_base_class
=True)
1722 self
.__dict
__["multiple"] = True
1723 value
= attrs
.get("value", "on")
1724 self
._menu
= [value
]
1725 checked
= attrs
.has_key("checked")
1726 self
._selected
= [checked
]
1727 self
._value
_is
_set
= True
1730 # If no items were explicitly checked in HTML, that's how we must
1731 # leave it, so we have nothing to do here.
1732 assert self
._value
_is
_set
1735 class SelectControl(ListControl
):
1741 SELECT control values and labels are subject to some messy defaulting
    rules.  For example, if the HTML representation of the control is:
1745 <OPTION value=0 label="2002">current year</OPTION>
1746 <OPTION value=1>2001</OPTION>
1747 <OPTION>2000</OPTION>
1750 The items, in order, have labels "2002", "2001" and "2000", whereas their
1751 values are "0", "1" and "2000" respectively. Note that the value of the
1752 last OPTION in this example defaults to its contents, as specified by RFC
1753 1866, as do the labels of the second and third OPTIONs.
1755 The purpose of these methods is that the OPTION labels are sometimes much
1756 more meaningful, than are the OPTION values, which can make for more
1759 Additional read-only public attribute: attrs
1761 The attrs attribute is a dictionary of the original HTML attributes of the
1762 SELECT element. Other ListControls do not have this attribute, because in
1763 other cases the control as a whole does not correspond to any single HTML
1764 element. The get_item_attrs method may be used as usual to get at the
1765 HTML attributes of the HTML elements corresponding to individual list items
1766 (for SELECT controls, these are OPTION elements).
1768 Another special case is that the attributes dictionaries returned by
1769 get_item_attrs have a special key "contents" which does not correspond to
1770 any real HTML attribute, but rather contains the contents of the OPTION
1773 <OPTION>this bit</OPTION>
    # HTML attributes here are treated slightly differently from other list
    # controls:
1777 # -The SELECT HTML attributes dictionary is stuffed into the OPTION
1778 # HTML attributes dictionary under the "__select" key.
1779 # -The content of each OPTION element is stored under the special
1780 # "contents" key of the dictionary.
1781 # After all this, the dictionary is passed to the SelectControl constructor
1782 # as the attrs argument, as usual. However:
1783 # -The first SelectControl constructed when building up a SELECT control
1784 # has a constructor attrs argument containing only the __select key -- so
1785 # this SelectControl represents an empty SELECT control.
1786 # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and
1787 # the __select dictionary containing the SELECT HTML-attributes.
1788 def __init__(self
, type, name
, attrs
, select_default
=False):
1789 # fish out the SELECT HTML attributes from the OPTION HTML attributes
1791 self
.attrs
= attrs
["__select"].copy()
1792 attrs
= attrs
.copy()
1793 del attrs
["__select"]
1795 ListControl
.__init
__(self
, type, name
, attrs
, select_default
,
1796 called_as_base_class
=True)
1798 self
._label
_map
= None
1799 self
.disabled
= self
.attrs
.has_key("disabled")
1800 self
.id = self
.attrs
.get("id")
1804 self
._value
_is
_set
= False
1805 if self
.attrs
.has_key("multiple"):
1806 self
.__dict
__["multiple"] = True
1809 self
.__dict
__["multiple"] = False
1810 self
._selected
= None
1812 if attrs
: # OPTION item data was provided
1813 value
= attrs
["value"]
1814 self
._menu
.append(value
)
1815 selected
= attrs
.has_key("selected")
1817 self
._value
_is
_set
= True
1818 if self
.attrs
.has_key("multiple"):
1819 self
._selected
.append(selected
)
1821 self
._selected
= value
1823 def _build_select_label_map(self
):
1824 """Return an ordered mapping of labels to values.
        For example, if the HTML representation of the control is as given in
1827 SelectControl.__doc__, this function will return a mapping like:
1829 {"2002": "0", "2001": "1", "2000": "2000"}
1833 for val
in self
._menu
:
1834 attrs
= self
.get_item_attrs(val
)
1835 alist
.append((attrs
["label"], val
))
1838 def _value_from_label(self
, label
):
1840 return self
._label
_map
[label
]
1842 raise ItemNotFoundError("no item has label '%s'" % label
)
1845 if not self
._value
_is
_set
:
1846 # No item explicitly selected.
1847 if len(self
._menu
) > 0:
1849 if self
._select
_default
:
1850 self
._selected
[0] = True
1852 assert self
._selected
is None
1853 self
._selected
= self
._menu
[0]
1854 self
._value
_is
_set
= True
1855 self
._label
_map
= self
._build
_select
_label
_map
()
1857 def possible_items(self
, by_label
=False):
1859 return copy
.copy(self
._menu
)
1861 self
._label
_map
.set_inverted(True)
1863 r
= map(lambda v
, self
=self
: self
._label
_map
[v
], self
._menu
)
1865 self
._label
_map
.set_inverted(False)
1868 def set_value_by_label(self
, value
):
1869 if isstringlike(value
):
1870 raise TypeError("ListControl, must set a sequence, not a string")
1872 raise AttributeError("control '%s' is disabled" % self
.name
)
1874 raise AttributeError("control '%s' is readonly" % self
.name
)
1877 value
= map(lambda v
, self
=self
: self
._label
_map
[v
], value
)
1879 raise ItemNotFoundError("no item has label '%s'" % e
.args
[0])
1880 self
._set
_value
(value
)
1882 def get_value_by_label(self
):
1884 self
._label
_map
.set_inverted(True)
1888 for i
in range(len(menu
)):
1889 if self
._selected
[i
]:
1890 values
.append(self
._label
_map
[menu
[i
]])
1893 return [self
._label
_map
[self
._selected
]]
1895 self
._label
_map
.set_inverted(False)
1898 #---------------------------------------------------
1899 class SubmitControl(ScalarControl
):
1907 def __init__(self
, type, name
, attrs
):
1908 ScalarControl
.__init
__(self
, type, name
, attrs
)
1909 # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it
1910 # blank, Konqueror 3.1 defaults to "Submit". HTML spec. doesn't seem
1912 if self
.value
is None: self
.value
= ""
1913 self
.readonly
= True
1915 def _click(self
, form
, coord
, return_type
):
1916 self
._clicked
= coord
1917 r
= form
._switch
_click
(return_type
)
1918 self
._clicked
= False
1922 if not self
._clicked
:
1924 return ScalarControl
.pairs(self
)
1927 #---------------------------------------------------
1928 class ImageControl(SubmitControl
):
1934 The value attribute of an ImageControl is always None. Coordinates are
1935 specified using one of the HTMLForm.click* methods.
1938 def __init__(self
, type, name
, attrs
):
1939 ScalarControl
.__init
__(self
, type, name
, attrs
)
1940 self
.__dict
__["value"] = None
1942 def __setattr__(self
, name
, value
):
1943 if name
in ("value", "name", "type"):
1944 raise AttributeError("%s attribute is readonly" % name
)
1946 self
.__dict
__[name
] = value
1949 clicked
= self
._clicked
1950 if self
.disabled
or not clicked
:
1953 if name
is None: return []
1954 return [("%s.x" % name
, str(clicked
[0])),
1955 ("%s.y" % name
, str(clicked
[1]))]
# aliases, just to make str(control) and str(form) clearer
# Each class below adds nothing to its base class: only the class name
# differs.  The __str__ methods visible in this file print
# self.__class__.__name__, so that name is what shows up in the output.
# NOTE(review): TextControl.__str__ is not visible here -- presumably it
# prints the class name the same way; confirm.
class PasswordControl(TextControl): pass
class HiddenControl(TextControl): pass
class TextareaControl(TextControl): pass
class SubmitButtonControl(SubmitControl): pass
def is_listcontrol(control):
    """Return true if control is an instance of ListControl or a subclass."""
    return isinstance(control, ListControl)
1969 """Represents a single HTML <form> ... </form> element.
1971 A form consists of a sequence of controls that usually have names, and
1972 which can take on various values. The values of the various types of
1973 controls represent variously: text, zero-, one- or many-of-many choices,
1974 and files to be uploaded.
1976 Forms can be filled in with data to be returned to the server, and then
1977 submitted, using the click method to generate a request object suitable for
1978 passing to urllib2.urlopen (or the click_request_data or click_pairs
1979 methods if you're not using urllib2).
1982 forms = ClientForm.ParseFile(html, base_uri)
1985 form["query"] = "Python"
1986 form.set("lots", "nr_results")
1988 response = urllib2.urlopen(form.click())
1990 Usually, HTMLForm instances are not created directly. Instead, the
1991 ParseFile or ParseResponse factory functions are used. If you do construct
1992 HTMLForm objects yourself, however, note that an HTMLForm instance is only
1993 properly initialised after the fixup method has been called (ParseFile and
1994 ParseResponse do this for you). See ListControl.__doc__ for the reason
1997 Indexing a form (form["control_name"]) returns the named Control's value
1998 attribute. Assignment to a form index (form["control_name"] = something)
1999 is equivalent to assignment to the named Control's value attribute. If you
2000 need to be more specific than just supplying the control's name, use the
2001 set_value and get_value methods.
2003 ListControl values are lists of item names. The list item's name is the
2004 value of the corresponding HTML element's "value" attribute.
2008 <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT>
2009 <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT>
2011 defines a CHECKBOX control with name "cheeses" which has two items, named
2012 "leicester" and "cheddar".
2016 <SELECT name="more_cheeses">
2018 <OPTION value="2" label="CHEDDAR">cheddar</OPTION>
2021 defines a SELECT control with name "more_cheeses" which has two items,
2024 To set, clear or toggle individual list items, use the set and toggle
    methods.  To set the whole value, do as for any other control: use indexing
2026 or the set_/get_value methods.
2030 # select *only* the item named "cheddar"
2031 form["cheeses"] = ["cheddar"]
2032 # select "cheddar", leave other items unaffected
2033 form.set("cheddar", "cheeses")
2035 Some controls (RADIO and SELECT without the multiple attribute) can only
2036 have zero or one items selected at a time. Some controls (CHECKBOX and
2037 SELECT with the multiple attribute) can have multiple items selected at a
2038 time. To set the whole value of a multiple-selection ListControl, assign a
2039 sequence to a form index:
2041 form["cheeses"] = ["cheddar", "leicester"]
2043 To check whether a control has an item, or whether an item is selected,
2046 "cheddar" in form.possible_items("cheeses")
2047 "cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses"))
2049 Note that some items may be disabled (see below).
2051 Note the following mistake:
2053 form[control_name] = control_value
2054 assert form[control_name] == control_value # not necessarily true
2056 The reason for this is that form[control_name] always gives the list items
2057 in the order they were listed in the HTML.
2059 List items (hence list values, too) can be referred to in terms of list
2060 item labels rather than list item names. Currently, this is only possible
2061 for SELECT controls (this is a bug). To use this feature, use the by_label
2062 arguments to the various HTMLForm methods. Note that it is *item* names
2063 (hence ListControl values also), not *control* names, that can be referred
2066 The question of default values of OPTION contents, labels and values is
2067 somewhat complicated: see SelectControl.__doc__ and
2068 ListControl.get_item_attrs.__doc__ if you think you need to know.
2070 Controls can be disabled or readonly. In either case, the control's value
2071 cannot be changed until you clear those flags (using the methods on
2072 HTMLForm). Disabled is the state typically represented by browsers by
2073 `greying out' a control. Disabled controls are not `successful' -- they
2074 don't cause data to get returned to the server. Readonly controls usually
2075 appear in browsers as read-only text boxes. Readonly controls are
2076 successful. List items can also be disabled. Attempts to select disabled
2077 items (with form[name] = value, or using the ListControl.set method, for
2078 example) fail. Attempts to clear disabled items are allowed.
2080 If a lot of controls are readonly, it can be useful to do this:
2082 form.set_all_readonly(False)
2084 When you want to do several things with a single control, or want to do
2085 less common things, like changing which controls and items are disabled,
2086 you can get at a particular control:
2088 control = form.find_control("cheeses")
2089 control.set_item_disabled(False, "gruyere")
2090 control.set("gruyere")
2092 Most methods on HTMLForm just delegate to the contained controls, so see
2093 the docstrings of the various Control classes for further documentation.
2094 Most of these delegating methods take name, type, kind, id and nr arguments
2095 to specify the control to be operated on: see
2096 HTMLForm.find_control.__doc__.
2098 ControlNotFoundError (subclass of ValueError) is raised if the specified
2099 control can't be found. This includes occasions where a non-ListControl
2100 is found, but the method (set, for example) requires a ListControl.
2101 ItemNotFoundError (subclass of ValueError) is raised if a list item can't
2102 be found. ItemCountError (subclass of ValueError) is raised if an attempt
2103 is made to select more than one item and the control doesn't allow that, or
2104 set/get_single are called and the control contains more than one item.
2105 AttributeError is raised if a control or item is readonly or disabled and
2106 an attempt is made to alter its value.
2108 XXX CheckBoxControl and RadioControl don't yet support item access by label
2110 Security note: Remember that any passwords you store in HTMLForm instances
2111 will be saved to disk in the clear if you pickle them (directly or
2112 indirectly). The simplest solution to this is to avoid pickling HTMLForm
2113 objects. You could also pickle before filling in any password, or just set
2114 the password to "" before pickling.
2119 action: full (absolute URI) form action
2120 method: "GET" or "POST"
2121 enctype: form transfer encoding MIME type
2122 name: name of form (None if no name was specified)
2123 attrs: dictionary mapping original HTML form attributes to their values
2125 controls: list of Control instances; do not alter this list
2126 (instead, call form.new_control to make a Control and add it to the
2127 form, or control.add_to_form if you already have a Control instance)
2131 Methods for form filling:
2132 -------------------------
    Most of these methods have very similar arguments.  See
2135 HTMLForm.find_control.__doc__ for details of the name, type, kind and nr
2136 arguments. See above for a description of by_label.
2138 def find_control(self,
2139 name=None, type=None, kind=None, id=None, predicate=None,
2142 get_value(name=None, type=None, kind=None, id=None, nr=None,
2145 name=None, type=None, kind=None, id=None, nr=None,
2148 set_all_readonly(readonly)
2151 Methods applying only to ListControls:
2153 possible_items(name=None, type=None, kind=None, id=None, nr=None,
2156 set(selected, item_name,
2157 name=None, type=None, kind=None, id=None, nr=None,
2160 name=None, type=None, id=None, nr=None,
2163 set_single(selected,
2164 name=None, type=None, kind=None, id=None, nr=None,
2166 toggle_single(name=None, type=None, kind=None, id=None, nr=None,
2170 Method applying only to FileControls:
2172 add_file(file_object,
2173 content_type=None, filename=None,
2174 name=None, id=None, nr=None)
2177 Methods applying only to clickable controls:
2179 click(name=None, type=None, id=None, nr=0, coord=(1,1))
2180 click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1))
2181 click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1))
# Maps a lower-cased control type name to the Control class that
# represents it.  new_control looks types up here.
type2class = {
    # text-like controls
    "text": TextControl,
    "password": PasswordControl,
    "hidden": HiddenControl,
    "textarea": TextareaControl,

    "isindex": IsindexControl,

    "file": FileControl,

    # plain and reset buttons are represented by IgnoreControl
    "button": IgnoreControl,
    "buttonbutton": IgnoreControl,
    "reset": IgnoreControl,
    "resetbutton": IgnoreControl,

    # submit-style controls
    "submit": SubmitControl,
    "submitbutton": SubmitButtonControl,
    "image": ImageControl,

    # list controls
    "radio": RadioControl,
    "checkbox": CheckboxControl,
    "select": SelectControl,
    }
2209 #---------------------------------------------------
2210 # Initialisation. Use ParseResponse / ParseFile instead.
2212 def __init__(self
, action
, method
="GET",
2213 enctype
="application/x-www-form-urlencoded",
2214 name
=None, attrs
=None):
2216 In the usual case, use ParseResponse (or ParseFile) to create new
2219 action: full (absolute URI) form action
2220 method: "GET" or "POST"
2221 enctype: form transfer encoding MIME type
2223 attrs: dictionary mapping original HTML form attributes to their values
2226 self
.action
= action
2227 self
.method
= method
2228 self
.enctype
= enctype
2230 if attrs
is not None:
2231 self
.attrs
= attrs
.copy()
def new_control(self, type, name, attrs,
                ignore_unknown=False, select_default=False):
    """Adds a new control to the form.

    This is usually called by ParseFile and ParseResponse.  Don't call it
    yourself unless you're building your own Control instances.

    Note that controls representing lists of items are built up from
    controls holding only a single list item.  See ListControl.__doc__ for
    further information.

    type: type of control (see Control.__doc__ for a list); matched
     case-insensitively against HTMLForm.type2class
    name: name of control, passed through to the control class
    attrs: HTML attributes of control (copied before use)
    ignore_unknown: if true, use a dummy Control instance for controls of
     unknown type; otherwise, raise ValueError
    select_default: for RADIO and multiple-selection SELECT controls, pick
     the first item as the default if no 'selected' HTML attribute is
     present (this defaulting happens when the HTMLForm.fixup method is
     called)

    """
    type = string.lower(type)
    klass = self.type2class.get(type)
    if klass is None and not ignore_unknown:
        raise ValueError("Unknown control type '%s'" % type)
    if klass is None:
        # unknown type, but the caller asked for it to be tolerated
        klass = IgnoreControl

    own_attrs = attrs.copy()
    # Only list controls take the extra select_default argument.
    if not issubclass(klass, ListControl):
        control = klass(type, name, own_attrs)
    else:
        control = klass(type, name, own_attrs, select_default)
    control.add_to_form(self)
def fixup(self):
    """Normalise form after all controls have been added.

    This is usually called by ParseFile and ParseResponse.  Don't call it
    yourself unless you're building your own Control instances.

    This method should only be called once, after all controls have been
    added to the form.

    """
    # Delegate to each control in the order the controls were added.
    for member in self.controls:
        member.fixup()
2285 #---------------------------------------------------
2287 header
= "%s %s %s" % (self
.method
, self
.action
, self
.enctype
)
2289 for control
in self
.controls
:
2290 rep
.append(" %s" % str(control
))
2291 return "<%s>" % string
.join(rep
, "\n")
2293 #---------------------------------------------------
2294 # Form-filling methods.
2296 def __getitem__(self
, name
):
2297 return self
.find_control(name
).value
2298 def __setitem__(self
, name
, value
):
2299 control
= self
.find_control(name
)
2301 control
.value
= value
2302 except AttributeError, e
:
2303 raise ValueError(str(e
))
def get_value(self,
              name=None, type=None, kind=None, id=None, nr=None,
              by_label=False):
    """Return value of control.

    If only the name argument is supplied, equivalent to

    form[name]

    If by_label is true, the control's get_value_by_label method is used
    instead; NotImplementedError is raised if the control has no such
    method.

    """
    c = self.find_control(name, type, kind, id, nr=nr)
    if not by_label:
        return c.value
    try:
        fetch = c.get_value_by_label
    except AttributeError:
        raise NotImplementedError(
            "control '%s' does not yet support by_label" % c.name)
    return fetch()
def set_value(self, value,
              name=None, type=None, kind=None, id=None, nr=None,
              by_label=False):
    """Set value of control.

    If only name and value arguments are supplied, equivalent to

    form[name] = value

    If by_label is true, the control's set_value_by_label method is used
    instead; NotImplementedError is raised if the control has no such
    method.

    """
    c = self.find_control(name, type, kind, id, nr=nr)
    if not by_label:
        c.value = value
        return
    try:
        store = c.set_value_by_label
    except AttributeError:
        raise NotImplementedError(
            "control '%s' does not yet support by_label" % c.name)
    store(value)
def set_all_readonly(self, readonly):
    """Set the readonly attribute of every control in the form.

    readonly: any value; it is converted with bool() before being stored

    """
    for member in self.controls:
        setattr(member, "readonly", bool(readonly))
2353 #---------------------------------------------------
2354 # Form-filling methods applying only to ListControls.
def possible_items(self,
                   name=None, type=None, kind=None, id=None, nr=None,
                   by_label=False):
    """Return a list of all values that the specified control can take.

    The control is located with _find_list_control; by_label is passed on
    to the control's own possible_items method.

    """
    list_control = self._find_list_control(name, type, kind, id, nr)
    return list_control.possible_items(by_label)
def set(self, selected, item_name,
        name=None, type=None, kind=None, id=None, nr=None,
        by_label=False):
    """Select / deselect named list item.

    selected: boolean selected state
    item_name: name of the list item, passed on to the control

    """
    list_control = self._find_list_control(name, type, kind, id, nr)
    list_control.set(selected, item_name, by_label)
def toggle(self, item_name,
           name=None, type=None, kind=None, id=None, nr=None,
           by_label=False):
    """Toggle selected state of named list item."""
    list_control = self._find_list_control(name, type, kind, id, nr)
    list_control.toggle(item_name, by_label)
def set_single(self, selected,
               name=None, type=None, kind=None, id=None, nr=None,
               by_label=False):
    """Select / deselect list item in a control having only one item.

    If the control has multiple list items, ItemCountError is raised.

    This is just a convenience method, so you don't need to know the item's
    name -- the item name in these single-item controls is usually
    something meaningless like "1" or "on".

    For example, if a checkbox has a single item named "on", the following
    two calls are equivalent:

    control.toggle("on")
    control.toggle_single()

    """
    list_control = self._find_list_control(name, type, kind, id, nr)
    list_control.set_single(selected, by_label)
def toggle_single(self, name=None, type=None, kind=None, id=None, nr=None,
                  by_label=False):
    """Toggle selected state of list item in control having only one item.

    The rest is as for HTMLForm.set_single.__doc__.

    """
    list_control = self._find_list_control(name, type, kind, id, nr)
    list_control.toggle_single(by_label)
2410 #---------------------------------------------------
2411 # Form-filling method applying only to FileControls.
def add_file(self, file_object, content_type=None, filename=None,
             name=None, id=None, nr=None):
    """Add a file to be uploaded.

    file_object: file-like object (with read method) from which to read
     data to upload
    content_type: MIME content type of data to upload
    filename: filename to pass to server

    If filename is None, no filename is sent to the server.

    If content_type is None, the content type is guessed based on the
    filename and the data read from the file object.

    XXX
    At the moment, guessed content type is always application/octet-stream.
    Use sndhdr, imghdr modules.  Should also try to guess HTML, XML, and
    plain text.

    The target control is looked up with find_control, restricted to type
    "file", using the name, id and nr arguments.

    """
    file_control = self.find_control(name, "file", id=id, nr=nr)
    file_control.add_file(file_object, content_type, filename)
2436 #---------------------------------------------------
2437 # Form submission methods, applying only to clickable controls.
def click(self, name=None, type=None, id=None, nr=0, coord=(1,1)):
    """Return request that would result from clicking on a control.

    The request object is a urllib2.Request instance, which you can pass to
    urllib2.urlopen (or ClientCookie.urlopen).

    Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
    IMAGEs) can be clicked.

    Will click on the first clickable control, subject to the name, type
    and nr arguments (as for find_control).  If no name, type, id or number
    is specified and there are no clickable controls, a request will be
    returned for the form in its current, un-clicked, state.

    IndexError is raised if any of name, type, id or nr is specified but no
    matching control is found.  ValueError is raised if the HTMLForm has an
    enctype attribute that is not recognised.

    You can optionally specify a coordinate to click at, which only makes a
    difference if you clicked on an image.

    """
    request = self._click(name, type, id, nr, coord, "request")
    return request
def click_request_data(self,
                       name=None, type=None, id=None, nr=0, coord=(1,1)):
    """As for click method, but return a tuple (url, data, headers).

    You can use this data to send a request to the server.  This is useful
    if you're using httplib or urllib rather than urllib2.  Otherwise, use
    the click method.

    # Untested.  Have to subclass to add headers, I think -- so use urllib2
    # to send headers.
    import urllib
    url, data, hdrs = form.click_request_data()
    r = urllib.urlopen(url, data)

    # Untested.  I don't know of any reason to use httplib -- you can get
    # just as much control with urllib2.
    import httplib, urlparse
    url, data, hdrs = form.click_request_data()
    tup = urlparse.urlparse(url)
    host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
    conn = httplib.HTTPConnection(host)
    if data:
        conn.request("POST", path, data, hdrs)
    else:
        conn.request("GET", path, headers=hdrs)
    r = conn.getresponse()

    """
    request_data = self._click(name, type, id, nr, coord, "request_data")
    return request_data
def click_pairs(self, name=None, type=None, id=None, nr=0, coord=(1,1)):
    """As for click_request_data, but returns a list of (key, value) pairs.

    You can use this list as an argument to ClientForm.urlencode.  This is
    usually only useful if you're using httplib or urllib rather than
    urllib2 or ClientCookie.  It may also be useful if you want to manually
    tweak the keys and/or values, but this should not be necessary.
    Otherwise, use the click method.

    Note that this method is only useful for forms of MIME type
    x-www-form-urlencoded.  In particular, it does not return the
    information required for file upload.  If you need file upload and are
    not using urllib2, use click_request_data.

    Also note that Python 2.0's urllib.urlencode is slightly broken: it
    only accepts a mapping, not a sequence of pairs, as an argument.  This
    messes up any ordering in the argument.  Use ClientForm.urlencode
    instead.

    """
    pairs = self._click(name, type, id, nr, coord, "pairs")
    return pairs
2515 #---------------------------------------------------
def find_control(self,
                 name=None, type=None, kind=None, id=None, predicate=None,
                 nr=None):
    """Locate some specific control within the form.

    At least one of the name, type, kind, id, predicate and nr arguments
    must be supplied (ValueError is raised otherwise).  If no matching
    control is found, ControlNotFoundError is raised.

    If name is specified, then the control must have the indicated name.

    If type is specified then the control must have the specified type (in
    addition to the types possible for <input> HTML tags: "text",
    "password", "hidden", "submit", "image", "button", "radio", "checkbox",
    "file" we also have "reset", "buttonbutton", "submitbutton",
    "resetbutton", "textarea", "select" and "isindex").

    If kind is specified, then the control must fall into the specified
    group, each of which satisfies a particular interface.  The kinds are
    "text", "list", "multilist", "singlelist", "clickable" and "file".

    If id is specified, then the control must have the indicated id.

    If predicate is specified, then the control must match that function.
    The predicate function is passed the control as its single argument,
    and should return a boolean value indicating whether the control
    matched.

    nr, if supplied, is the sequence number of the control (where 0 is the
    first).  Note that control 0 is the first control matching all the
    other arguments (if supplied); it is not necessarily the first control
    in the form.

    """
    # Refuse a completely unconstrained search.
    for criterion in (name, type, kind, id, predicate, nr):
        if criterion is not None:
            break
    else:
        raise ValueError(
            "at least one argument must be supplied to specify control")

    if nr is None:
        nr = 0
    return self._find_control(name, type, kind, id, predicate, nr)
2559 #---------------------------------------------------
2562 def _find_list_control(self
,
2563 name
=None, type=None, kind
=None, id=None, nr
=None):
2564 if ((name
is None) and (type is None) and (kind
is None) and
2565 (id is None) and (nr
is None)):
2567 "at least one argument must be supplied to specify control")
2568 if nr
is None: nr
= 0
2570 return self
._find
_control
(name
, type, kind
, id, is_listcontrol
, nr
)
def _find_control(self, name, type, kind, id, predicate, nr):
    """Return the nr'th control matching every criterion that was supplied.

    name, type, kind and id must each be None or string-like, and predicate
    must be None or callable, otherwise TypeError is raised.  ValueError is
    raised for a negative nr.  Criteria that are None are ignored.  If
    fewer than nr+1 controls match, ControlNotFoundError is raised with a
    message listing the criteria used.

    """
    # Argument validation; each message is paired with the value it checks.
    string_checks = (
        (name, "control name must be string-like"),
        (type, "control type must be string-like"),
        (kind, "control kind must be string-like"),
        (id, "control id must be string-like"),
        )
    for supplied, complaint in string_checks:
        if (supplied is not None) and not isstringlike(supplied):
            raise TypeError(complaint)
    if (predicate is not None) and not callable(predicate):
        raise TypeError("control predicate must be callable")
    if nr < 0:
        raise ValueError("control number must be a positive integer")

    # Number of matching controls still to be passed over.
    to_skip = nr
    for candidate in self.controls:
        rejected = (
            (name is not None and name != candidate.name) or
            (type is not None and type != candidate.type) or
            (kind is not None and
             not self._is_control_in_kind(candidate, kind)) or
            (id is not None and id != candidate.id) or
            (predicate and not predicate(candidate)))
        if rejected:
            continue
        if not to_skip:
            return candidate
        to_skip = to_skip - 1

    # Nothing matched: describe what was being looked for.
    description = []
    if name is not None:
        description.append("name '%s'" % name)
    if type is not None:
        description.append("type '%s'" % type)
    if kind is not None:
        description.append("kind '%s'" % kind)
    if id is not None:
        description.append("id '%s'" % id)
    if predicate is not None:
        description.append("matching predicate %s" % predicate)
    if nr:
        description.append("nr %d" % nr)
    raise ControlNotFoundError(
        "no control with "+string.join(description, ", "))
2616 def _is_control_in_kind(self
, control
, kind
):
2619 return isinstance(control
, ListControl
)
2620 elif kind
== "multilist":
2621 return bool(isinstance(control
, ListControl
) and control
.multiple
)
2622 elif kind
== "singlelist":
2623 return bool(isinstance(control
, ListControl
) and
2624 not control
.multiple
)
2625 elif kind
== "file":
2626 return isinstance(control
, FileControl
)
2627 elif kind
== "text":
2628 return isinstance(control
, TextControl
)
2629 elif kind
== "clickable":
2630 return (isinstance(control
, SubmitControl
) or
2631 isinstance(control
, IsindexControl
))
2633 raise ValueError("no such control kind '%s'" % kind
)
def _click(self, name, type, id, nr, coord, return_type):
    """Click the selected clickable control, or fall back to the bare form.

    The control is looked up with kind "clickable".  If none is found and
    the caller did not ask for a particular control (name, type and id are
    None and nr is 0), the form's current state is returned via
    _switch_click; otherwise ControlNotFoundError propagates.

    return_type is passed through unchanged to the control's _click method
    or to _switch_click.

    """
    try:
        control = self._find_control(name, type, "clickable", id, None, nr)
    except ControlNotFoundError:
        explicitly_requested = ((name is not None) or (type is not None) or
                                (id is not None) or (nr != 0))
        if explicitly_requested:
            raise
        # no clickable controls, but no control was explicitly requested,
        # so return state without clicking any control
        return self._switch_click(return_type)
    return control._click(self, coord, return_type)
2649 """Return sequence of (key, value) pairs suitable for urlencoding."""
2651 for control
in self
.controls
:
2652 pairs
.extend(control
.pairs())
2655 def _request_data(self
):
2656 """Return a tuple (url, data, headers)."""
2657 method
= string
.upper(self
.method
)
2659 if self
.enctype
!= "application/x-www-form-urlencoded":
2661 "unknown GET form encoding type '%s'" % self
.enctype
)
2662 uri
= "%s?%s" % (self
.action
, urlencode(self
._pairs
()))
2663 return uri
, None, []
2664 elif method
== "POST":
2665 if self
.enctype
== "application/x-www-form-urlencoded":
2666 return (self
.action
, urlencode(self
._pairs
()),
2667 [("Content-type", self
.enctype
)])
2668 elif self
.enctype
== "multipart/form-data":
2671 mw
= MimeWriter(data
, http_hdrs
)
2672 f
= mw
.startmultipartbody("form-data", add_to_http_hdrs
=True,
2674 for control
in self
.controls
:
2675 control
._write
_mime
_data
(mw
)
2677 return self
.action
, data
.getvalue(), http_hdrs
2680 "unknown POST form encoding type '%s'" % self
.enctype
)
2682 raise ValueError("Unknown method '%s'" % method
)
2684 def _switch_click(self
, return_type
):
2685 # This is called by HTMLForm and clickable Controls to hide switching
2689 # duplicated in IsindexControl._click
2690 if return_type
== "pairs":
2691 return self
._pairs
()
2692 elif return_type
== "request_data":
2693 return self
._request
_data
()
2695 req_data
= self
._request
_data
()
2696 req
= urllib2
.Request(req_data
[0], req_data
[1])
2697 for key
, val
in req_data
[2]:
2698 req
.add_header(key
, val
)