3 # Client-side HTTP for GET, POST, and HEAD commands. These routines can
4 # be used in untrusted code that uses the Safesock security policy. These
5 # procedures use a callback interface to avoid using vwait, which is not
6 # defined in the safe base.
8 # See the file "license.terms" for information on usage and redistribution of
9 # this file, and for a DISCLAIMER OF ALL WARRANTIES.
11 # RCS: @(#) $Id: http.tcl,v 1.43.2.13 2006/10/06 05:56:48 hobbs Exp $
13 # Rough version history:
14 # 1.0 Old http_get interface.
15 # 2.0 http:: namespace and http::geturl.
16 # 2.1 Added callbacks to handle arriving data, and timeouts.
17 # 2.2 Added ability to fetch into a channel.
18 # 2.3 Added SSL support, and ability to post from a channel. This version
19 # also cleans up error cases and eliminates the "ioerror" status in
20 # favor of raising an error
21 # 2.4 Added -binary option to http::geturl and charset element to the state
24 package require
Tcl 8.4
25 # Keep this in sync with pkgIndex.tcl and with the install directories
27 package provide
http 2.5.3
35 -proxyfilter http::ProxyRequired
38 set http(-useragent) "Tcl http client package [package provide http]"
41 # Set up the map for quoting chars. RFC3986 Section 2.3 say percent
42 # encode all except: "... percent-encoded octets in the ranges of ALPHA
43 # (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E),
44 # underscore (%5F), or tilde (%7E) should not be created by URI
46 for {set i
0} {$i <= 256} {incr i
} {
48 if {![string match
{[-._~a-zA-Z0-9
]} $c]} {
49 set map
($c) %[format %.2x
$i]
52 # These are handled specially
54 variable formMap
[array get map
]
63 variable encodings
[string tolower
[encoding names
]]
64 # This can be changed, but iso8859-1 is the RFC standard.
65 variable defaultCharset
"iso8859-1"
67 # Force RFC 3986 strictness in geturl url verification? Not for 8.4.x
70 namespace export geturl config reset wait formatQuery register unregister
71 # Useful, but not exported: data size status code
76 # See documentaion for details.
79 # proto URL protocol prefix, e.g. https
80 # port Default port for protocol
81 # command Command to use to create socket
83 # list of port and command that was registered.
85 proc http::register {proto port command
} {
87 set urlTypes
($proto) [list $port $command]
92 # Unregisters URL protocol handler
95 # proto URL protocol prefix, e.g. https
97 # list of port and command that was unregistered.
99 proc http::unregister {proto
} {
101 if {![info exists urlTypes
($proto)]} {
102 return -code error "unsupported url type \"$proto\""
104 set old
$urlTypes($proto)
105 unset urlTypes
($proto)
111 # See documentaion for details.
114 # args Options parsed by the procedure.
118 proc http::config {args
} {
120 set options [lsort [array names
http -*]]
121 set usage
[join $options ", "]
122 if {[llength $args] == 0} {
124 foreach name
$options {
125 lappend result
$name $http($name)
129 set options [string map
{- ""} $options]
130 set pat ^
-([join $options |
])$
131 if {[llength $args] == 1} {
132 set flag
[lindex $args 0]
133 if {[regexp -- $pat $flag]} {
136 return -code error "Unknown option $flag, must be: $usage"
139 foreach {flag value
} $args {
140 if {[regexp -- $pat $flag]} {
141 set http($flag) $value
143 return -code error "Unknown option $flag, must be: $usage"
151 # Clean up the socket and eval close time callbacks
154 # token Connection token.
155 # errormsg (optional) If set, forces status to error.
156 # skipCB (optional) If set, don't call the -command callback. This
157 # is useful when geturl wants to throw an exception instead
158 # of calling the callback. That way, the same error isn't
159 # reported to two places.
164 proc http::Finish { token
{errormsg
""} {skipCB
0}} {
167 global errorInfo errorCode
168 if {[string length
$errormsg] != 0} {
169 set state
(error) [list $errormsg $errorInfo $errorCode]
170 set state
(status
) error
172 catch {close $state(sock
)}
173 catch {after cancel
$state(after)}
174 if {[info exists state
(-command)] && !$skipCB} {
175 if {[catch {eval $state(-command) {$token}} err
]} {
176 if {[string length
$errormsg] == 0} {
177 set state
(error) [list $err $errorInfo $errorCode]
178 set state
(status
) error
181 if {[info exists state
(-command)]} {
182 # Command callback may already have unset our state
183 unset state
(-command)
190 # See documentaion for details.
193 # token Connection token.
199 proc http::reset { token
{why reset
} } {
202 set state
(status
) $why
203 catch {fileevent $state(sock
) readable
{}}
204 catch {fileevent $state(sock
) writable
{}}
206 if {[info exists state
(error)]} {
207 set errorlist
$state(error)
209 eval ::error $errorlist
215 # Establishes a connection to a remote url via http.
218 # url The http URL to goget.
219 # args Option value pairs. Valid options include:
220 # -blocksize, -validate, -headers, -timeout
222 # Returns a token for this connection. This token is the name of an array
223 # that the caller should unset to garbage collect the state.
225 proc http::geturl { url args
} {
228 variable defaultCharset
231 # Initialize the state variable, an array. We'll return the name of this
232 # array as the token for the transaction.
234 if {![info exists
http(uid
)]} {
237 set token
[namespace current
]::[incr http(uid
)]
242 # Process command options.
251 -type application
/x-www-form-urlencoded
265 # These flags have their types verified [Bug 811170]
269 -queryblocksize integer
273 set state
(charset
) $defaultCharset
274 set options {-binary -blocksize -channel -command -handler -headers \
275 -progress -query -queryblocksize -querychannel -queryprogress\
276 -validate -timeout -type}
277 set usage
[join $options ", "]
278 set options [string map
{- ""} $options]
279 set pat ^
-([join $options |
])$
280 foreach {flag value
} $args {
281 if {[regexp $pat $flag]} {
283 if {[info exists type
($flag)] && \
284 ![string is
$type($flag) -strict $value]} {
286 return -code error "Bad value for $flag ($value), must be $type($flag)"
288 set state
($flag) $value
291 return -code error "Unknown option $flag, can be: $usage"
295 # Make sure -query and -querychannel aren't both specified
297 set isQueryChannel
[info exists state
(-querychannel)]
298 set isQuery
[info exists state
(-query)]
299 if {$isQuery && $isQueryChannel} {
301 return -code error "Can't combine -query and -querychannel options!"
304 # Validate URL, determine the server host and port, and check proxy case
305 # Recognize user:pass@host URLs also, although we do not do anything with
308 # URLs have basically four parts.
309 # First, before the colon, is the protocol scheme (e.g. http)
310 # Second, for HTTP-like protocols, is the authority
311 # The authority is preceded by // and lasts up to (but not including)
312 # the following / and it identifies up to four parts, of which only one,
313 # the host, is required (if an authority is present at all). All other
314 # parts of the authority (user name, password, port number) are optional.
315 # Third is the resource name, which is split into two parts at a ?
316 # The first part (from the single "/" up to "?") is the path, and the
317 # second part (from that "?" up to "#") is the query. *HOWEVER*, we do
318 # not need to separate them; we send the whole lot to the server.
319 # Fourth is the fragment identifier, which is everything after the first
320 # "#" in the URL. The fragment identifier MUST NOT be sent to the server
321 # and indeed, we don't bother to validate it (it could be an error to
322 # pass it in here, but it's cheap to strip).
324 # An example of a URL that has all the parts:
325 # http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes
326 # The "http" is the protocol, the user is "jschmoe", the password is
327 # "xyzzy", the host is "www.bogus.net", the port is "8000", the path is
328 # "/foo/bar.tml", the query is "q=foo", and the fragment is "changes".
330 # Note that the RE actually combines the user and password parts, as
331 # recommended in RFC 3986. Indeed, that RFC states that putting passwords
332 # in URLs is a Really Bad Idea, something with which I would agree utterly.
333 # Also note that we do not currently support IPv6 addresses.
335 # From a validation perspective, we need to ensure that the parts of the
336 # URL that are going to the server are correctly encoded.
337 # This is only done if $::http::strict is true (default 0 for compat).
339 set URLmatcher
{(?x
) # this is _expanded_ syntax
341 (?
: (\w
+) : ) ?
# <protocol scheme>
345 [^
@/\#?]+ # <userinfo part of authority>
348 ( [^
/:\#?]+ ) # <host part of authority>
349 (?
: : (\d
+) )?
# <port part of authority>
351 ( / [^
\#?]* (?: \? [^\#?]* )?)? # <path> (including query)
352 (?
: \# (.*) )? # <fragment>
357 if {![regexp -- $URLmatcher $url -> proto user host port srvurl
]} {
359 return -code error "Unsupported URL: $url"
361 # Phase two: validate
363 # Caller has to provide a host name; we do not have a "default host"
364 # that would enable us to handle relative URLs.
366 return -code error "Missing host part: $url"
367 # Note that we don't check the hostname for validity here; if it's
368 # invalid, we'll simply fail to resolve it later on.
370 if {$port ne
"" && $port>65535} {
372 return -code error "Invalid port number: $port"
374 # The user identification and resource identification parts of the URL can
375 # have encoded characters in them; take care!
377 # Check for validity according to RFC 3986, Appendix A
378 set validityRE
{(?xi
)
380 (?
: [-\w.~
!$&'
()*+,;=:] |
%[0-9a-f
][0-9a-f
] )+
383 if {$strict && ![regexp -- $validityRE $user]} {
385 # Provide a better error message in this error case
386 if {[regexp {(?i
)%(?
![0-9a-f
][0-9a-f
]).?.?
} $user bad
]} {
388 "Illegal encoding character usage \"$bad\" in URL user"
390 return -code error "Illegal characters in URL user"
394 # Check for validity according to RFC 3986, Appendix A
395 set validityRE
{(?xi
)
397 # Path part (already must start with / character)
398 (?
: [-\w.~
!$&'
()*+,;=:@/] |
%[0-9a-f
][0-9a-f
] )*
399 # Query part (optional, permits ? characters)
400 (?
: \? (?
: [-\w.~
!$&'
()*+,;=:@/?
] |
%[0-9a-f
][0-9a-f
] )* )?
403 if {$strict && ![regexp -- $validityRE $srvurl]} {
405 # Provide a better error message in this error case
406 if {[regexp {(?i
)%(?
![0-9a-f
][0-9a-f
])..
} $srvurl bad
]} {
408 "Illegal encoding character usage \"$bad\" in URL path"
410 return -code error "Illegal characters in URL path"
415 if {[string length
$proto] == 0} {
418 if {![info exists urlTypes
($proto)]} {
420 return -code error "Unsupported URL type \"$proto\""
422 set defport
[lindex $urlTypes($proto) 0]
423 set defcmd
[lindex $urlTypes($proto) 1]
425 if {[string length
$port] == 0} {
428 if {![catch {$http(-proxyfilter) $host} proxy
]} {
429 set phost
[lindex $proxy 0]
430 set pport
[lindex $proxy 1]
433 # OK, now reassemble into a full URL
440 if {$port != $defport} {
444 # Don't append the fragment!
447 # If a timeout is specified we set up the after event and arrange for an
448 # asynchronous socket connection.
450 if {$state(-timeout) > 0} {
451 set state
(after) [after $state(-timeout) \
452 [list http::reset $token timeout
]]
458 # If we are using the proxy, we must pass in the full URL that includes
461 if {[info exists phost
] && [string length
$phost]} {
463 set conStat
[catch {eval $defcmd $async {$phost $pport}} s
]
465 set conStat
[catch {eval $defcmd $async {$host $port}} s
]
469 # Something went wrong while trying to establish the connection. Clean
470 # up after events and such, but DON'T call the command callback (if
471 # available) because we're going to throw an exception from here
475 return -code error $s
479 # Wait for the connection to complete.
481 if {$state(-timeout) > 0} {
482 fileevent $s writable
[list http::Connect $token]
485 if {$state(status
) eq
"error"} {
486 # Something went wrong while trying to establish the connection.
487 # Clean up after events and such, but DON'T call the command
488 # callback (if available) because we're going to throw an
489 # exception from here instead.
490 set err
[lindex $state(error) 0]
492 return -code error $err
493 } elseif
{$state(status
) ne
"connect"} {
494 # Likely to be connection timeout
500 # Send data in cr-lf format, but accept any line terminators
502 fconfigure $s -translation {auto crlf
} -buffersize $state(-blocksize)
504 # The following is disallowed in safe interpreters, but the socket is
505 # already in non-blocking mode in that case.
507 catch {fconfigure $s -blocking off
}
510 set state
(querylength
) [string length
$state(-query)]
511 if {$state(querylength
) > 0} {
515 # There's no query data.
519 } elseif
{$state(-validate)} {
521 } elseif
{$isQueryChannel} {
523 # The query channel must be blocking for the async Write to
525 fconfigure $state(-querychannel) -blocking 1 -translation binary
530 puts $s "$how $srvurl HTTP/1.0"
531 puts $s "Accept: $http(-accept)"
532 if {$port == $defport} {
533 # Don't add port in this case, to handle broken servers. [Bug
535 puts $s "Host: $host"
537 puts $s "Host: $host:$port"
539 puts $s "User-Agent: $http(-useragent)"
540 foreach {key value
} $state(-headers) {
541 set value
[string map
[list \n "" \r ""] $value]
542 set key
[string trim
$key]
543 if {$key eq
"Content-Length"} {
545 set state
(querylength
) $value
547 if {[string length
$key]} {
548 puts $s "$key: $value"
551 if {$isQueryChannel && $state(querylength
) == 0} {
552 # Try to determine size of data in channel. If we cannot seek, the
553 # surrounding catch will trap us
555 set start
[tell $state(-querychannel)]
556 seek $state(-querychannel) 0 end
557 set state
(querylength
) \
558 [expr {[tell $state(-querychannel)] - $start}]
559 seek $state(-querychannel) $start
562 # Flush the request header and set up the fileevent that will either
563 # push the POST data or read the response.
567 # It is possible to have both the read and write fileevents active at
568 # this point. The only scenario it seems to affect is a server that
569 # closes the connection without reading the POST data. (e.g., early
570 # versions TclHttpd in various error cases). Depending on the platform,
571 # the client may or may not be able to get the response from the server
572 # because of the error it will get trying to write the post data.
573 # Having both fileevents active changes the timing and the behavior,
574 # but no two platforms (among Solaris, Linux, and NT) behave the same,
575 # and none behave all that well in any case. Servers should always read
576 # their POST data if they expect the client to read their response.
578 if {$isQuery ||
$isQueryChannel} {
579 puts $s "Content-Type: $state(-type)"
581 puts $s "Content-Length: $state(querylength)"
584 fconfigure $s -translation {auto
binary}
585 fileevent $s writable
[list http::Write $token]
589 fileevent $s readable
[list http::Event $token]
592 if {! [info exists state
(-command)]} {
593 # geturl does EVERYTHING asynchronously, so if the user calls it
594 # synchronously, we just do a wait here.
597 if {$state(status
) eq
"error"} {
598 # Something went wrong, so throw the exception, and the
599 # enclosing catch will do cleanup.
600 return -code error [lindex $state(error) 0]
604 # The socket probably was never connected, or the connection dropped
607 # Clean up after events and such, but DON'T call the command callback
608 # (if available) because we're going to throw an exception from here
611 # if state(status) is error, it means someone's already called Finish
612 # to do the above-described clean up.
613 if {$state(status
) eq
"error"} {
617 return -code error $err
623 # Data access functions:
624 # Data - the URL data
625 # Status - the transaction status: ok, reset, eof, timeout
626 # Code - the HTTP transaction code, e.g., 200
627 # Size - the size of the URL data
629 proc http::data {token
} {
634 proc http::status {token
} {
637 return $state(status
)
639 proc http::code {token
} {
644 proc http::ncode {token
} {
647 if {[regexp {[0-9]{3}} $state(http) numeric_code
]} {
653 proc http::size {token
} {
656 return $state(currentsize
)
659 proc http::error {token
} {
662 if {[info exists state
(error)]} {
670 # Garbage collect the state associated with a transaction
673 # token The token returned from http::geturl
676 # unsets the state array
678 proc http::cleanup {token
} {
681 if {[info exists state
]} {
688 # This callback is made when an asyncronous connection completes.
691 # token The token returned from http::geturl
694 # Sets the status of the connection, which unblocks
695 # the waiting geturl call
697 proc http::Connect {token
} {
700 global errorInfo errorCode
701 if {[eof $state(sock
)] ||
702 [string length
[fconfigure $state(sock
) -error]]} {
703 Finish
$token "connect failed [fconfigure $state(sock) -error]" 1
705 set state
(status
) connect
706 fileevent $state(sock
) writable
{}
713 # Write POST query data to the socket
716 # token The token for the connection
719 # Write the socket and handle callbacks.
721 proc http::Write {token
} {
726 # Output a block. Tcl will buffer this if the socket blocks
729 # Catch I/O errors on dead sockets
731 if {[info exists state
(-query)]} {
732 # Chop up large query strings so queryprogress callback can give
736 [string range
$state(-query) $state(queryoffset
) \
737 [expr {$state(queryoffset
) + $state(-queryblocksize) - 1}]]
738 incr state
(queryoffset
) $state(-queryblocksize)
739 if {$state(queryoffset
) >= $state(querylength
)} {
740 set state
(queryoffset
) $state(querylength
)
744 # Copy blocks from the query channel
746 set outStr
[read $state(-querychannel) $state(-queryblocksize)]
747 puts -nonewline $s $outStr
748 incr state
(queryoffset
) [string length
$outStr]
749 if {[eof $state(-querychannel)]} {
754 # Do not call Finish here, but instead let the read half of the socket
755 # process whatever server reply there is to get.
757 set state
(posterror
) $err
762 fileevent $s writable
{}
763 fileevent $s readable
[list http::Event $token]
766 # Callback to the client after we've completely handled everything.
768 if {[string length
$state(-queryprogress)]} {
769 eval $state(-queryprogress) [list $token $state(querylength
)\
776 # Handle input on the socket
779 # token The token returned from http::geturl
782 # Read the socket and handle callbacks.
784 proc http::Event {token
} {
793 if {$state(state
) eq
"header"} {
794 if {[catch {gets $s line
} n
]} {
798 set state
(state
) body
799 if {$state(-binary) ||
![string match
-nocase text* $state(type
)]
800 ||
[string match
*gzip
* $state(coding
)]
801 ||
[string match
*compress
* $state(coding
)]} {
802 # Turn off conversions for non-text data
803 fconfigure $s -translation binary
804 if {[info exists state
(-channel)]} {
805 fconfigure $state(-channel) -translation binary
808 # If we are getting text, set the incoming channel's encoding
809 # correctly. iso8859-1 is the RFC default, but this could be
810 # any IANA charset. However, we only know how to convert what
811 # we have encodings for.
812 set idx
[lsearch -exact $encodings \
813 [string tolower
$state(charset
)]]
815 fconfigure $s -encoding [lindex $encodings $idx]
818 if {[info exists state
(-channel)] && \
819 ![info exists state
(-handler)]} {
820 # Initiate a sequence of background fcopies
821 fileevent $s readable
{}
825 if {[regexp -nocase {^content-type
:(.
+)$} $line x type
]} {
826 set state
(type
) [string trim
$type]
827 # grab the optional charset information
828 regexp -nocase {charset
\s
*=\s
*(\S
+)} $type x state
(charset
)
830 if {[regexp -nocase {^content-length
:(.
+)$} $line x length
]} {
831 set state
(totalsize
) [string trim
$length]
833 if {[regexp -nocase {^content-encoding
:(.
+)$} $line x coding
]} {
834 set state
(coding
) [string trim
$coding]
836 if {[regexp -nocase {^
([^
:]+):(.
+)$} $line x key value
]} {
837 lappend state
(meta
) $key [string trim
$value]
838 } elseif
{[string match HTTP
* $line]} {
839 set state
(http) $line
844 if {[info exists state
(-handler)]} {
845 set n
[eval $state(-handler) {$s $token}]
847 set block
[read $s $state(-blocksize)]
848 set n
[string length
$block]
850 append state
(body
) $block
854 incr state
(currentsize
) $n
859 if {[info exists state
(-progress)]} {
860 eval $state(-progress) \
861 {$token $state(totalsize
) $state(currentsize
)}
869 # Error handling wrapper around fcopy
872 # s The socket to copy from
873 # token The token returned from http::geturl
876 # This closes the connection upon error
878 proc http::CopyStart {s token
} {
882 fcopy $s $state(-channel) -size $state(-blocksize) -command \
883 [list http::CopyDone $token]
891 # fcopy completion callback
894 # token The token returned from http::geturl
895 # count The amount transfered
900 proc http::CopyDone {token count
{error {}}} {
904 incr state
(currentsize
) $count
905 if {[info exists state
(-progress)]} {
906 eval $state(-progress) {$token $state(totalsize
) $state(currentsize
)}
908 # At this point the token may have been reset
909 if {[string length
$error]} {
911 } elseif
{[catch {eof $s} iseof
] ||
$iseof} {
920 # Handle eof on the socket
923 # token The token returned from http::geturl
926 # Clean up the socket
928 proc http::Eof {token
} {
931 if {$state(state
) eq
"header"} {
933 set state
(status
) eof
943 # See documentaion for details.
946 # token Connection token.
949 # The status after the wait.
951 proc http::wait {token
} {
955 if {![info exists state
(status
)] ||
[string length
$state(status
)] == 0} {
956 # We must wait on the original variable name, not the upvar alias
957 vwait $token\(status
)
960 return $state(status
)
963 # http::formatQuery --
965 # See documentaion for details. Call http::formatQuery with an even
966 # number of arguments, where the first is a name, the second is a value,
967 # the third is another name, and so on.
970 # args A list of name-value pairs.
975 proc http::formatQuery {args
} {
979 append result
$sep [mapReply
$i]
991 # Do x-www-urlencoded character mapping
994 # string The string the needs to be encoded
999 proc http::mapReply {string} {
1003 # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use
1004 # a pre-computed map and [string map] to do the conversion (much faster
1005 # than [regsub]/[subst]). [Bug 1020491]
1007 if {$http(-urlencoding) ne
""} {
1008 set string [encoding convertto
$http(-urlencoding) $string]
1009 return [string map
$formMap $string]
1011 set converted
[string map
$formMap $string]
1012 if {[string match
"*\[\u0100-\uffff\]*" $converted]} {
1013 regexp {[\u0100-\uffff]} $converted badChar
1014 # Return this error message for maximum compatability... :^/
1015 return -code error \
1016 "can't read \"formMap($badChar)\": no such element in array"
1021 # http::ProxyRequired --
1022 # Default proxy filter.
1025 # host The destination host
1028 # The current proxy settings
1030 proc http::ProxyRequired {host
} {
1032 if {[info exists
http(-proxyhost)] && [string length
$http(-proxyhost)]} {
1033 if {![info exists
http(-proxyport)] ||
\
1034 ![string length
$http(-proxyport)]} {
1035 set http(-proxyport) 8080
1037 return [list $http(-proxyhost) $http(-proxyport)]