3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
21 """URL downloading API.
23 Methods defined in this module:
  Fetch(): fetches a given URL with an HTTP request, using one of the methods
    GET, POST, HEAD, PUT, DELETE or PATCH
44 from google
.appengine
.api
import apiproxy_stub_map
45 from google
.appengine
.api
import urlfetch_service_pb
46 from google
.appengine
.api
.urlfetch_errors
import *
47 from google
.appengine
.runtime
import apiproxy_errors
# Method identifiers accepted by make_fetch_call(): the set of values found in
# _URL_STRING_MAP. Anything outside this set raises InvalidMethodError there.
_VALID_METHODS = frozenset(_URL_STRING_MAP.values())

# Thread-local storage; get_default_fetch_deadline() and
# set_default_fetch_deadline() keep the per-thread default deadline on it.
_thread_local_settings = threading.local()
75 class _CaselessDict(UserDict
.IterableUserDict
):
76 """Case insensitive dictionary.
78 This class was lifted from os.py and slightly modified.
def __init__(self, dict=None, **kwargs):
    """Initializes the dictionary, optionally seeding it with entries.

    Args:
      dict: Optional initial contents; forwarded unchanged to
        UserDict.IterableUserDict.__init__.
      kwargs: Optional keyword entries; forwarded the same way.
    """
    # Index from lower-cased key to the key as it was originally spelled.
    # It is created before the base initializer runs.
    # NOTE(review): presumably the base initializer inserts the seed entries
    # through this class's overridden methods, which need the index -- confirm.
    self.caseless_keys = {}
    base_class = UserDict.IterableUserDict
    base_class.__init__(self, dict, **kwargs)
85 def __setitem__(self
, key
, item
):
86 """Set dictionary item.
89 key: Key of new item. Key is case insensitive, so "d['Key'] = value "
90 will replace previous values set by "d['key'] = old_value".
93 caseless_key
= key
.lower()
95 if caseless_key
in self
.caseless_keys
:
96 del self
.data
[self
.caseless_keys
[caseless_key
]]
97 self
.caseless_keys
[caseless_key
] = key
100 def __getitem__(self
, key
):
101 """Get dictionary item.
104 key: Key of item to get. Key is case insensitive, so "d['Key']" is the
108 Item associated with key.
110 return self
.data
[self
.caseless_keys
[key
.lower()]]
112 def __delitem__(self
, key
):
113 """Remove item from dictionary.
116 key: Key of item to remove. Key is case insensitive, so "del d['Key']" is
117 the same as "del d['key']"
119 caseless_key
= key
.lower()
120 del self
.data
[self
.caseless_keys
[caseless_key
]]
121 del self
.caseless_keys
[caseless_key
]
def has_key(self, key):
    """Report whether an item exists for a key, ignoring the key's case.

    Args:
      key: Key to look for.  "d.has_key('Key')" and "d.has_key('key')" give
        the same answer.

    Returns:
      True if the dictionary contains key, else False.
    """
    folded = key.lower()
    return folded in self.caseless_keys
135 def __contains__(self
, key
):
136 """Same as 'has_key', but used for 'in' operator.'"""
137 return self
.has_key(key
)
def get(self, key, failobj=None):
    """Get dictionary item, defaulting to another value if it does not exist.

    Args:
      key: Key of item to get.  Key is case insensitive, so "d.get('Key')"
        is the same as "d.get('key')".
      failobj: Value to return if key not in dictionary.

    Returns:
      The item associated with key, or failobj when no such key exists.
    """
    # Only the index lookup is guarded: an unknown key yields failobj
    # instead of raising KeyError, which is the contract of get().
    try:
        cased_key = self.caseless_keys[key.lower()]
    except KeyError:
        return failobj
    return self.data[cased_key]
153 def update(self
, dict=None, **kwargs
):
154 """Update dictionary using values from another dictionary and keywords.
157 dict: Dictionary to update from.
158 kwargs: Keyword arguments to update from.
163 except AttributeError:
177 """Make a shallow, case sensitive copy of self."""
181 def _is_fetching_self(url
, method
):
182 """Checks if the fetch is for the same URL from which it originated.
185 url: str, The URL being fetched.
186 method: value from _VALID_METHODS.
189 boolean indicating whether or not it seems that the app is trying to fetch
193 "HTTP_HOST" not in os
.environ
or
194 "PATH_INFO" not in os
.environ
):
197 scheme
, host_port
, path
, query
, fragment
= urlparse
.urlsplit(url
)
199 if host_port
== os
.environ
['HTTP_HOST']:
200 current_path
= urllib2
.unquote(os
.environ
['PATH_INFO'])
201 desired_path
= urllib2
.unquote(path
)
203 if (current_path
== desired_path
or
204 (current_path
in ('', '/') and desired_path
in ('', '/'))):
def create_rpc(deadline=None, callback=None):
    """Creates an RPC object for use with the urlfetch API.

    Args:
      deadline: Optional deadline in seconds for the operation.  When None,
        the value of get_default_fetch_deadline() is used instead; if that
        is also None the system-specific default applies (typically 5
        seconds).
      callback: Optional callable to invoke on completion.

    Returns:
      An apiproxy_stub_map.UserRPC object specialized for this service.
    """
    # Fall back to the per-thread default only when the caller gave no
    # deadline; an explicit deadline must not be overwritten.
    if deadline is None:
        deadline = get_default_fetch_deadline()
    return apiproxy_stub_map.UserRPC('urlfetch', deadline, callback)
def fetch(url, payload=None, method=GET, headers={},
          allow_truncated=False, follow_redirects=True,
          deadline=None, validate_certificate=None):
    """Fetches the given HTTP URL, blocking until the result is returned.

    This is the synchronous form of the API: it builds an RPC with
    create_rpc(), starts the request through make_fetch_call() and then
    waits on the RPC's get_result().  A HTTP/1.1 compliant proxy is used to
    fetch the result.

    Args:
      url: The URL to fetch.
      payload: POST, PUT, or PATCH payload; it is ignored if the method is
        not POST, PUT, or PATCH.
      method: One of the constants GET, POST, HEAD, PUT, DELETE, or PATCH,
        or the same HTTP method given as a string.
      headers: Dictionary of HTTP headers to send with the request.
      allow_truncated: If true, large responses are truncated and returned
        without error.  Otherwise ResponseTooLargeError is raised.
      follow_redirects: If true (the default), redirects are transparently
        followed and the response (if less than 5 redirects) contains the
        final destination's payload with status 200; the redirect chain
        information is lost.  If false, the caller sees the HTTP response
        itself, including the 'Location' header.
      deadline: Deadline in seconds for the operation.
      validate_certificate: If true, the request is not sent unless the
        certificate is valid, signed by a trusted CA and the hostname
        matches the certificate.  None leaves the choice to the underlying
        urlfetch implementation.

    Returns:
      The result of the RPC, a structure with these fields:
        content: string containing the response from the server
        status_code: HTTP status code returned by the server
        headers: dictionary of headers returned by the server

    Raises:
      urlfetch.InvalidURLError: The URL is an empty string or obviously
        invalid.
      urlfetch.DownloadError: The server cannot be contacted.  HTTP errors
        such as 404 are reported in the returned structure instead.
    """
    fetch_rpc = create_rpc(deadline=deadline)
    make_fetch_call(fetch_rpc, url,
                    payload=payload,
                    method=method,
                    headers=headers,
                    allow_truncated=allow_truncated,
                    follow_redirects=follow_redirects,
                    validate_certificate=validate_certificate)
    return fetch_rpc.get_result()
273 def make_fetch_call(rpc
, url
, payload
=None, method
=GET
, headers
={},
274 allow_truncated
=False, follow_redirects
=True,
275 validate_certificate
=None):
276 """Executes the RPC call to fetch a given HTTP URL.
278 The first argument is a UserRPC instance. See urlfetch.fetch for a
279 thorough description of remaining arguments.
282 The rpc object passed into the function.
285 assert rpc
.service
== 'urlfetch', repr(rpc
.service
)
286 if isinstance(method
, basestring
):
287 method
= method
.upper()
288 method
= _URL_STRING_MAP
.get(method
, method
)
289 if method
not in _VALID_METHODS
:
290 raise InvalidMethodError('Invalid method %s.' % str(method
))
292 if _is_fetching_self(url
, method
):
293 raise InvalidURLError("App cannot fetch the same URL as the one used for "
296 request
= urlfetch_service_pb
.URLFetchRequest()
297 response
= urlfetch_service_pb
.URLFetchResponse()
299 if isinstance(url
, unicode):
300 url
= url
.encode('UTF-8')
304 request
.set_method(urlfetch_service_pb
.URLFetchRequest
.GET
)
306 request
.set_method(urlfetch_service_pb
.URLFetchRequest
.POST
)
308 request
.set_method(urlfetch_service_pb
.URLFetchRequest
.HEAD
)
310 request
.set_method(urlfetch_service_pb
.URLFetchRequest
.PUT
)
311 elif method
== DELETE
:
312 request
.set_method(urlfetch_service_pb
.URLFetchRequest
.DELETE
)
313 elif method
== PATCH
:
314 request
.set_method(urlfetch_service_pb
.URLFetchRequest
.PATCH
)
317 if payload
and method
in (POST
, PUT
, PATCH
):
318 request
.set_payload(payload
)
321 for key
, value
in headers
.iteritems():
322 header_proto
= request
.add_header()
323 header_proto
.set_key(key
)
328 header_proto
.set_value(str(value
))
330 request
.set_followredirects(follow_redirects
)
331 if validate_certificate
is not None:
332 request
.set_mustvalidateservercertificate(validate_certificate
)
334 if rpc
.deadline
is not None:
335 request
.set_deadline(rpc
.deadline
)
339 rpc
.make_call('Fetch', request
, response
, _get_fetch_result
, allow_truncated
)
343 def _get_fetch_result(rpc
):
344 """Check success, handle exceptions, and return converted RPC result.
346 This method waits for the RPC if it has not yet finished, and calls the
347 post-call hooks on the first invocation.
350 rpc: A UserRPC object.
353 InvalidURLError if the url was invalid.
354 DownloadError if there was a problem fetching the url.
355 ResponseTooLargeError if the response was either truncated (and
356 allow_truncated=False was passed to make_fetch_call()), or if it
357 was too big for us to download.
360 A _URLFetchResult object.
362 assert rpc
.service
== 'urlfetch', repr(rpc
.service
)
363 assert rpc
.method
== 'Fetch', repr(rpc
.method
)
365 url
= rpc
.request
.url()
369 except apiproxy_errors
.ApplicationError
, err
:
372 error_detail
= ' Error: ' + err
.error_detail
373 if (err
.application_error
==
374 urlfetch_service_pb
.URLFetchServiceError
.INVALID_URL
):
375 raise InvalidURLError(
376 'Invalid request URL: ' + url
+ error_detail
)
377 if (err
.application_error
==
378 urlfetch_service_pb
.URLFetchServiceError
.CLOSED
):
379 raise ConnectionClosedError(
380 'Connection closed unexpectedly by server at URL: ' + url
)
381 if (err
.application_error
==
382 urlfetch_service_pb
.URLFetchServiceError
.TOO_MANY_REDIRECTS
):
383 raise TooManyRedirectsError(
384 'Too many redirects at URL: ' + url
+ ' with redirect=true')
385 if (err
.application_error
==
386 urlfetch_service_pb
.URLFetchServiceError
.MALFORMED_REPLY
):
387 raise MalformedReplyError(
388 'Malformed HTTP reply received from server at URL: '
389 + url
+ error_detail
)
390 if (err
.application_error
==
391 urlfetch_service_pb
.URLFetchServiceError
.INTERNAL_TRANSIENT_ERROR
):
392 raise InteralTransientError(
393 'Temporary error in fetching URL: ' + url
+ ', please re-try')
394 if (err
.application_error
==
395 urlfetch_service_pb
.URLFetchServiceError
.DNS_ERROR
):
396 raise DNSLookupFailedError('DNS lookup failed for URL: ' + url
)
397 if (err
.application_error
==
398 urlfetch_service_pb
.URLFetchServiceError
.UNSPECIFIED_ERROR
):
399 raise DownloadError('Unspecified error in fetching URL: '
400 + url
+ error_detail
)
401 if (err
.application_error
==
402 urlfetch_service_pb
.URLFetchServiceError
.FETCH_ERROR
):
403 raise DownloadError("Unable to fetch URL: " + url
+ error_detail
)
404 if (err
.application_error
==
405 urlfetch_service_pb
.URLFetchServiceError
.RESPONSE_TOO_LARGE
):
406 raise ResponseTooLargeError('HTTP response too large from URL: ' + url
)
407 if (err
.application_error
==
408 urlfetch_service_pb
.URLFetchServiceError
.DEADLINE_EXCEEDED
):
409 raise DeadlineExceededError(
410 'Deadline exceeded while waiting for HTTP response from URL: ' + url
)
411 if (err
.application_error
==
412 urlfetch_service_pb
.URLFetchServiceError
.SSL_CERTIFICATE_ERROR
):
413 raise SSLCertificateError(
414 'Invalid and/or missing SSL certificate for URL: ' + url
)
417 response
= rpc
.response
418 allow_truncated
= rpc
.user_data
419 result
= _URLFetchResult(response
)
420 if response
.contentwastruncated() and not allow_truncated
:
421 raise ResponseTooLargeError(result
)
class _URLFetchResult(object):
    """A Pythonic representation of our fetch response protocol buffer.

    Attributes:
      content: Value of the response's content().
      status_code: Value of the response's statuscode().
      content_was_truncated: Value of the response's contentwastruncated().
      final_url: Value of the response's finalurl(), or None when that value
        is empty.
      header_msg: httplib.HTTPMessage parsed from the response headers.
      headers: _CaselessDict built from header_msg.items().
    """

    def __init__(self, response_proto):
        """Constructor.

        Args:
          response_proto: the URLFetchResponse proto buffer to wrap.
        """
        self.__pb = response_proto
        self.content = response_proto.content()
        self.status_code = response_proto.statuscode()
        self.content_was_truncated = response_proto.contentwastruncated()
        self.final_url = response_proto.finalurl() or None

        # Render the headers as "key: value" lines terminated by a blank
        # line so that httplib can parse them as a message header block.
        header_lines = ['%s: %s\n' % (header.key(), header.value())
                        for header in response_proto.header_list()]
        header_lines.append('\n')
        header_stream = StringIO.StringIO(''.join(header_lines))
        self.header_msg = httplib.HTTPMessage(header_stream)

        self.headers = _CaselessDict(self.header_msg.items())
def get_default_fetch_deadline():
    """Return the current thread's default for create_rpc()'s deadline.

    Returns:
      The value stored on the thread-local settings object, or None when no
      default has been stored for this thread.
    """
    try:
        return _thread_local_settings.default_fetch_deadline
    except AttributeError:
        # Nothing has been stored on this thread yet.
        return None
def set_default_fetch_deadline(value):
    """Store the default value for create_rpc()'s deadline parameter.

    The setting is thread-specific: it is kept on a thread-local object, so
    it only affects the calling thread.  No range or type checking is done
    on the value here.

    See also: create_rpc(), fetch()

    Args:
      value: The new default deadline; stored as given.
    """
    setattr(_thread_local_settings, 'default_fetch_deadline', value)