App Engine Python SDK version 1.8.9
[gae.git] / python / google / appengine / api / urlfetch.py
blob1c38f7d66d8033d83bf82c562dbcd4589affb0e4
1 #!/usr/bin/env python
3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
21 """URL downloading API.
23 Methods defined in this module:
24 Fetch(): fetches a given URL with an HTTP request using one of the methods
25 GET, POST, HEAD, PUT, DELETE or PATCH
26 """
36 import httplib
37 import os
38 import StringIO
39 import threading
40 import UserDict
41 import urllib2
42 import urlparse
44 from google.appengine.api import apiproxy_stub_map
45 from google.appengine.api import urlfetch_service_pb
46 from google.appengine.api.urlfetch_errors import *
47 from google.appengine.runtime import apiproxy_errors
# Redirect limit. Not referenced by the code visible in this module; the
# fetch() docstring mentions the same figure.
MAX_REDIRECTS = 5

# Integer codes identifying the supported HTTP methods.
GET, POST, HEAD, PUT, DELETE, PATCH = range(1, 7)

# Upper-case HTTP method name -> integer method code.
_URL_STRING_MAP = dict(
    GET=GET,
    POST=POST,
    HEAD=HEAD,
    PUT=PUT,
    DELETE=DELETE,
    PATCH=PATCH
)

# Every integer method code accepted by make_fetch_call().
_VALID_METHODS = frozenset(_URL_STRING_MAP.values())

# Thread-local storage used for the per-thread default fetch deadline.
_thread_local_settings = threading.local()
class _CaselessDict(UserDict.IterableUserDict):
    """Dictionary whose keys are looked up case-insensitively.

    The item itself is stored in ``self.data`` under the spelling used by the
    most recent assignment; ``self.caseless_keys`` maps each lower-cased key
    to that stored spelling.  Keys must provide a ``lower()`` method.

    This class was lifted from os.py and slightly modified.
    """

    def __init__(self, dict=None, **kwargs):
        """Create the dictionary, optionally seeded from a mapping/keywords.

        Args:
          dict: Optional mapping or iterable of (key, value) pairs.
          kwargs: Additional items given as keyword arguments.
        """
        # The index must exist before the base initializer runs, because the
        # base initializer fills the dictionary through update().
        self.caseless_keys = {}
        UserDict.IterableUserDict.__init__(self, dict, **kwargs)

    def __setitem__(self, key, item):
        """Set dictionary item.

        Args:
          key: Key of new item.  Key is case insensitive, so
            "d['Key'] = value" will replace previous values set by
            "d['key'] = old_value".
          item: Item to store.
        """
        caseless_key = key.lower()
        # Drop any entry stored under a different spelling of the same key.
        if caseless_key in self.caseless_keys:
            del self.data[self.caseless_keys[caseless_key]]
        self.caseless_keys[caseless_key] = key
        self.data[key] = item

    def __getitem__(self, key):
        """Get dictionary item.

        Args:
          key: Key of item to get.  Key is case insensitive, so "d['Key']" is
            the same as "d['key']".

        Returns:
          Item associated with key.

        Raises:
          KeyError: if no spelling of the key is present.
        """
        return self.data[self.caseless_keys[key.lower()]]

    def __delitem__(self, key):
        """Remove item from dictionary.

        Args:
          key: Key of item to remove.  Key is case insensitive, so
            "del d['Key']" is the same as "del d['key']".

        Raises:
          KeyError: if no spelling of the key is present.
        """
        caseless_key = key.lower()
        del self.data[self.caseless_keys[caseless_key]]
        del self.caseless_keys[caseless_key]

    def has_key(self, key):
        """Determine if dictionary has item with specific key.

        Args:
          key: Key to check for presence.  Key is case insensitive, so
            "d.has_key('Key')" evaluates to the same value as
            "d.has_key('key')".

        Returns:
          True if dictionary contains key, else False.
        """
        return key.lower() in self.caseless_keys

    def __contains__(self, key):
        """Same as 'has_key', but used for the 'in' operator."""
        return self.has_key(key)

    def get(self, key, failobj=None):
        """Get dictionary item, defaulting to another value if missing.

        Args:
          key: Key of item to get.  Key is case insensitive, so "d['Key']" is
            the same as "d['key']".
          failobj: Value to return if key not in dictionary.

        Returns:
          The stored item, or failobj when the key is absent.
        """
        try:
            cased_key = self.caseless_keys[key.lower()]
        except KeyError:
            return failobj
        return self.data[cased_key]

    def pop(self, key, *args):
        """Remove an item case-insensitively and return its value.

        The inherited Python 2 implementation pops from ``self.data`` only,
        which is case sensitive and leaves ``caseless_keys`` out of date.

        Args:
          key: Key of item to remove; matched case-insensitively.
          *args: Optional single default returned when the key is absent.

        Returns:
          The removed item, or the default when the key is absent.

        Raises:
          KeyError: if the key is absent and no default was given.
          TypeError: if more than one default is supplied.
        """
        if len(args) > 1:
            raise TypeError('pop expected at most 2 arguments, got %d'
                            % (len(args) + 1))
        caseless_key = key.lower()
        if caseless_key not in self.caseless_keys:
            if args:
                return args[0]
            raise KeyError(key)
        return self.data.pop(self.caseless_keys.pop(caseless_key))

    def popitem(self):
        """Remove and return an arbitrary (key, item) pair.

        Overridden so the case-insensitive index forgets the removed key.

        Raises:
          KeyError: if the dictionary is empty.
        """
        key, item = self.data.popitem()
        del self.caseless_keys[key.lower()]
        return key, item

    def clear(self):
        """Remove all items, including the case-insensitive index.

        Without clearing the index, a later assignment to a previously used
        key would try to delete a stored spelling that no longer exists.
        """
        self.data.clear()
        self.caseless_keys.clear()

    def update(self, dict=None, **kwargs):
        """Update dictionary using values from another dictionary and keywords.

        Args:
          dict: Dictionary (anything with keys()) or iterable of
            (key, value) pairs to update from.  Falsy values are ignored.
          kwargs: Keyword arguments to update from.
        """
        if dict:
            try:
                keys = dict.keys()
            except AttributeError:
                # Not a mapping: treat it as an iterable of pairs.
                for k, v in dict:
                    self[k] = v
            else:
                # Go through __setitem__ so the index stays consistent.
                for k in keys:
                    self[k] = dict[k]
        if kwargs:
            self.update(kwargs)

    def copy(self):
        """Make a shallow, case sensitive copy of self (a plain dict)."""
        return dict(self)
def _is_fetching_self(url, method):
    """Report whether a fetch targets the URL of the request being served.

    Only GET fetches are considered, and only when both HTTP_HOST and
    PATH_INFO are present in os.environ.

    Args:
      url: str, The URL being fetched.
      method: value from _VALID_METHODS.

    Returns:
      boolean indicating whether or not it seems that the app is trying to
      fetch itself.
    """
    if method != GET:
        return False

    environ = os.environ
    if "HTTP_HOST" not in environ or "PATH_INFO" not in environ:
        return False

    # Index 1 is the host[:port] component, index 2 the path.
    url_parts = urlparse.urlsplit(url)
    if url_parts[1] != environ['HTTP_HOST']:
        return False

    serving_path = urllib2.unquote(environ['PATH_INFO'])
    target_path = urllib2.unquote(url_parts[2])

    # An empty path and '/' both denote the root of the host.
    root_spellings = ('', '/')
    return (serving_path == target_path or
            (serving_path in root_spellings and
             target_path in root_spellings))
def create_rpc(deadline=None, callback=None):
    """Build a UserRPC object bound to the urlfetch service.

    Args:
      deadline: Optional deadline in seconds for the operation; when None,
        the value of get_default_fetch_deadline() is used instead (the
        default is a system-specific deadline, typically 5 seconds).
      callback: Optional callable to invoke on completion.

    Returns:
      An apiproxy_stub_map.UserRPC object specialized for this service.
    """
    effective_deadline = deadline
    if effective_deadline is None:
        effective_deadline = get_default_fetch_deadline()
    return apiproxy_stub_map.UserRPC('urlfetch', effective_deadline, callback)
def fetch(url, payload=None, method=GET, headers=None,
          allow_truncated=False, follow_redirects=True,
          deadline=None, validate_certificate=None):
    """Fetches the given HTTP URL, blocking until the result is returned.

    Other optional parameters are:
      method: The constants GET, POST, HEAD, PUT, DELETE, or PATCH or the
        same HTTP methods as strings.
      payload: POST, PUT, or PATCH payload (implies method is not GET, HEAD,
        or DELETE). This is ignored if the method is not POST, PUT, or PATCH.
      headers: dictionary of HTTP headers to send with the request; None
        (the default) sends no additional headers.
      allow_truncated: if true, truncate large responses and return them
        without error. Otherwise, ResponseTooLargeError is raised when a
        response is truncated.
      follow_redirects: if true (the default), redirects are
        transparently followed and the response (if less than 5
        redirects) contains the final destination's payload and the
        response status is 200. You lose, however, the redirect chain
        information. If false, you see the HTTP response yourself,
        including the 'Location' header, and redirects are not
        followed.
      deadline: deadline in seconds for the operation.
      validate_certificate: if true, do not send request to server unless the
        certificate is valid, signed by a trusted CA and the hostname matches
        the certificate. A value of None indicates that the behaviour will be
        chosen by the underlying urlfetch implementation.

    We use a HTTP/1.1 compliant proxy to fetch the result.

    The returned data structure has the following fields:
       content: string containing the response from the server
       status_code: HTTP status code returned by the server
       headers: dictionary of headers returned by the server

    If the URL is an empty string or obviously invalid, we throw an
    urlfetch.InvalidURLError. If the server cannot be contacted, we throw a
    urlfetch.DownloadError.  Note that HTTP errors are returned as a part
    of the returned structure, so HTTP errors like 404 do not result in an
    exception.
    """
    # A fresh dict per call avoids sharing one mutable default object
    # between every invocation of this function.
    if headers is None:
        headers = {}

    rpc = create_rpc(deadline=deadline)
    make_fetch_call(rpc, url, payload, method, headers,
                    allow_truncated, follow_redirects, validate_certificate)
    return rpc.get_result()
def make_fetch_call(rpc, url, payload=None, method=GET, headers=None,
                    allow_truncated=False, follow_redirects=True,
                    validate_certificate=None):
    """Executes the RPC call to fetch a given HTTP URL.

    The first argument is a UserRPC instance.  See urlfetch.fetch for a
    thorough description of remaining arguments.

    Raises:
      InvalidMethodError: if method is neither a supported method constant
        nor the name of one.
      InvalidURLError: if the app appears to be fetching the URL of the
        request it is currently serving.

    Returns:
      The rpc object passed into the function.
    """
    assert rpc.service == 'urlfetch', repr(rpc.service)

    # A fresh dict per call avoids sharing one mutable default object
    # between every invocation of this function.
    if headers is None:
        headers = {}

    # Accept method names in any case as well as the integer constants.
    if isinstance(method, basestring):
        method = method.upper()
    method = _URL_STRING_MAP.get(method, method)
    if method not in _VALID_METHODS:
        raise InvalidMethodError('Invalid method %s.' % str(method))

    if _is_fetching_self(url, method):
        raise InvalidURLError("App cannot fetch the same URL as the one used for "
                              "the request.")

    request = urlfetch_service_pb.URLFetchRequest()
    response = urlfetch_service_pb.URLFetchResponse()

    # The request carries the URL as a byte string.
    if isinstance(url, unicode):
        url = url.encode('UTF-8')
    request.set_url(url)

    # Translate this module's method constant into the protocol buffer's.
    request_type = urlfetch_service_pb.URLFetchRequest
    wire_methods = {
        GET: request_type.GET,
        POST: request_type.POST,
        HEAD: request_type.HEAD,
        PUT: request_type.PUT,
        DELETE: request_type.DELETE,
        PATCH: request_type.PATCH,
    }
    request.set_method(wire_methods[method])

    # A payload is only attached for methods that carry a request body.
    if payload and method in (POST, PUT, PATCH):
        request.set_payload(payload)

    for key, value in headers.iteritems():
        header_proto = request.add_header()
        header_proto.set_key(key)
        # Header values are always sent as strings.
        header_proto.set_value(str(value))

    request.set_followredirects(follow_redirects)
    if validate_certificate is not None:
        request.set_mustvalidateservercertificate(validate_certificate)

    if rpc.deadline is not None:
        request.set_deadline(rpc.deadline)

    # allow_truncated travels as the RPC's user data so that
    # _get_fetch_result can read it back when the call completes.
    rpc.make_call('Fetch', request, response, _get_fetch_result, allow_truncated)
    return rpc
def _get_fetch_result(rpc):
    """Check success, handle exceptions, and return converted RPC result.

    This method waits for the RPC if it has not yet finished, and calls the
    post-call hooks on the first invocation.

    Args:
      rpc: A UserRPC object.

    Raises:
      InvalidURLError if the url was invalid.
      DownloadError if there was a problem fetching the url.
      ResponseTooLargeError if the response was either truncated (and
        allow_truncated=False was passed to make_fetch_call()), or if it
        was too big for us to download.
      Other urlfetch error types for the remaining service error codes; an
        ApplicationError with an unrecognized code is re-raised unchanged.

    Returns:
      A _URLFetchResult object.
    """
    assert rpc.service == 'urlfetch', repr(rpc.service)
    assert rpc.method == 'Fetch', repr(rpc.method)

    url = rpc.request.url()

    try:
        rpc.check_success()
    except apiproxy_errors.ApplicationError as err:
        # Optional server-supplied detail, appended to some messages only.
        detail = ' Error: ' + err.error_detail if err.error_detail else ''
        code = err.application_error
        codes = urlfetch_service_pb.URLFetchServiceError

        if code == codes.INVALID_URL:
            raise InvalidURLError('Invalid request URL: ' + url + detail)
        if code == codes.CLOSED:
            raise ConnectionClosedError(
                'Connection closed unexpectedly by server at URL: ' + url)
        if code == codes.TOO_MANY_REDIRECTS:
            raise TooManyRedirectsError(
                'Too many redirects at URL: ' + url + ' with redirect=true')
        if code == codes.MALFORMED_REPLY:
            raise MalformedReplyError(
                'Malformed HTTP reply received from server at URL: '
                + url + detail)
        if code == codes.INTERNAL_TRANSIENT_ERROR:
            # NOTE(review): the class name is spelled exactly as in the
            # original code; confirm against urlfetch_errors before renaming.
            raise InteralTransientError(
                'Temporary error in fetching URL: ' + url + ', please re-try')
        if code == codes.DNS_ERROR:
            raise DNSLookupFailedError('DNS lookup failed for URL: ' + url)
        if code == codes.UNSPECIFIED_ERROR:
            raise DownloadError(
                'Unspecified error in fetching URL: ' + url + detail)
        if code == codes.FETCH_ERROR:
            raise DownloadError("Unable to fetch URL: " + url + detail)
        if code == codes.RESPONSE_TOO_LARGE:
            raise ResponseTooLargeError(
                'HTTP response too large from URL: ' + url)
        if code == codes.DEADLINE_EXCEEDED:
            raise DeadlineExceededError(
                'Deadline exceeded while waiting for HTTP response from URL: '
                + url)
        if code == codes.SSL_CERTIFICATE_ERROR:
            raise SSLCertificateError(
                'Invalid and/or missing SSL certificate for URL: ' + url)
        # No specific translation for this code: propagate the original.
        raise err

    fetch_response = rpc.response
    # make_fetch_call() stored allow_truncated as the RPC's user data.
    truncation_allowed = rpc.user_data
    fetch_result = _URLFetchResult(fetch_response)
    if fetch_response.contentwastruncated() and not truncation_allowed:
        # The partial result is handed to the exception.
        raise ResponseTooLargeError(fetch_result)
    return fetch_result
# Capitalized alias for fetch(); the module docstring refers to the API
# under this name.
Fetch = fetch
class _URLFetchResult(object):
    """A Pythonic representation of our fetch response protocol buffer."""

    def __init__(self, response_proto):
        """Constructor.

        Copies the response fields onto plain attributes: content,
        status_code, content_was_truncated, final_url (None when the proto
        reports an empty value), header_msg and headers.

        Args:
          response_proto: the URLFetchResponse proto buffer to wrap.
        """
        self.__pb = response_proto
        self.content = response_proto.content()
        self.status_code = response_proto.statuscode()
        self.content_was_truncated = response_proto.contentwastruncated()
        self.final_url = response_proto.finalurl() or None

        # Render the headers as "key: value" lines followed by a blank line,
        # then let httplib parse that text.
        header_lines = ['%s: %s\n' % (header.key(), header.value())
                        for header in response_proto.header_list()]
        header_lines.append('\n')
        header_text = ''.join(header_lines)
        self.header_msg = httplib.HTTPMessage(StringIO.StringIO(header_text))

        # Case-insensitive view of the parsed headers.
        self.headers = _CaselessDict(self.header_msg.items())
def get_default_fetch_deadline():
    """Return the current thread's default deadline for create_rpc().

    Returns:
      The value stored by set_default_fetch_deadline() in this thread, or
      None when nothing has been stored.
    """
    try:
        return _thread_local_settings.default_fetch_deadline
    except AttributeError:
        # Nothing has been stored for this thread yet.
        return None
def set_default_fetch_deadline(value):
    """Store the default value for create_rpc()'s deadline parameter.

    The setting is kept in a thread local, so it applies only to the calling
    thread.  No range or type checking is performed on the value.  The
    default is None.

    See also: create_rpc(), fetch()

    Args:
      value: The deadline to use when create_rpc() is given none.
    """
    setattr(_thread_local_settings, 'default_fetch_deadline', value)