1.9.30 sync.
[gae.git] / python / google / appengine / api / urlfetch.py
blob6432b6af5a7d4064d7105f7590b48072052ba6fd
1 #!/usr/bin/env python
3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
21 """URL downloading API.
23 Methods defined in this module:
24 Fetch(): fetches a given URL with an HTTP request using one of the methods
25 GET, POST, HEAD, PUT, DELETE or PATCH
26 """
37 import httplib
38 import os
39 import StringIO
40 import threading
41 import UserDict
42 import urllib2
43 import urlparse
45 from google.appengine.api import apiproxy_stub_map
46 from google.appengine.api import urlfetch_service_pb
47 from google.appengine.api.urlfetch_errors import *
48 from google.appengine.runtime import apiproxy_errors
# Not referenced in the visible part of this module; presumably the redirect
# limit mentioned in fetch()'s documentation -- confirm before relying on it.
MAX_REDIRECTS = 5

# Integer codes for the supported HTTP methods: GET=1, POST=2, HEAD=3, PUT=4,
# DELETE=5, PATCH=6.
GET, POST, HEAD, PUT, DELETE, PATCH = range(1, 7)

# Upper-case method name -> method code, used to accept methods given as
# strings.
_URL_STRING_MAP = dict(
    GET=GET,
    POST=POST,
    HEAD=HEAD,
    PUT=PUT,
    DELETE=DELETE,
    PATCH=PATCH,
)

# Every method code that make_fetch_call() accepts.
_VALID_METHODS = frozenset(_URL_STRING_MAP.values())

# Per-thread storage for the default fetch deadline.
_thread_local_settings = threading.local()
76 class _CaselessDict(UserDict.IterableUserDict):
77 """Case insensitive dictionary.
79 This class was lifted from os.py and slightly modified.
80 """
82 def __init__(self, dict=None, **kwargs):
83 self.caseless_keys = {}
84 UserDict.IterableUserDict.__init__(self, dict, **kwargs)
86 def __setitem__(self, key, item):
87 """Set dictionary item.
89 Args:
90 key: Key of new item. Key is case insensitive, so "d['Key'] = value "
91 will replace previous values set by "d['key'] = old_value".
92 item: Item to store.
93 """
94 caseless_key = key.lower()
96 if caseless_key in self.caseless_keys:
97 del self.data[self.caseless_keys[caseless_key]]
98 self.caseless_keys[caseless_key] = key
99 self.data[key] = item
101 def __getitem__(self, key):
102 """Get dictionary item.
104 Args:
105 key: Key of item to get. Key is case insensitive, so "d['Key']" is the
106 same as "d['key']".
108 Returns:
109 Item associated with key.
111 return self.data[self.caseless_keys[key.lower()]]
113 def __delitem__(self, key):
114 """Remove item from dictionary.
116 Args:
117 key: Key of item to remove. Key is case insensitive, so "del d['Key']" is
118 the same as "del d['key']"
120 caseless_key = key.lower()
121 del self.data[self.caseless_keys[caseless_key]]
122 del self.caseless_keys[caseless_key]
124 def has_key(self, key):
125 """Determine if dictionary has item with specific key.
127 Args:
128 key: Key to check for presence. Key is case insensitive, so
129 "d.has_key('Key')" evaluates to the same value as "d.has_key('key')".
131 Returns:
132 True if dictionary contains key, else False.
134 return key.lower() in self.caseless_keys
136 def __contains__(self, key):
137 """Same as 'has_key', but used for 'in' operator.'"""
138 return self.has_key(key)
140 def get(self, key, failobj=None):
141 """Get dictionary item, defaulting to another value if it does not exist.
143 Args:
144 key: Key of item to get. Key is case insensitive, so "d['Key']" is the
145 same as "d['key']".
146 failobj: Value to return if key not in dictionary.
148 try:
149 cased_key = self.caseless_keys[key.lower()]
150 except KeyError:
151 return failobj
152 return self.data[cased_key]
154 def update(self, dict=None, **kwargs):
155 """Update dictionary using values from another dictionary and keywords.
157 Args:
158 dict: Dictionary to update from.
159 kwargs: Keyword arguments to update from.
161 if dict:
162 try:
163 keys = dict.keys()
164 except AttributeError:
166 for k, v in dict:
167 self[k] = v
168 else:
172 for k in keys:
173 self[k] = dict[k]
174 if kwargs:
175 self.update(kwargs)
177 def copy(self):
178 """Make a shallow, case sensitive copy of self."""
179 return dict(self)
def _is_fetching_self(url, method):
    """Report whether a fetch appears to target the request being served.

    Only GET fetches are considered, and only when both HTTP_HOST and
    PATH_INFO are present in os.environ.

    Args:
      url: str, the URL about to be fetched.
      method: value from _VALID_METHODS.

    Returns:
      True if the URL's network location equals HTTP_HOST and its unquoted
      path matches the unquoted PATH_INFO (an empty path and '/' count as the
      same path); False otherwise.
    """
    if method != GET:
        return False

    environ = os.environ
    if "HTTP_HOST" not in environ or "PATH_INFO" not in environ:
        return False

    split_url = urlparse.urlsplit(url)
    netloc, path = split_url[1], split_url[2]
    if netloc != environ['HTTP_HOST']:
        return False

    current_path = urllib2.unquote(environ['PATH_INFO'])
    desired_path = urllib2.unquote(path)
    if current_path == desired_path:
        return True

    # '' and '/' both denote the root of the host.
    root_paths = ('', '/')
    return current_path in root_paths and desired_path in root_paths
def create_rpc(deadline=None, callback=None):
    """Build an RPC object bound to the 'urlfetch' service.

    Args:
      deadline: Optional deadline in seconds for the operation. When None,
        the value returned by get_default_fetch_deadline() is used instead;
        if that is None as well, the choice is left to the RPC layer (the
        original documentation describes a system-specific default,
        typically 5 seconds).
      callback: Optional callable to invoke on completion.

    Returns:
      An apiproxy_stub_map.UserRPC object specialized for this service.
    """
    effective_deadline = (
        deadline if deadline is not None else get_default_fetch_deadline())
    return apiproxy_stub_map.UserRPC('urlfetch', effective_deadline, callback)
def fetch(url, payload=None, method=GET, headers={},
          allow_truncated=False, follow_redirects=True,
          deadline=None, validate_certificate=None):
    """Fetch the given HTTP URL, blocking until the result is available.

    An HTTP/1.1 compliant proxy is used to fetch the result.

    Args:
      url: The URL to fetch.
      payload: POST, PUT, or PATCH payload. It is ignored when the method is
        not POST, PUT, or PATCH.
      method: One of the constants GET, POST, HEAD, PUT, DELETE, or PATCH, or
        the same HTTP method given as a string.
      headers: Dictionary of HTTP headers to send with the request.
      allow_truncated: If true, large responses are truncated and returned
        without error. Otherwise ResponseTooLargeError is raised when a
        response is truncated.
      follow_redirects: If true (the default), redirects are followed
        transparently and the response (if fewer than 5 redirects) contains
        the final destination's payload with a response status of 200; the
        redirect chain information is lost. If false, the HTTP response is
        returned as is, including the 'Location' header, and redirects are
        not followed.
      deadline: Deadline in seconds for the operation.
      validate_certificate: If true, the request is not sent unless the
        server certificate is valid, signed by a trusted CA, and matches the
        hostname. None leaves the choice to the underlying urlfetch
        implementation.

    Returns:
      An object with the following fields:
        content: string containing the response from the server.
        status_code: HTTP status code returned by the server.
        headers: dictionary of headers returned by the server.

    Raises:
      urlfetch.InvalidURLError: the URL is an empty string or obviously
        invalid.
      urlfetch.DownloadError: the server cannot be contacted.

    HTTP errors such as 404 are reported through the returned object and do
    not raise an exception.
    """
    rpc = create_rpc(deadline=deadline)
    make_fetch_call(rpc, url,
                    payload=payload,
                    method=method,
                    headers=headers,
                    allow_truncated=allow_truncated,
                    follow_redirects=follow_redirects,
                    validate_certificate=validate_certificate)
    return rpc.get_result()
def make_fetch_call(rpc, url, payload=None, method=GET, headers={},
                    allow_truncated=False, follow_redirects=True,
                    validate_certificate=None):
    """Start the RPC call that fetches a given HTTP URL.

    Args:
      rpc: A UserRPC instance for the 'urlfetch' service.
      url, payload, method, headers, allow_truncated, follow_redirects,
      validate_certificate: See urlfetch.fetch for a thorough description.

    Raises:
      InvalidMethodError: if the requested method is not in _VALID_METHODS.
      InvalidURLError: if the app appears to be fetching the URL it is
        currently serving. Other URL problems, and ResponseTooLargeError,
        are raised by _get_fetch_result, which is registered with the RPC
        here -- presumably invoked when the result is retrieved.

    Returns:
      The rpc object passed into the function.
    """
    assert rpc.service == 'urlfetch', repr(rpc.service)

    # Accept method names in any letter case as well as the numeric codes.
    if isinstance(method, basestring):
        method = method.upper()
    method = _URL_STRING_MAP.get(method, method)
    if method not in _VALID_METHODS:
        raise InvalidMethodError('Invalid method %s.' % str(method))

    if _is_fetching_self(url, method):
        raise InvalidURLError("App cannot fetch the same URL as the one used for "
                              "the request.")

    request = urlfetch_service_pb.URLFetchRequest()
    response = urlfetch_service_pb.URLFetchResponse()

    # The request takes a byte string, so unicode URLs are encoded first.
    if isinstance(url, unicode):
        url = url.encode('UTF-8')
    request.set_url(url)

    # Translate this module's method code into the protocol buffer's one.
    request_pb = urlfetch_service_pb.URLFetchRequest
    wire_methods = {
        GET: request_pb.GET,
        POST: request_pb.POST,
        HEAD: request_pb.HEAD,
        PUT: request_pb.PUT,
        DELETE: request_pb.DELETE,
        PATCH: request_pb.PATCH,
    }
    request.set_method(wire_methods[method])

    # A payload is only sent with methods that carry a request body.
    if payload and method in (POST, PUT, PATCH):
        request.set_payload(payload)

    for header_name, header_value in headers.iteritems():
        header_entry = request.add_header()
        header_entry.set_key(header_name)
        header_entry.set_value(str(header_value))

    request.set_followredirects(follow_redirects)
    if validate_certificate is not None:
        request.set_mustvalidateservercertificate(validate_certificate)

    if rpc.deadline is not None:
        request.set_deadline(rpc.deadline)

    # allow_truncated travels as the RPC's user data so that
    # _get_fetch_result can read it back.
    rpc.make_call('Fetch', request, response, _get_fetch_result,
                  allow_truncated)
    return rpc
def _get_fetch_result(rpc):
    """Check success, handle exceptions, and return converted RPC result.

    This method waits for the RPC if it has not yet finished, and calls the
    post-call hooks on the first invocation.

    Args:
      rpc: A UserRPC object for the 'urlfetch' service and 'Fetch' method.

    Raises:
      InvalidURLError: if the url was invalid, or the request body was too
        large.
      DownloadError: if there was a problem fetching the url. Several more
        specific error classes (connection closed, too many redirects,
        malformed reply, transient internal error, DNS failure, deadline
        exceeded, SSL certificate error) are raised for the matching
        service error codes.
      ResponseTooLargeError: if the response was either truncated (and
        allow_truncated=False was passed to make_fetch_call()), or if it
        was too big for us to download.
      apiproxy_errors.ApplicationError: re-raised unchanged, with its
        original traceback, when the error code is not one of the known
        ones.

    Returns:
      A _URLFetchResult object.
    """
    assert rpc.service == 'urlfetch', repr(rpc.service)
    assert rpc.method == 'Fetch', repr(rpc.method)

    url = rpc.request.url()

    try:
        rpc.check_success()
    except apiproxy_errors.RequestTooLargeError:
        raise InvalidURLError(
            'Request body too large fetching URL: ' + url)
    except apiproxy_errors.ApplicationError as err:
        error_detail = ''
        if err.error_detail:
            error_detail = ' Error: ' + err.error_detail

        codes = urlfetch_service_pb.URLFetchServiceError
        # (service error code, exception class, text before the URL, text
        # after the URL). Rows are tried in order and the first match wins.
        translations = (
            (codes.INVALID_URL, InvalidURLError,
             'Invalid request URL: ', error_detail),
            (codes.CLOSED, ConnectionClosedError,
             'Connection closed unexpectedly by server at URL: ', ''),
            (codes.TOO_MANY_REDIRECTS, TooManyRedirectsError,
             'Too many redirects at URL: ', ' with redirect=true'),
            (codes.MALFORMED_REPLY, MalformedReplyError,
             'Malformed HTTP reply received from server at URL: ',
             error_detail),
            (codes.INTERNAL_TRANSIENT_ERROR, InternalTransientError,
             'Temporary error in fetching URL: ', ', please re-try'),
            (codes.DNS_ERROR, DNSLookupFailedError,
             'DNS lookup failed for URL: ', ''),
            (codes.UNSPECIFIED_ERROR, DownloadError,
             'Unspecified error in fetching URL: ', error_detail),
            (codes.FETCH_ERROR, DownloadError,
             "Unable to fetch URL: ", error_detail),
            (codes.RESPONSE_TOO_LARGE, ResponseTooLargeError,
             'HTTP response too large from URL: ', ''),
            (codes.DEADLINE_EXCEEDED, DeadlineExceededError,
             'Deadline exceeded while waiting for HTTP response from URL: ',
             ''),
            (codes.SSL_CERTIFICATE_ERROR, SSLCertificateError,
             'Invalid and/or missing SSL certificate for URL: ', ''),
            (codes.CONNECTION_ERROR, DownloadError,
             'Unable to connect to server at URL: ', ''),
        )
        for code, error_class, before_url, after_url in translations:
            if err.application_error == code:
                raise error_class(before_url + url + after_url)

        # Unknown error code: propagate the original exception. A bare
        # raise keeps the traceback of the failing call.
        raise

    response = rpc.response
    # make_fetch_call() passes allow_truncated as the RPC's user data.
    allow_truncated = rpc.user_data
    result = _URLFetchResult(response)
    if response.contentwastruncated() and not allow_truncated:
        raise ResponseTooLargeError(result)
    return result
# Capitalized alias of fetch(); the module docstring lists the API as Fetch().
Fetch = fetch
class _URLFetchResult(object):
    """A Pythonic representation of our fetch response protocol buffer.

    Attributes set by the constructor:
      content: value of the response's content().
      status_code: value of the response's statuscode().
      content_was_truncated: value of the response's contentwastruncated().
      final_url: value of the response's finalurl(), or None when that value
        is falsy.
      header_msg: httplib.HTTPMessage parsed from the response headers.
      headers: _CaselessDict built from the items of header_msg.
    """

    def __init__(self, response_proto):
        """Constructor.

        Args:
          response_proto: the URLFetchResponse proto buffer to wrap.
        """
        self.__pb = response_proto
        self.content = response_proto.content()
        self.status_code = response_proto.statuscode()
        self.content_was_truncated = response_proto.contentwastruncated()
        self.final_url = response_proto.finalurl() or None

        # Render the headers as an RFC 822 style block, terminated by an
        # empty line, so that httplib can parse them.
        header_lines = ['%s: %s\n' % (header.key(), header.value())
                        for header in response_proto.header_list()]
        header_lines.append('\n')
        raw_headers = StringIO.StringIO(''.join(header_lines))
        self.header_msg = httplib.HTTPMessage(raw_headers)
        self.headers = _CaselessDict(self.header_msg.items())
def get_default_fetch_deadline():
    """Return the current thread's default for create_rpc()'s deadline.

    Returns:
      The value stored by set_default_fetch_deadline() in this thread, or
      None if no value has been stored.
    """
    try:
        return _thread_local_settings.default_fetch_deadline
    except AttributeError:
        return None
def set_default_fetch_deadline(value):
    """Set the default value for create_rpc()'s deadline parameter.

    The setting is thread-specific: it is stored in a thread local. No range
    or type checking is applied to the value. Until a value is set, the
    default is None.

    See also: create_rpc(), fetch()

    Args:
      value: The new default deadline for the calling thread.
    """
    setattr(_thread_local_settings, 'default_fetch_deadline', value)