thirdparty/google_appengine/google/appengine/api/urlfetch.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright 2007 Google Inc.
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #     http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16 #
  17
  18 """URL downloading API.
  19
  20 Methods defined in this module:
  21    Fetch(): fetches a given URL using HTTP GET, POST, HEAD, PUT, or DELETE
  22 """
  23
  24
  25
  26
  27
  28 import os
  29 import UserDict
  30 import urllib2
  31 import urlparse
  32
  33 from google.appengine.api import apiproxy_rpc
  34 from google.appengine.api import apiproxy_stub_map
  35 from google.appengine.api import urlfetch_service_pb
  36 from google.appengine.api.urlfetch_errors import *
  37 from google.appengine.runtime import apiproxy_errors
  38
  39 MAX_REDIRECTS = 5
  40
  41 GET = 1
  42 POST = 2
  43 HEAD = 3
  44 PUT = 4
  45 DELETE = 5
  46
  47
  48 _URL_STRING_MAP = {
  49     'GET': GET,
  50     'POST': POST,
  51     'HEAD': HEAD,
  52     'PUT': PUT,
  53     'DELETE': DELETE,
  54 }
  55
  56
  57 _VALID_METHODS = frozenset(_URL_STRING_MAP.values())
  58
  59
  60 class _CaselessDict(UserDict.IterableUserDict):
  61   """Case insensitive dictionary.
  62
  63   This class was lifted from os.py and slightly modified.
  64   """
  65
  66   def __init__(self):
  67     UserDict.IterableUserDict.__init__(self)
  68     self.caseless_keys = {}
  69
  70   def __setitem__(self, key, item):
  71     """Set dictionary item.
  72
  73     Args:
  74       key: Key of new item.  Key is case insensitive, so "d['Key'] = value "
  75         will replace previous values set by "d['key'] = old_value".
  76       item: Item to store.
  77     """
  78     caseless_key = key.lower()
  79     if caseless_key in self.caseless_keys:
  80       del self.data[self.caseless_keys[caseless_key]]
  81     self.caseless_keys[caseless_key] = key
  82     self.data[key] = item
  83
  84   def __getitem__(self, key):
  85     """Get dictionary item.
  86
  87     Args:
  88       key: Key of item to get.  Key is case insensitive, so "d['Key']" is the
  89         same as "d['key']".
  90
  91     Returns:
  92       Item associated with key.
  93     """
  94     return self.data[self.caseless_keys[key.lower()]]
  95
  96   def __delitem__(self, key):
  97     """Remove item from dictionary.
  98
  99     Args:
 100       key: Key of item to remove.  Key is case insensitive, so "del d['Key']" is
 101         the same as "del d['key']"
 102     """
 103     caseless_key = key.lower()
 104     del self.data[self.caseless_keys[caseless_key]]
 105     del self.caseless_keys[caseless_key]
 106
 107   def has_key(self, key):
 108     """Determine if dictionary has item with specific key.
 109
 110     Args:
 111       key: Key to check for presence.  Key is case insensitive, so
 112         "d.has_key('Key')" evaluates to the same value as "d.has_key('key')".
 113
 114     Returns:
 115       True if dictionary contains key, else False.
 116     """
 117     return key.lower() in self.caseless_keys
 118
 119   def __contains__(self, key):
 120     """Same as 'has_key', but used for 'in' operator.'"""
 121     return self.has_key(key)
 122
 123   def get(self, key, failobj=None):
 124     """Get dictionary item, defaulting to another value if it does not exist.
 125
 126     Args:
 127       key: Key of item to get.  Key is case insensitive, so "d['Key']" is the
 128         same as "d['key']".
 129       failobj: Value to return if key not in dictionary.
 130     """
 131     try:
 132       cased_key = self.caseless_keys[key.lower()]
 133     except KeyError:
 134       return failobj
 135     return self.data[cased_key]
 136
 137   def update(self, dict=None, **kwargs):
 138     """Update dictionary using values from another dictionary and keywords.
 139
 140     Args:
 141       dict: Dictionary to update from.
 142       kwargs: Keyword arguments to update from.
 143     """
 144     if dict:
 145       try:
 146         keys = dict.keys()
 147       except AttributeError:
 148         for k, v in dict:
 149           self[k] = v
 150       else:
 151         for k in keys:
 152           self[k] = dict[k]
 153     if kwargs:
 154       self.update(kwargs)
 155
 156   def copy(self):
 157     """Make a shallow, case sensitive copy of self."""
 158     return dict(self)
 159
 160
 161 def _is_fetching_self(url, method):
 162   """Checks if the fetch is for the same URL from which it originated.
 163
 164   Args:
 165     url: str, The URL being fetched.
 166     method: value from _VALID_METHODS.
 167
 168   Returns:
 169     boolean indicating whether or not it seems that the app is trying to fetch
 170       itself.
 171   """
 172   if (method != GET or
 173       "HTTP_HOST" not in os.environ or
 174       "PATH_INFO" not in os.environ):
 175     return False
 176
 177   scheme, host_port, path, query, fragment = urlparse.urlsplit(url)
 178
 179   if host_port == os.environ['HTTP_HOST']:
 180     current_path = urllib2.unquote(os.environ['PATH_INFO'])
 181     desired_path = urllib2.unquote(path)
 182
 183     if (current_path == desired_path or
 184         (current_path in ('', '/') and desired_path in ('', '/'))):
 185       return True
 186
 187   return False
 188
 189
 190 def __create_rpc(deadline=None, callback=None):
 191   """DO NOT USE.  WILL CHANGE AND BREAK YOUR CODE.
 192
 193   Creates an RPC object for use with the urlfetch API.
 194
 195   Args:
 196     deadline: deadline in seconds for the operation.
 197     callback: callable to invoke on completion.
 198
 199   Returns:
 200     A _URLFetchRPC object.
 201   """
 202   return _URLFetchRPC(deadline, callback)
 203
 204
 205 def fetch(url, payload=None, method=GET, headers={}, allow_truncated=False,
 206           follow_redirects=True, deadline=None):
 207   """Fetches the given HTTP URL, blocking until the result is returned.
 208
 209   Other optional parameters are:
 210      method: GET, POST, HEAD, PUT, or DELETE
 211      payload: POST or PUT payload (implies method is not GET, HEAD, or DELETE).
 212        this is ignored if the method is not POST or PUT.
 213      headers: dictionary of HTTP headers to send with the request
 214      allow_truncated: if true, truncate large responses and return them without
 215        error. otherwise, ResponseTooLargeError will be thrown when a response is
 216        truncated.
 217      follow_redirects: if true (the default), redirects are
 218        transparently followed and the response (if less than 5
 219        redirects) contains the final destination's payload and the
 220        response status is 200.  You lose, however, the redirect chain
 221        information.  If false, you see the HTTP response yourself,
 222        including the 'Location' header, and redirects are not
 223        followed.
 224      deadline: deadline in seconds for the operation.
 225
 226   We use a HTTP/1.1 compliant proxy to fetch the result.
 227
 228   The returned data structure has the following fields:
 229      content: string containing the response from the server
 230      status_code: HTTP status code returned by the server
 231      headers: dictionary of headers returned by the server
 232
 233   If the URL is an empty string or obviously invalid, we throw an
 234   urlfetch.InvalidURLError. If the server cannot be contacted, we throw a
 235   urlfetch.DownloadError.  Note that HTTP errors are returned as a part
 236   of the returned structure, so HTTP errors like 404 do not result in an
 237   exception.
 238   """
 239   rpc = __create_rpc(deadline=deadline)
 240   rpc.make_call(url, payload, method, headers, follow_redirects)
 241   return rpc.get_result(allow_truncated)
 242
 243
 244 class _URLFetchRPC(object):
 245   """A RPC object that manages the urlfetch RPC.
 246
 247   Its primary functions are the following:
 248   1. Convert error codes to the URLFetchServiceError namespace and raise them
 249      when get_result is called.
 250   2. Wrap the urlfetch response with a _URLFetchResult object.
 251   """
 252
 253   def __init__(self, deadline=None, callback=None):
 254     """Construct a new url fetch RPC.
 255
 256     Args:
 257       deadline: deadline in seconds for the operation.
 258       callback: callable to invoke on completion.
 259     """
 260     self.__rpc = apiproxy_stub_map.CreateRPC('urlfetch')
 261     self.__rpc.deadline = deadline
 262     self.__rpc.callback = callback
 263     self.__called_hooks = False
 264
 265   def make_call(self, url, payload=None, method=GET, headers={},
 266                 follow_redirects=True):
 267     """Executes the RPC call to fetch a given HTTP URL.
 268
 269     See urlfetch.fetch for a thorough description of arguments.
 270     """
 271     assert self.__rpc.state is apiproxy_rpc.RPC.IDLE
 272     if isinstance(method, basestring):
 273       method = method.upper()
 274     method = _URL_STRING_MAP.get(method, method)
 275     if method not in _VALID_METHODS:
 276       raise InvalidMethodError('Invalid method %s.' % str(method))
 277
 278     if _is_fetching_self(url, method):
 279       raise InvalidURLError("App cannot fetch the same URL as the one used for "
 280                             "the request.")
 281
 282     self.__request = urlfetch_service_pb.URLFetchRequest()
 283     self.__response = urlfetch_service_pb.URLFetchResponse()
 284     self.__result = None
 285     self.__request.set_url(url)
 286
 287     if method == GET:
 288       self.__request.set_method(urlfetch_service_pb.URLFetchRequest.GET)
 289     elif method == POST:
 290       self.__request.set_method(urlfetch_service_pb.URLFetchRequest.POST)
 291     elif method == HEAD:
 292       self.__request.set_method(urlfetch_service_pb.URLFetchRequest.HEAD)
 293     elif method == PUT:
 294       self.__request.set_method(urlfetch_service_pb.URLFetchRequest.PUT)
 295     elif method == DELETE:
 296       self.__request.set_method(urlfetch_service_pb.URLFetchRequest.DELETE)
 297
 298     if payload and (method == POST or method == PUT):
 299       self.__request.set_payload(payload)
 300
 301     for key, value in headers.iteritems():
 302       header_proto = self.__request.add_header()
 303       header_proto.set_key(key)
 304       header_proto.set_value(str(value))
 305
 306     self.__request.set_followredirects(follow_redirects)
 307     if self.__rpc.deadline:
 308       self.__request.set_deadline(self.__rpc.deadline)
 309
 310     apiproxy_stub_map.apiproxy.GetPreCallHooks().Call(
 311         'urlfetch', 'Fetch', self.__request, self.__response)
 312     self.__rpc.MakeCall('urlfetch', 'Fetch', self.__request, self.__response)
 313
 314   def wait(self):
 315     """Waits for the urlfetch RPC to finish.  Idempotent.
 316     """
 317     assert self.__rpc.state is not apiproxy_rpc.RPC.IDLE
 318     if self.__rpc.state is apiproxy_rpc.RPC.RUNNING:
 319       self.__rpc.Wait()
 320
 321   def check_success(self, allow_truncated=False):
 322     """Check success and convert RPC exceptions to urlfetch exceptions.
 323
 324     This method waits for the RPC if it has not yet finished, and calls the
 325     post-call hooks on the first invocation.
 326
 327     Args:
 328       allow_truncated: if False, an error is raised if the response was
 329         truncated.
 330
 331     Raises:
 332       InvalidURLError if the url was invalid.
 333       DownloadError if there was a problem fetching the url.
 334       ResponseTooLargeError if the response was either truncated (and
 335         allow_truncated is false) or if it was too big for us to download.
 336     """
 337     assert self.__rpc.state is not apiproxy_rpc.RPC.IDLE
 338     if self.__rpc.state is apiproxy_rpc.RPC.RUNNING:
 339       self.wait()
 340
 341     try:
 342       self.__rpc.CheckSuccess()
 343       if not self.__called_hooks:
 344         self.__called_hooks = True
 345         apiproxy_stub_map.apiproxy.GetPostCallHooks().Call(
 346             'urlfetch', 'Fetch', self.__request, self.__response)
 347     except apiproxy_errors.ApplicationError, e:
 348       if (e.application_error ==
 349           urlfetch_service_pb.URLFetchServiceError.INVALID_URL):
 350         raise InvalidURLError(str(e))
 351       if (e.application_error ==
 352           urlfetch_service_pb.URLFetchServiceError.UNSPECIFIED_ERROR):
 353         raise DownloadError(str(e))
 354       if (e.application_error ==
 355           urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR):
 356         raise DownloadError(str(e))
 357       if (e.application_error ==
 358           urlfetch_service_pb.URLFetchServiceError.RESPONSE_TOO_LARGE):
 359         raise ResponseTooLargeError(None)
 360       if (e.application_error ==
 361           urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED):
 362         raise DownloadError(str(e))
 363       raise e
 364
 365     if self.__response.contentwastruncated() and not allow_truncated:
 366       raise ResponseTooLargeError(_URLFetchResult(self.__response))
 367
 368   def get_result(self, allow_truncated=False):
 369     """Returns the RPC result or raises an exception if the rpc failed.
 370
 371     This method waits for the RPC if not completed, and checks success.
 372
 373     Args:
 374       allow_truncated: if False, an error is raised if the response was
 375         truncated.
 376
 377     Returns:
 378       The urlfetch result.
 379
 380     Raises:
 381       Error if the rpc has not yet finished.
 382       InvalidURLError if the url was invalid.
 383       DownloadError if there was a problem fetching the url.
 384       ResponseTooLargeError if the response was either truncated (and
 385         allow_truncated is false) or if it was too big for us to download.
 386     """
 387     if self.__result is None:
 388       self.check_success(allow_truncated)
 389       self.__result = _URLFetchResult(self.__response)
 390     return self.__result
 391
 392
# Capitalized alias: the same function can also be called as Fetch().
Fetch = fetch
 394
 395
 396 class _URLFetchResult(object):
 397   """A Pythonic representation of our fetch response protocol buffer.
 398   """
 399
 400   def __init__(self, response_proto):
 401     """Constructor.
 402
 403     Args:
 404       response_proto: the URLFetchResponse proto buffer to wrap.
 405     """
 406     self.__pb = response_proto
 407     self.content = response_proto.content()
 408     self.status_code = response_proto.statuscode()
 409     self.content_was_truncated = response_proto.contentwastruncated()
 410     self.headers = _CaselessDict()
 411     for header_proto in response_proto.header_list():
 412       self.headers[header_proto.key()] = header_proto.value()