#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
23 """Stub version of the urlfetch API, based on httplib."""

_successfully_imported_fancy_urllib = False
_fancy_urllib_InvalidCertException = None
_fancy_urllib_SSLError = None
try:
  import fancy_urllib
  _successfully_imported_fancy_urllib = True
  _fancy_urllib_InvalidCertException = fancy_urllib.InvalidCertificateException
  _fancy_urllib_SSLError = fancy_urllib.SSLError
except ImportError:
  pass

import gzip
import httplib
import logging
import os
import socket
import StringIO
import sys
import urllib
import urlparse

from google.appengine.api import apiproxy_stub
from google.appengine.api import urlfetch
from google.appengine.api import urlfetch_errors
from google.appengine.api import urlfetch_service_pb
from google.appengine.runtime import apiproxy_errors
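

# Size limits, in bytes: request payloads are capped at 10 MB (10 << 20) and
# response bodies are truncated at 32 MB (2 ** 25).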
MAX_REQUEST_SIZE = 10 << 20

MAX_RESPONSE_SIZE = 2 ** 25

MAX_REDIRECTS = urlfetch.MAX_REDIRECTS

REDIRECT_STATUSES = frozenset([
    httplib.MOVED_PERMANENTLY,
    httplib.FOUND,
    httplib.SEE_OTHER,
    httplib.TEMPORARY_REDIRECT,
])
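
# Default deadline, in seconds, used when a request does not carry its own.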
_API_CALL_DEADLINE = 5.0
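
# Certificate validation is off by default in the stub; an individual request
# can opt in via its mustvalidateservercertificate field.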
_API_CALL_VALIDATE_CERTIFICATE_DEFAULT = False
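
# httplib connections accept a timeout argument only on Python 2.6+; on older
# runtimes the code below falls back to socket.setdefaulttimeout().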
_CONNECTION_SUPPORTS_TIMEOUT = sys.version_info >= (2, 6)
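
# Headers the production service sets itself; caller-supplied values for
# these are stripped by _SanitizeHttpHeaders() before the fetch is made.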
_UNTRUSTED_REQUEST_HEADERS = frozenset([
    'content-length',
    'host',
    'vary',
    'via',
    'x-forwarded-for',
])

_MAX_URL_LENGTH = 2048


def _CanValidateCerts():
  return (_successfully_imported_fancy_urllib and
          fancy_urllib.can_validate_certs())


def _SetupSSL(path):
  global CERT_PATH
  if os.path.exists(path):
    CERT_PATH = path
  else:
    CERT_PATH = None
    logging.warning('%s missing; without this urlfetch will not be able to '
                    'validate SSL certificates.', path)

  if not _CanValidateCerts():
    logging.warning('No ssl package found. urlfetch will not be able to '
                    'validate SSL certificates.')


_SetupSSL(os.path.normpath(os.path.join(os.path.dirname(__file__), '..', '..',
                                        '..', 'lib', 'cacerts',
                                        'urlfetch_cacerts.txt')))
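

# Production urlfetch only permits outbound connections to ports 80-90,
# 440-450, and 1024 and above; the stub mirrors that restriction here.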
def _IsAllowedPort(port):
  if port is None:
    return True
  try:
    port = int(port)
  except ValueError, e:
    return False

  if ((port >= 80 and port <= 90) or
      (port >= 440 and port <= 450) or
      port >= 1024):
    return True
  return False


class URLFetchServiceStub(apiproxy_stub.APIProxyStub):
  """Stub version of the urlfetch API to be used with apiproxy_stub_map."""

  THREADSAFE = True

  def __init__(self,
               service_name='urlfetch',
               urlmatchers_to_fetch_functions=None):
151 """Initializer.
153 Args:
154 service_name: Service name expected for all calls.
155 urlmatchers_to_fetch_functions: A list of two-element tuples.
156 The first element is a urlmatcher predicate function that takes
157 a url and determines a match. The second is a function that
158 can retrieve result for that url. If no match is found, a url is
159 handled by the default _RetrieveURL function.
160 When more than one match is possible, the first match is used.
162 super(URLFetchServiceStub, self).__init__(service_name,
163 max_request_size=MAX_REQUEST_SIZE)
164 self._urlmatchers_to_fetch_functions = urlmatchers_to_fetch_functions or []

  def _Dynamic_Fetch(self, request, response):
    """Trivial implementation of URLFetchService::Fetch().

    Args:
      request: the fetch to perform, a URLFetchRequest
      response: the fetch response, a URLFetchResponse
    """
    if len(request.url()) >= _MAX_URL_LENGTH:
      logging.error('URL is too long: %s...' % request.url()[:50])
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    (protocol, host, path, query, fragment) = urlparse.urlsplit(request.url())

    payload = None
    if request.method() == urlfetch_service_pb.URLFetchRequest.GET:
      method = 'GET'
    elif request.method() == urlfetch_service_pb.URLFetchRequest.POST:
      method = 'POST'
      payload = request.payload()
    elif request.method() == urlfetch_service_pb.URLFetchRequest.HEAD:
      method = 'HEAD'
    elif request.method() == urlfetch_service_pb.URLFetchRequest.PUT:
      method = 'PUT'
      payload = request.payload()
    elif request.method() == urlfetch_service_pb.URLFetchRequest.DELETE:
      method = 'DELETE'
    elif request.method() == urlfetch_service_pb.URLFetchRequest.PATCH:
      method = 'PATCH'
      payload = request.payload()
    else:
      logging.error('Invalid method: %s', request.method())
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if not (protocol == 'http' or protocol == 'https'):
      logging.error('Invalid protocol: %s', protocol)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    if not host:
      logging.error('Missing host.')
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    self._SanitizeHttpHeaders(_UNTRUSTED_REQUEST_HEADERS,
                              request.header_list())
    deadline = _API_CALL_DEADLINE
    if request.has_deadline():
      deadline = request.deadline()
    validate_certificate = _API_CALL_VALIDATE_CERTIFICATE_DEFAULT
    if request.has_mustvalidateservercertificate():
      validate_certificate = request.mustvalidateservercertificate()
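
    # Dispatch to a test-supplied fetch function when a urlmatcher claims
    # this URL; otherwise fall through to the real network fetch in
    # _RetrieveURL.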
    fetch_function = self._GetFetchFunction(request.url())
    fetch_function(request.url(), payload, method,
                   request.header_list(), request, response,
                   follow_redirects=request.followredirects(),
                   deadline=deadline,
                   validate_certificate=validate_certificate)

  def _GetFetchFunction(self, url):
    """Get the fetch function for a url.

    Args:
      url: A url to fetch from. str.

    Returns:
      A fetch function for this url.
    """
    for urlmatcher, fetch_function in self._urlmatchers_to_fetch_functions:
      if urlmatcher(url):
        return fetch_function
    return self._RetrieveURL

  @staticmethod
  def _RetrieveURL(url, payload, method, headers, request, response,
                   follow_redirects=True, deadline=_API_CALL_DEADLINE,
                   validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT):
    """Retrieves a URL over the network.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload.
        If the payload is unicode, we assume it is utf-8.
      method: HTTP method to use (e.g., 'GET').
      headers: List of additional header objects to use for the request.
      request: A urlfetch_service_pb.URLFetchRequest proto object from the
        original request.
      response: A urlfetch_service_pb.URLFetchResponse proto object to
        populate with the response data.
      follow_redirects: Optional setting (defaulting to True) for whether or
        not we should transparently follow redirects (up to MAX_REDIRECTS).
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If true, do not send the request to the server
        unless the certificate is valid, signed by a trusted CA, and the
        hostname matches the certificate.

    Raises:
      apiproxy_errors.ApplicationError with INVALID_URL in cases where:
        - The protocol of the redirected URL is bad or missing.
        - The port is not in the allowable range of ports.
      apiproxy_errors.ApplicationError with TOO_MANY_REDIRECTS when
        MAX_REDIRECTS is exceeded.
    """
    last_protocol = ''
    last_host = ''
    if isinstance(payload, unicode):
      payload = payload.encode('utf-8')
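
    # Loop once for the initial fetch plus up to MAX_REDIRECTS follow-ups.
    # A non-redirect response breaks out of the loop; exhausting it falls
    # through to the for/else clause below, which raises TOO_MANY_REDIRECTS.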
    for redirect_number in xrange(MAX_REDIRECTS + 1):
      parsed = urlparse.urlsplit(url)
      protocol, host, path, query, fragment = parsed
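
      # Strip any user:password@ prefix with splituser(), then pull the port
      # out of the remaining host:port netloc with splitport().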
      port = urllib.splitport(urllib.splituser(host)[1])[1]

      if not _IsAllowedPort(port):
        logging.error(
            'urlfetch received %s ; port %s is not allowed in production!' %
            (url, port))
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

      if protocol and not host:
        logging.error('Missing host on redirect; target url is %s' % url)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

      if not host and not protocol:
        host = last_host
        protocol = last_protocol

      adjusted_headers = {
          'User-Agent':
          'AppEngine-Google; (+http://code.google.com/appengine)',
          'Host': host,
          'Accept-Encoding': 'gzip',
      }
      if payload is not None:
        adjusted_headers['Content-Length'] = str(len(payload))

      if method == 'POST' and payload:
        adjusted_headers['Content-Type'] = 'application/x-www-form-urlencoded'

      passthrough_content_encoding = False
      for header in headers:
        if header.key().title().lower() == 'user-agent':
          adjusted_headers['User-Agent'] = (
              '%s %s' %
              (header.value(), adjusted_headers['User-Agent']))
        else:
          if header.key().lower() == 'accept-encoding':
            passthrough_content_encoding = True
          adjusted_headers[header.key().title()] = header.value()
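
      # The payload is escaped purely for the debug log line below, so that
      # binary request bodies do not mangle the log output.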
      if payload is not None:
        escaped_payload = payload.encode('string_escape')
      else:
        escaped_payload = ''
      logging.debug('Making HTTP request: host = %r, '
                    'url = %r, payload = %.1000r, headers = %r',
                    host, url, escaped_payload, adjusted_headers)
      try:
        if protocol == 'http':
          connection_class = httplib.HTTPConnection
        elif protocol == 'https':
          if (validate_certificate and _CanValidateCerts() and
              CERT_PATH):
            connection_class = fancy_urllib.create_fancy_connection(
                ca_certs=CERT_PATH)
          else:
            connection_class = httplib.HTTPSConnection
        else:
          error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.INVALID_URL, error_msg)

        if _CONNECTION_SUPPORTS_TIMEOUT:
          connection = connection_class(host, timeout=deadline)
        else:
          connection = connection_class(host)

        last_protocol = protocol
        last_host = host

        if query != '':
          full_path = path + '?' + query
        else:
          full_path = path

        if not _CONNECTION_SUPPORTS_TIMEOUT:
          orig_timeout = socket.getdefaulttimeout()
        try:
          if not _CONNECTION_SUPPORTS_TIMEOUT:
            socket.setdefaulttimeout(deadline)
          connection.request(method, full_path, payload, adjusted_headers)
          http_response = connection.getresponse()
          if method == 'HEAD':
            http_response_data = ''
          else:
            http_response_data = http_response.read()
        finally:
          if not _CONNECTION_SUPPORTS_TIMEOUT:
            socket.setdefaulttimeout(orig_timeout)
          connection.close()
      except _fancy_urllib_InvalidCertException, e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
            str(e))
      except _fancy_urllib_SSLError, e:
        app_error = (
            urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED
            if 'timed out' in e.message else
            urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR)
        raise apiproxy_errors.ApplicationError(app_error, str(e))
      except socket.timeout, e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED, str(e))
      except (httplib.error, socket.error, IOError), e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e))
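
      # A redirect status with follow_redirects enabled continues the loop
      # with the Location target; anything else is treated as the final
      # response and copied into the response proto.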
      if http_response.status in REDIRECT_STATUSES and follow_redirects:
        url = http_response.getheader('Location', None)
        if url is None:
          error_msg = 'Redirecting response was missing "Location" header'
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.MALFORMED_REPLY,
              error_msg)
      else:
        response.set_statuscode(http_response.status)
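        # Transparently gunzip the body unless the caller supplied its own
        # Accept-Encoding header and therefore expects the raw encoding.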
        if (http_response.getheader('content-encoding') == 'gzip' and
            not passthrough_content_encoding):
          gzip_stream = StringIO.StringIO(http_response_data)
          gzip_file = gzip.GzipFile(fileobj=gzip_stream)
          http_response_data = gzip_file.read()
        response.set_content(http_response_data[:MAX_RESPONSE_SIZE])

        for header_key in http_response.msg.keys():
          for header_value in http_response.msg.getheaders(header_key):
            if (header_key.lower() == 'content-encoding' and
                header_value == 'gzip' and
                not passthrough_content_encoding):
              continue
            if header_key.lower() == 'content-length' and method != 'HEAD':
              header_value = str(len(response.content()))
            header_proto = response.add_header()
            header_proto.set_key(header_key)
            header_proto.set_value(header_value)

        if len(http_response_data) > MAX_RESPONSE_SIZE:
          response.set_contentwastruncated(True)

        if request.url() != url:
          response.set_finalurl(url)

        break
    else:
      error_msg = 'Too many repeated redirects'
      logging.error(error_msg)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.TOO_MANY_REDIRECTS,
          error_msg)

  def _SanitizeHttpHeaders(self, untrusted_headers, headers):
    """Cleans "unsafe" headers from the HTTP request, in place.

    Args:
      untrusted_headers: Set of untrusted header names (all lowercase).
      headers: List of Header objects. The list is modified in place.
    """
    prohibited_headers = [h.key() for h in headers
                          if h.key().lower() in untrusted_headers]
    if prohibited_headers:
      logging.warn('Stripped prohibited headers from URLFetch request: %s',
                   prohibited_headers)

    for index in reversed(xrange(len(headers))):
      if headers[index].key().lower() in untrusted_headers:
        del headers[index]