3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
23 """Stub version of the urlfetch API, based on httplib."""
# Feature-detection globals for the optional fancy_urllib library, which
# supplies SSL certificate validation.  The flags default to "unavailable"
# and are overwritten when the import succeeds.
# NOTE(review): this extract appears to be the body of a
# try/except ImportError around "import fancy_urllib"; the try/except and
# import lines themselves are not visible here -- confirm against the
# original file.
_successfully_imported_fancy_urllib = False
_fancy_urllib_InvalidCertException = None
_fancy_urllib_SSLError = None
# Set when fancy_urllib imported successfully: record the exception types
# it exposes so later except clauses can reference them even when the
# import failed.
_successfully_imported_fancy_urllib = True
_fancy_urllib_InvalidCertException = fancy_urllib.InvalidCertificateException
_fancy_urllib_SSLError = fancy_urllib.SSLError
49 from google
.appengine
.api
import apiproxy_stub
50 from google
.appengine
.api
import urlfetch
51 from google
.appengine
.api
import urlfetch_errors
52 from google
.appengine
.api
import urlfetch_service_pb
53 from google
.appengine
.runtime
import apiproxy_errors
# Maximum accepted request size: 10 MB.
MAX_REQUEST_SIZE = 10 << 20

# Maximum response size returned to the caller: 32 MB (2 ** 25); longer
# bodies are truncated and contentwastruncated is set (see _RetrieveURL).
MAX_RESPONSE_SIZE = 2 ** 25

# Redirect-following cap, shared with the public urlfetch API.
MAX_REDIRECTS = urlfetch.MAX_REDIRECTS

# HTTP status codes treated as redirects when follow_redirects is enabled.
# NOTE(review): this extract is truncated -- members between
# MOVED_PERMANENTLY and TEMPORARY_REDIRECT and the closing "])" are not
# visible here.
REDIRECT_STATUSES = frozenset([
    httplib.MOVED_PERMANENTLY,
    httplib.TEMPORARY_REDIRECT,

# Default deadline, in seconds, when the request does not set one.
_API_CALL_DEADLINE = 5.0

# Default for whether HTTPS certificates must validate.
_API_CALL_VALIDATE_CERTIFICATE_DEFAULT = False

# httplib connection classes accept a timeout kwarg only on Python >= 2.6;
# older runtimes fall back to socket.setdefaulttimeout (see _RetrieveURL).
_CONNECTION_SUPPORTS_TIMEOUT = sys.version_info >= (2, 6)

# Lowercase header names the application is not allowed to set itself;
# stripped in _SanitizeHttpHeaders.
# NOTE(review): the member list and closing "])" are not visible in this
# extract.
_UNTRUSTED_REQUEST_HEADERS = frozenset([

# URLs whose length is >= this limit are rejected with INVALID_URL.
_MAX_URL_LENGTH = 2048
def _CanValidateCerts():
  """Returns True if SSL certificate validation is available.

  Validation requires that the optional fancy_urllib library was imported
  successfully and that it reports certificate-validation support on this
  runtime.
  """
  if not _successfully_imported_fancy_urllib:
    return False
  return fancy_urllib.can_validate_certs()
# NOTE(review): this span is the interior of a CA-certificate setup helper
# (its "def" line, presumably "def _SetupSSL(path):", and the bodies of the
# branches below are not visible in this extract).
  # Check whether the CA-certificate bundle exists at the given path.
  if os.path.exists(path):
  # Warn that certificate validation will be unavailable without the bundle.
  logging.warning('%s missing; without this urlfetch will not be able to '
                  'validate SSL certificates.', path)
  # Also warn when no ssl support is available at all.
  if not _CanValidateCerts():
    logging.warning('No ssl package found. urlfetch will not be able to '
                    'validate SSL certificates.')
# Point SSL setup at the CA bundle shipped with the SDK, resolved relative
# to this file's directory.
_SetupSSL(os.path.normpath(os.path.join(os.path.dirname(__file__), '..', '..',
                                        '..', 'lib', 'cacerts',
                                        'urlfetch_cacerts.txt')))
def _IsAllowedPort(port):
  # Returns whether the given port may be fetched in production.
  # NOTE(review): this extract is missing several original lines -- the
  # try block that this except belongs to (presumably int(port)), the
  # early returns, and the tail of the condition below.  Visible statements
  # are kept as-is.
  except ValueError, e:
  # Production allows the standard web port ranges (80-90, 440-450); the
  # remainder of the condition is not visible here.
  if ((port >= 80 and port <= 90) or
      (port >= 440 and port <= 450) or
class URLFetchServiceStub(apiproxy_stub.APIProxyStub):
  """Stub version of the urlfetch API to be used with apiproxy_stub_map."""
  # NOTE(review): the "def __init__(self," line is not visible in this
  # extract; the lines below are the remaining parameter defaults and body
  # of the constructor.
               service_name='urlfetch',
               urlmatchers_to_fetch_functions=None):
    """Initializer.

    Args:
      service_name: Service name expected for all calls.
      urlmatchers_to_fetch_functions: A list of two-element tuples.
        The first element is a urlmatcher predicate function that takes
        a url and determines a match. The second is a function that
        can retrieve result for that url. If no match is found, a url is
        handled by the default _RetrieveURL function.
        When more than one match is possible, the first match is used.
    """
    # Enforce the 10 MB request cap at the APIProxyStub layer.
    super(URLFetchServiceStub, self).__init__(service_name,
                                              max_request_size=MAX_REQUEST_SIZE)
    self._urlmatchers_to_fetch_functions = urlmatchers_to_fetch_functions or []
  def _Dynamic_Fetch(self, request, response):
    """Trivial implementation of URLFetchService::Fetch().

    Args:
      request: the fetch to perform, a URLFetchRequest
      response: the fetch response, a URLFetchResponse

    Raises:
      apiproxy_errors.ApplicationError: with INVALID_URL when the URL is
        too long, the method is unrecognized, or the protocol is not
        http/https.
    """
    # NOTE(review): this extract is missing several original lines (the
    # "method = ..." / "payload = None" assignments in the branches below,
    # the final "else:", the guard before the 'Missing host.' error, and a
    # "deadline=deadline" argument in the final call); visible statements
    # are kept as-is.
    if len(request.url()) >= _MAX_URL_LENGTH:
      logging.error('URL is too long: %s...' % request.url()[:50])
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    (protocol, host, path, query, fragment) = urlparse.urlsplit(request.url())

    # Dispatch on the request's protobuf method enum; POST/PUT/PATCH carry
    # a request payload.
    if request.method() == urlfetch_service_pb.URLFetchRequest.GET:
    elif request.method() == urlfetch_service_pb.URLFetchRequest.POST:
      payload = request.payload()
    elif request.method() == urlfetch_service_pb.URLFetchRequest.HEAD:
    elif request.method() == urlfetch_service_pb.URLFetchRequest.PUT:
      payload = request.payload()
    elif request.method() == urlfetch_service_pb.URLFetchRequest.DELETE:
    elif request.method() == urlfetch_service_pb.URLFetchRequest.PATCH:
      payload = request.payload()
      logging.error('Invalid method: %s', request.method())
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    # Only plain http/https URLs are supported by this stub.
    if not (protocol == 'http' or protocol == 'https'):
      logging.error('Invalid protocol: %s', protocol)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

      # NOTE(review): the guard for this branch (presumably "if not host:")
      # is not visible in this extract.
      logging.error('Missing host.')
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

    # Remove headers the application is not allowed to set (in place).
    self._SanitizeHttpHeaders(_UNTRUSTED_REQUEST_HEADERS,
                              request.header_list())
    # Per-request deadline and certificate-validation overrides, falling
    # back to the module defaults.
    deadline = _API_CALL_DEADLINE
    if request.has_deadline():
      deadline = request.deadline()
    validate_certificate = _API_CALL_VALIDATE_CERTIFICATE_DEFAULT
    if request.has_mustvalidateservercertificate():
      validate_certificate = request.mustvalidateservercertificate()

    # Route the URL to a registered matcher's fetch function, or the
    # default _RetrieveURL.
    fetch_function = self._GetFetchFunction(request.url())
    fetch_function(request.url(), payload, method,
                   request.header_list(), request, response,
                   follow_redirects=request.followredirects(),
                   validate_certificate=validate_certificate)
  def _GetFetchFunction(self, url):
    """Get the fetch function for a url.

    Args:
      url: A url to fetch from. str.

    Returns:
      A fetch function for this url.
    """
    # First registered matcher wins; fall through to the default retriever.
    # NOTE(review): the predicate line guarding the return (presumably
    # "if urlmatcher(url):") is not visible in this extract.
    for urlmatcher, fetch_function in self._urlmatchers_to_fetch_functions:
        return fetch_function
    return self._RetrieveURL
  def _RetrieveURL(url, payload, method, headers, request, response,
                   follow_redirects=True, deadline=_API_CALL_DEADLINE,
                   validate_certificate=_API_CALL_VALIDATE_CERTIFICATE_DEFAULT):
    """Retrieves a URL over network.

    Args:
      url: String containing the URL to access.
      payload: Request payload to send, if any; None if no payload.
        If the payload is unicode, we assume it is utf-8.
      method: HTTP method to use (e.g., 'GET')
      headers: List of additional header objects to use for the request.
      request: A urlfetch_service_pb.URLFetchRequest proto object from
      response: A urlfetch_service_pb.URLFetchResponse proto object to
        populate with the response data.
      follow_redirects: optional setting (defaulting to True) for whether or not
        we should transparently follow redirects (up to MAX_REDIRECTS)
      deadline: Number of seconds to wait for the urlfetch to finish.
      validate_certificate: If true, do not send request to server unless the
        certificate is valid, signed by a trusted CA and the hostname matches

      Raises an apiproxy_errors.ApplicationError exception with
      INVALID_URL_ERROR in cases where:
        - The protocol of the redirected URL is bad or missing.
        - The port is not in the allowable range of ports.
      Raises an apiproxy_errors.ApplicationError exception with
      TOO_MANY_REDIRECTS in cases when MAX_REDIRECTS is exceeded
    """
    # NOTE(review): this extract is missing many original lines throughout
    # (headers/signature suggest this method takes no self, presumably a
    # @staticmethod -- confirm against the original).  The major visible
    # gaps are flagged inline; visible statements are kept as-is.
    # Normalize unicode payloads to utf-8 bytes before transmission.
    if isinstance(payload, unicode):
      payload = payload.encode('utf-8')

    # Follow redirects manually, at most MAX_REDIRECTS hops.
    for redirect_number in xrange(MAX_REDIRECTS + 1):
      parsed = urlparse.urlsplit(url)
      protocol, host, path, query, fragment = parsed

      # Extract the port from "user@host:port" if present.
      port = urllib.splitport(urllib.splituser(host)[1])[1]

      # Ports outside the production-allowed ranges are rejected.
      # NOTE(review): the logging call wrapping the message below is not
      # fully visible in this extract.
      if not _IsAllowedPort(port):
          'urlfetch received %s ; port %s is not allowed in production!' %
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

      # A protocol without a host means a malformed redirect target.
      if protocol and not host:
        logging.error('Missing host on redirect; target url is %s' % url)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.INVALID_URL)

      # Relative redirect: reuse the previous hop's protocol.
      # NOTE(review): the corresponding host fallback line(s) are not
      # visible in this extract.
      if not host and not protocol:
        protocol = last_protocol

      # Default request headers sent by the stub.
      # NOTE(review): the adjusted_headers dict literal surrounding these
      # two entries (including the User-Agent key) is not fully visible.
          'AppEngine-Google; (+http://code.google.com/appengine)',
          'Accept-Encoding': 'gzip',

      if payload is not None:
        adjusted_headers['Content-Length'] = str(len(payload))
      # Default form encoding for POST bodies, matching production.
      if method == 'POST' and payload:
        adjusted_headers['Content-Type'] = 'application/x-www-form-urlencoded'

      # Merge caller-supplied headers over the defaults; remember whether
      # the app set its own Accept-Encoding so gzip is passed through raw.
      passthrough_content_encoding = False
      for header in headers:
        if header.key().title().lower() == 'user-agent':
          adjusted_headers['User-Agent'] = (
              (header.value(), adjusted_headers['User-Agent']))
          if header.key().lower() == 'accept-encoding':
            passthrough_content_encoding = True
          adjusted_headers[header.key().title()] = header.value()

      # Debug-log the outgoing request with an escaped payload preview.
      # NOTE(review): the else branch assigning escaped_payload when the
      # payload is None is not visible here.
      if payload is not None:
        escaped_payload = payload.encode('string_escape')
      logging.debug('Making HTTP request: host = %r, '
                    'url = %r, payload = %.1000r, headers = %r',
                    host, url, escaped_payload, adjusted_headers)

      # Pick the connection class for this hop's protocol.
      # NOTE(review): parts of the https branch (the validation condition
      # tail, fancy connection arguments, and else lines) are missing.
      if protocol == 'http':
        connection_class = httplib.HTTPConnection
      elif protocol == 'https':
        if (validate_certificate and _CanValidateCerts() and
          connection_class = fancy_urllib.create_fancy_connection(
          connection_class = httplib.HTTPSConnection
        error_msg = 'Redirect specified invalid protocol: "%s"' % protocol
        logging.error(error_msg)
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.INVALID_URL, error_msg)

      # On Python >= 2.6 the deadline is a per-connection timeout;
      # otherwise we temporarily change the global socket default below.
      if _CONNECTION_SUPPORTS_TIMEOUT:
        connection = connection_class(host, timeout=deadline)
        connection = connection_class(host)

      # Remember the protocol for resolving relative redirects.
      last_protocol = protocol

        full_path = path + '?' + query

      if not _CONNECTION_SUPPORTS_TIMEOUT:
        orig_timeout = socket.getdefaulttimeout()
      # NOTE(review): the try: wrapping the request/response exchange below
      # is not visible in this extract.
        if not _CONNECTION_SUPPORTS_TIMEOUT:
          # Temporarily apply the deadline process-wide (pre-2.6 fallback).
          socket.setdefaulttimeout(deadline)
        connection.request(method, full_path, payload, adjusted_headers)
        http_response = connection.getresponse()
          http_response_data = ''
          http_response_data = http_response.read()
        if not _CONNECTION_SUPPORTS_TIMEOUT:
          # Restore the original process-wide timeout.
          socket.setdefaulttimeout(orig_timeout)
      # Map transport failures onto urlfetch service error codes.
      except _fancy_urllib_InvalidCertException, e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR,
      except _fancy_urllib_SSLError, e:
            urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED
            if 'timed out' in e.message else
            urlfetch_service_pb.URLFetchServiceError.SSL_CERTIFICATE_ERROR)
        raise apiproxy_errors.ApplicationError(app_error, str(e))
      except socket.timeout, e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED, str(e))
      except (httplib.error, socket.error, IOError), e:
        raise apiproxy_errors.ApplicationError(
            urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR, str(e))

      # Redirect: loop again with the Location target; otherwise populate
      # the response proto and return.
      if http_response.status in REDIRECT_STATUSES and follow_redirects:
        url = http_response.getheader('Location', None)
          error_msg = 'Redirecting response was missing "Location" header'
          logging.error(error_msg)
          raise apiproxy_errors.ApplicationError(
              urlfetch_service_pb.URLFetchServiceError.MALFORMED_REPLY,
        response.set_statuscode(http_response.status)
        # Transparently decompress gzip bodies unless the app asked for
        # raw content by setting its own Accept-Encoding.
        if (http_response.getheader('content-encoding') == 'gzip' and
            not passthrough_content_encoding):
          gzip_stream = StringIO.StringIO(http_response_data)
          gzip_file = gzip.GzipFile(fileobj=gzip_stream)
          http_response_data = gzip_file.read()
        # Truncate to the maximum allowed response size.
        response.set_content(http_response_data[:MAX_RESPONSE_SIZE])

        # Copy response headers into the proto, fixing Content-Length to
        # match the (possibly truncated/decompressed) content.
        for header_key in http_response.msg.keys():
          for header_value in http_response.msg.getheaders(header_key):
            if (header_key.lower() == 'content-encoding' and
                header_value == 'gzip' and
                not passthrough_content_encoding):
            if header_key.lower() == 'content-length' and method != 'HEAD':
              header_value = str(len(response.content()))
            header_proto = response.add_header()
            header_proto.set_key(header_key)
            header_proto.set_value(header_value)

        if len(http_response_data) > MAX_RESPONSE_SIZE:
          response.set_contentwastruncated(True)

        # Record the final URL if redirects moved us off the original.
        if request.url() != url:
          response.set_finalurl(url)

      # Fell out of the redirect loop: too many hops.
      error_msg = 'Too many repeated redirects'
      logging.error(error_msg)
      raise apiproxy_errors.ApplicationError(
          urlfetch_service_pb.URLFetchServiceError.TOO_MANY_REDIRECTS,
  def _SanitizeHttpHeaders(self, untrusted_headers, headers):
    """Cleans "unsafe" headers from the HTTP request, in place.

    Args:
      untrusted_headers: Set of untrusted headers names (all lowercase).
      headers: List of Header objects. The list is modified in place.
    """
    # Collect the names of headers that must be stripped, for logging.
    prohibited_headers = [h.key() for h in headers
                          if h.key().lower() in untrusted_headers]
    # NOTE(review): the second argument of the logging.warn call below is
    # not visible in this extract.
    if prohibited_headers:
      logging.warn('Stripped prohibited headers from URLFetch request: %s',
    # Iterate indices in reverse so in-place deletion is safe.
    # NOTE(review): the body of the final if (presumably the deletion) lies
    # past the end of this extract.
    for index in reversed(xrange(len(headers))):
      if headers[index].key().lower() in untrusted_headers: