App Engine Python SDK version 1.9.12
[gae.git] / python / google / appengine / ext / cloudstorage / stub_dispatcher.py
blob3d84853c86697a39b6d160c1be56d8c8420426dd
1 #!/usr/bin/env python
3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 """Dispatcher to handle Google Cloud Storage stub requests."""
19 from __future__ import with_statement
29 import httplib
30 import re
31 import threading
32 import urllib
33 import urlparse
34 import xml.etree.ElementTree as ET
36 from google.appengine.api import apiproxy_stub_map
37 from google.appengine.ext.cloudstorage import cloudstorage_stub
38 from google.appengine.ext.cloudstorage import common
# Matches a path that names only a bucket ('/bucket' or '/bucket/'); group 1
# is the bucket path without the optional trailing slash.
BUCKET_ONLY_PATH = re.compile('(/[a-z0-9-_.]+)/?$')

# Reentrant lock held by dispatch() while a request handler runs.
GCS_STUB_LOCK = threading.RLock()
class _FakeUrlFetchResult(object):
  """Plain response object returned by the stub request handlers.

  Attributes:
    status_code: the HTTP status code of the response.
    headers: the response headers.
    content: the response body.
  """

  def __init__(self, status, headers, content):
    self.status_code, self.headers, self.content = status, headers, content
def dispatch(method, headers, url, payload):
  """Dispatches incoming request and returns response.

  In dev appserver GCS requests are forwarded to this method via the /_ah/gcs
  endpoint. In unittest environment, this method is called instead of urlfetch.
  See https://developers.google.com/storage/docs/xml-api-overview for the
  expected format for the request.

  Args:
    method: A string representing the HTTP request method.
    headers: A dict mapping HTTP header names to values.
    url: A string representing the request URL in the form of
        http://<host>/_ah/gcs/<bucket>/<object>.
    payload: A string containing the payload for the request.

  Returns:
    A _FakeUrlFetchResult containing the HTTP status code, headers, and body of
    the response.

  Raises:
    ValueError: invalid request method.
  """
  method, headers, filename, param_dict = _preprocess(method, headers, url)
  gcs_stub = cloudstorage_stub.CloudStorageStub(
      apiproxy_stub_map.apiproxy.GetStub('blobstore').storage)

  # One zero-argument callable per supported method; each closes over the
  # arguments its handler needs.
  handlers = {
      'POST': lambda: _handle_post(gcs_stub, filename, headers),
      'PUT': lambda: _handle_put(gcs_stub, filename, param_dict, headers,
                                 payload),
      'GET': lambda: _handle_get(gcs_stub, filename, param_dict, headers),
      'HEAD': lambda: _handle_head(gcs_stub, filename),
      'DELETE': lambda: _handle_delete(gcs_stub, filename),
  }

  # The handler runs while GCS_STUB_LOCK is held.
  with GCS_STUB_LOCK:
    handler = handlers.get(method)
    if handler is None:
      raise ValueError('Unrecognized request method %r.' % method,
                       httplib.METHOD_NOT_ALLOWED)
    return handler()
def _preprocess(method, headers, url):
  """Unify input.

  Example:
    _preprocess('post', {'Content-Type': 'Foo'},
                'http://localhost:8080/_ah/gcs/b/f?foo=bar')
    -> 'POST', {'content-type': 'Foo'}, '/b/f', {'foo':'bar'}

  Args:
    method: HTTP method used by the request.
    headers: HTTP request headers in a dict.
    url: HTTP request url.

  Returns:
    method: method in all upper case.
    headers: headers with keys in all lower case.
    filename: a google storage filename of form /bucket/filename or
      a bucket path of form /bucket
    param_dict: a dict of query parameters.

  Raises:
    ValueError: invalid path.
  """
  _, _, path, query, _ = urlparse.urlsplit(url)

  if not path.startswith(common.LOCAL_GCS_ENDPOINT):
    raise ValueError('Invalid GCS path: %s' % path, httplib.BAD_REQUEST)

  # Everything after the local endpoint prefix is the (still quoted) filename.
  filename = path[len(common.LOCAL_GCS_ENDPOINT):]

  # parse_qs maps every key to a list of values (blank values are kept); only
  # the first value of each key is retained, and it is unquoted once more.
  param_dict = urlparse.parse_qs(query, True)
  for k in param_dict:
    param_dict[k] = urllib.unquote(param_dict[k][0])

  headers = dict((k.lower(), v) for k, v in headers.items())

  # Upper-case the method as the contract above promises, so the exact-match
  # comparisons in dispatch() also accept e.g. 'get'.
  return method.upper(), headers, urllib.unquote(filename), param_dict
def _handle_post(gcs_stub, filename, headers):
  """Handle POST that starts object creation.

  Args:
    gcs_stub: an instance of gcs stub.
    filename: filename of format /bucket/filename.
    headers: a dict of request headers.

  Returns:
    A _FakeUrlFetchResult with status CREATED whose 'location' header carries
    the upload_id token returned by the stub.
  """
  content_type = _ContentType(headers)
  upload_token = gcs_stub.post_start_creation(filename, headers)

  query = urllib.urlencode({'upload_id': upload_token})
  location = 'https://storage.googleapis.com/%s?%s' % (filename, query)

  response_headers = {
      'location': location,
      'content-type': content_type.value,
      'content-length': 0,
  }
  return _FakeUrlFetchResult(httplib.CREATED, response_headers, '')
def _handle_put(gcs_stub, filename, param_dict, headers, payload):
  """Handle PUT.

  A PUT is one of: an object copy (copy-source header present), a query for
  upload progress (no data and no final length), or an upload of data.

  Args:
    gcs_stub: an instance of gcs stub.
    filename: filename of format /bucket/filename.
    param_dict: a dict of request query parameters; 'upload_id' is read.
    headers: a dict of request headers with lower case keys.
    payload: the request body.

  Returns:
    A _FakeUrlFetchResult instance.

  Raises:
    ValueError: the content-range header is missing, malformed, or not
      acceptable for an upload that has no upload_id.
  """
  if _iscopy(headers):
    return _copy(gcs_stub, filename, headers)

  token = _get_param('upload_id', param_dict)
  content_range = _ContentRange(headers)

  # Check for the header before reading any parsed attribute of it: when the
  # header is absent the parsed attributes may not exist, and the caller
  # should get this BAD_REQUEST error rather than an AttributeError.
  if not content_range.value:
    raise ValueError('Missing header content-range.', httplib.BAD_REQUEST)

  if _is_query_progress(content_range):
    return _find_progress(gcs_stub, filename, token)

  # A generation-match of '0' fails the request when the object already
  # exists.
  if (headers.get('x-goog-if-generation-match', None) == '0' and
      gcs_stub.head_object(filename) is not None):
    return _FakeUrlFetchResult(httplib.PRECONDITION_FAILED, {}, '')

  if not token:
    # No upload was started beforehand, so this single request must describe
    # the whole object before a creation is started for it.
    if content_range.length is None:
      raise ValueError('Content-Range must have a final length.',
                       httplib.BAD_REQUEST)
    elif not content_range.no_data and content_range.range[0] != 0:
      raise ValueError('Content-Range must specify complete object.',
                       httplib.BAD_REQUEST)
    else:
      token = gcs_stub.post_start_creation(filename, headers)

  try:
    gcs_stub.put_continue_creation(token,
                                   payload,
                                   content_range.range,
                                   content_range.length)
  except ValueError as e:
    # The ValueError is expected to carry (message, http status) as its args.
    return _FakeUrlFetchResult(e.args[1], {}, e.args[0])

  if content_range.length is not None:
    # A final length was supplied.
    response_headers = {
        'content-length': 0,
    }
    response_status = httplib.OK
  else:
    # No final length yet: answer 308.
    response_headers = {}
    response_status = 308

  return _FakeUrlFetchResult(response_status, response_headers, '')
def _is_query_progress(content_range):
  """Empty put to query upload status.

  Args:
    content_range: an object with no_data and length attributes.

  Returns:
    content_range.no_data itself when it is falsy; otherwise True iff
    content_range.length is None.
  """
  no_payload = content_range.no_data
  if not no_payload:
    return no_payload
  return content_range.length is None
def _find_progress(gcs_stub, filename, token):
  """Report the progress of an upload.

  Args:
    gcs_stub: an instance of gcs stub.
    filename: filename of format /bucket/filename.
    token: the upload_id of the upload being queried.

  Returns:
    A _FakeUrlFetchResult: status OK when the object already exists, else
    status 308, with a 'range' header unless the stub reports offset -1.
  """
  if gcs_stub.head_object(filename) is not None:
    return _FakeUrlFetchResult(httplib.OK, {}, '')

  last_offset = gcs_stub.put_empty(token)
  # -1 is answered without a range header.
  progress_headers = {}
  if last_offset != -1:
    progress_headers = {'range': 'bytes=0-%s' % last_offset}
  return _FakeUrlFetchResult(308, progress_headers, '')
def _iscopy(headers):
  """Return True iff the request headers carry a copy-source header value."""
  return _XGoogCopySource(headers).value is not None
def _copy(gcs_stub, filename, headers):
  """Copy file.

  Args:
    gcs_stub: an instance of gcs stub.
    filename: dst filename of format /bucket/filename
    headers: a dict of request headers. Must contain _XGoogCopySource header.
      The 'x-goog-metadata-directive' entry, if any, is removed from it.

  Returns:
    An _FakeUrlFetchResult instance: the HEAD result of the source when that
    is NOT_FOUND, otherwise an empty OK response.
  """
  source = _XGoogCopySource(headers).value
  head_result = _handle_head(gcs_stub, source)
  if head_result.status_code == httplib.NOT_FOUND:
    return head_result

  # Only a REPLACE directive forwards the request headers to the stub; any
  # other directive (default 'COPY') passes None instead.
  directive = headers.pop('x-goog-metadata-directive', 'COPY')
  forwarded = headers if directive == 'REPLACE' else None
  gcs_stub.put_copy(source, filename, forwarded)
  return _FakeUrlFetchResult(httplib.OK, {}, '')
def _handle_get(gcs_stub, filename, param_dict, headers):
  """Handle GET object and GET bucket.

  Args:
    gcs_stub: an instance of gcs stub.
    filename: /bucket/filename for an object, or /bucket for a listing.
    param_dict: a dict of request query parameters (used for listings).
    headers: a dict of request headers; a Range header is honored.

  Returns:
    A _FakeUrlFetchResult instance.
  """
  bucket_match = BUCKET_ONLY_PATH.match(filename)
  if bucket_match is not None:
    # The path names a bucket only: list it.
    return _handle_get_bucket(gcs_stub, bucket_match.group(1), param_dict)

  # Object read: start from the HEAD response and fill in the body.
  response = _handle_head(gcs_stub, filename)
  if response.status_code == httplib.NOT_FOUND:
    return response

  first, last = _Range(headers).value
  total = response.headers['x-goog-stored-content-length']
  if last is not None:
    # A range was requested: clamp its end to the last stored byte.
    response.status_code = httplib.PARTIAL_CONTENT
    last = min(last, total - 1)
    response.headers['content-range'] = 'bytes %d-%d/%d' % (first, last, total)

  response.content = gcs_stub.get_object(filename, first, last)
  response.headers['content-length'] = len(response.content)
  return response
def _handle_get_bucket(gcs_stub, bucketpath, param_dict):
  """Handle get bucket request.

  Asks the stub for the bucket listing and serializes it as a
  ListBucketResult XML document.

  Args:
    gcs_stub: an instance of gcs stub.
    bucketpath: bucket path of form /bucket.
    param_dict: a dict of request query parameters; 'prefix', 'max-keys',
      'marker' and 'delimiter' are read.

  Returns:
    A _FakeUrlFetchResult with status OK and the XML listing as its content.
  """
  prefix = _get_param('prefix', param_dict, '')
  max_keys = _get_param('max-keys', param_dict, common._MAX_GET_BUCKET_RESULT)
  marker = _get_param('marker', param_dict, '')
  delimiter = _get_param('delimiter', param_dict, '')

  stats, last_filename, is_truncated = gcs_stub.get_bucket(
      bucketpath, prefix, marker, max_keys, delimiter)

  builder = ET.TreeBuilder()

  def add_text_element(tag, text):
    """Emit <tag>text</tag> with no attributes."""
    builder.start(tag, {})
    builder.data(text)
    builder.end(tag)

  # Names in the listing are relative to the bucket: skip '<bucketpath>/'.
  name_offset = len(bucketpath) + 1

  builder.start('ListBucketResult', {'xmlns': common.CS_XML_NS})
  for stat in stats:
    relative_name = stat.filename[name_offset:]
    if stat.is_dir:
      builder.start('CommonPrefixes', {})
      add_text_element('Prefix', relative_name)
      builder.end('CommonPrefixes')
      continue

    builder.start('Contents', {})
    add_text_element('Key', relative_name)
    add_text_element('LastModified', common.posix_to_dt_str(stat.st_ctime))
    add_text_element('ETag', stat.etag)
    add_text_element('Size', str(stat.st_size))
    builder.end('Contents')

  if last_filename:
    add_text_element('NextMarker', last_filename[name_offset:])

  add_text_element('IsTruncated', str(is_truncated))

  # MaxKeys is echoed only when the request supplied it (no default here).
  requested_max_keys = _get_param('max-keys', param_dict)
  if requested_max_keys is not None:
    add_text_element('MaxKeys', str(requested_max_keys))

  builder.end('ListBucketResult')
  body = ET.tostring(builder.close())

  response_headers = {'content-length': len(body),
                      'content-type': 'application/xml'}
  return _FakeUrlFetchResult(httplib.OK, response_headers, body)
def _handle_head(gcs_stub, filename):
  """Handle HEAD request.

  Args:
    gcs_stub: an instance of gcs stub.
    filename: filename of format /bucket/filename.

  Returns:
    A _FakeUrlFetchResult: NOT_FOUND when the stub has no stat for the file,
    otherwise OK with headers built from the stat and an empty body.
  """
  filestat = gcs_stub.head_object(filename)
  if not filestat:
    return _FakeUrlFetchResult(httplib.NOT_FOUND, {}, '')

  http_time = common.posix_time_to_http(filestat.st_ctime)

  response_headers = dict([
      ('x-goog-stored-content-length', filestat.st_size),
      ('content-length', 0),
      ('content-type', filestat.content_type),
      ('etag', filestat.etag),
      ('last-modified', http_time),
  ])

  # Entries of the stat's metadata are merged in last, so they take
  # precedence over the headers set above.
  extra = filestat.metadata
  if extra:
    response_headers.update(extra)

  return _FakeUrlFetchResult(httplib.OK, response_headers, '')
def _handle_delete(gcs_stub, filename):
  """Handle DELETE object.

  Args:
    gcs_stub: an instance of gcs stub.
    filename: filename of format /bucket/filename.

  Returns:
    A _FakeUrlFetchResult with status NO_CONTENT when the stub reports the
    delete as successful, NOT_FOUND otherwise.
  """
  deleted = gcs_stub.delete_object(filename)
  status = httplib.NO_CONTENT if deleted else httplib.NOT_FOUND
  return _FakeUrlFetchResult(status, {}, '')
class _Header(object):
  """Wrapper class for a header.

  A subclass helps to parse a specific header by overriding HEADER (the
  header name, matched case-insensitively) and optionally DEFAULT.
  """

  HEADER = ''
  DEFAULT = None

  def __init__(self, headers):
    """Initialize.

    Initializes self.value to the value in request header, or DEFAULT if
    not defined in headers. The first key that matches wins.

    Args:
      headers: request headers.
    """
    wanted = self.HEADER.lower()
    matching_values = (headers[name] for name in headers
                       if name.lower() == wanted)
    self.value = next(matching_values, self.DEFAULT)
class _XGoogCopySource(_Header):
  """The x-goog-copy-source request header, e.g. /bucket/filename.

  value is None when the header is absent.
  """

  HEADER = 'x-goog-copy-source'
class _ContentType(_Header):
  """The Content-Type request header.

  value falls back to 'binary/octet-stream' when the header is absent.
  """

  HEADER = 'Content-Type'
  DEFAULT = 'binary/octet-stream'
class _ContentRange(_Header):
  """Content-Range header.

  Used by resumable upload of unknown size. Possible formats:
    Content-Range: bytes 1-3/* (for uploading of unknown size)
    Content-Range: bytes */5 (for finalizing with no data)

  Attributes:
    value: the raw header value, or None when the header is absent.
    no_data: True iff the byte range part of the header is '*'.
    length: the total length as a long, or None when it is '*'.
    range: a (first, last) tuple of longs, or None when no_data is True.

  When the header is absent, no_data is False and length and range are None.
  """

  HEADER = 'Content-Range'
  RE_PATTERN = re.compile(r'^bytes (([0-9]+)-([0-9]+)|\*)/([0-9]+|\*)$')

  def __init__(self, headers):
    """Initialize by parsing the header, if present.

    Args:
      headers: request headers.

    Raises:
      ValueError: the header is present but does not match RE_PATTERN.
    """
    super(_ContentRange, self).__init__(headers)

    # Always define the parsed attributes, so that code inspecting them does
    # not hit an AttributeError when the header is missing.
    self.no_data = False
    self.length = None
    self.range = None

    if not self.value:
      return

    result = self.RE_PATTERN.match(self.value)
    if not result:
      raise ValueError('Invalid content-range header %s' % self.value,
                       httplib.BAD_REQUEST)

    self.no_data = result.group(1) == '*'

    # Group 4 is the total length; '*' means it is not known yet.
    if result.group(4) != '*':
      self.length = long(result.group(4))

    # Groups 2 and 3 are only populated when an explicit byte range is given.
    if not self.no_data:
      self.range = (long(result.group(2)), long(result.group(3)))
class _Range(_Header):
  """_Range header.

  Used by read. Format: Range: bytes=1-3.

  After construction, value is a (start, end) tuple: the two parsed longs, or
  (0, None) when the header is absent.
  """

  HEADER = 'Range'

  def __init__(self, headers):
    super(_Range, self).__init__(headers)
    if not self.value:
      self.value = (0, None)
      return
    # Keep what follows the last '=' and split it into its two bounds.
    first, last = self.value.rsplit('=', 1)[-1].split('-')
    self.value = (long(first), long(last))
def _get_param(param, param_dict, default=None):
  """Gets a parameter value from request query parameters.

  Args:
    param: name of the parameter to get.
    param_dict: a dict of request query parameters.
    default: default value if not defined.

  Returns:
    Value of the parameter or default if not defined. A truthy 'max-keys'
    value is converted to a long.
  """
  value = param_dict.get(param, default)
  if value and param == 'max-keys':
    return long(value)
  return value