python/google/appengine/ext/cloudstorage/stub_dispatcher.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright 2007 Google Inc.
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #     http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16 #
  17 """Dispatcher to handle Google Cloud Storage stub requests."""
  18
  19 from __future__ import with_statement
  20
  21
  22
  23
  24
  25
  26
  27
  28
  29 import httplib
  30 import re
  31 import threading
  32 import urllib
  33 import urlparse
  34 import xml.etree.ElementTree as ET
  35
  36 from google.appengine.api import apiproxy_stub_map
  37 from google.appengine.ext.cloudstorage import cloudstorage_stub
  38 from google.appengine.ext.cloudstorage import common
  39
  40
# Matches a path made of a single bucket segment (lowercase letters, digits,
# '-', '_' and '.'), optionally followed by one trailing slash. Group 1 is the
# bucket path without that trailing slash.
BUCKET_ONLY_PATH = re.compile('(/[a-z0-9-_.]+)/?$')

# Re-entrant lock held by dispatch() while a request handler runs.
GCS_STUB_LOCK = threading.RLock()
  44
  45
  46 class _FakeUrlFetchResult(object):
  47   def __init__(self, status, headers, content):
  48     self.status_code = status
  49     self.headers = headers
  50     self.content = content
  51
  52
  53 def dispatch(method, headers, url, payload):
  54   """Dispatches incoming request and returns response.
  55
  56   In dev appserver GCS requests are forwarded to this method via the /_ah/gcs
  57   endpoint. In unittest environment, this method is called instead of urlfetch.
  58   See https://developers.google.com/storage/docs/xml-api-overview for the
  59   exepected format for the request.
  60
  61   Args:
  62     method: A string represneting the HTTP request method.
  63     headers: A dict mapping HTTP header names to values.
  64     url: A string representing the request URL in the form of
  65         http://<host>/_ah/gcs/<bucket>/<object>.
  66     payload: A string containing the payload for the request.
  67
  68   Returns:
  69     A _FakeUrlFetchResult containing the HTTP status code, headers, and body of
  70     the response.
  71
  72   Raises:
  73     ValueError: invalid request method.
  74   """
  75   method, headers, filename, param_dict = _preprocess(method, headers, url)
  76   gcs_stub = cloudstorage_stub.CloudStorageStub(
  77       apiproxy_stub_map.apiproxy.GetStub('blobstore').storage)
  78
  79   with GCS_STUB_LOCK:
  80     if method == 'POST':
  81       return _handle_post(gcs_stub, filename, headers)
  82     elif method == 'PUT':
  83       return _handle_put(gcs_stub, filename, param_dict, headers, payload)
  84     elif method == 'GET':
  85       return _handle_get(gcs_stub, filename, param_dict, headers)
  86     elif method == 'HEAD':
  87       return _handle_head(gcs_stub, filename)
  88     elif method == 'DELETE':
  89       return _handle_delete(gcs_stub, filename)
  90     raise ValueError('Unrecognized request method %r.' % method,
  91                      httplib.METHOD_NOT_ALLOWED)
  92
  93
  94 def _preprocess(method, headers, url):
  95   """Unify input.
  96
  97   Example:
  98     _preprocess('POST', {'Content-Type': 'Foo'},
  99                 'http://localhost:8080/_ah/gcs/b/f?foo=bar')
 100     -> 'POST', {'content-type': 'Foo'}, '/b/f', {'foo':'bar'}
 101
 102   Args:
 103     method: HTTP method used by the request.
 104     headers: HTTP request headers in a dict.
 105     url: HTTP request url.
 106
 107   Returns:
 108     method: method in all upper case.
 109     headers: headers with keys in all lower case.
 110     filename: a google storage filename of form /bucket/filename or
 111       a bucket path of form /bucket
 112     param_dict: a dict of query parameters.
 113
 114   Raises:
 115     ValueError: invalid path.
 116   """
 117   _, _, path, query, _ = urlparse.urlsplit(url)
 118
 119   if not path.startswith(common.LOCAL_GCS_ENDPOINT):
 120     raise ValueError('Invalid GCS path: %s' % path, httplib.BAD_REQUEST)
 121
 122   filename = path[len(common.LOCAL_GCS_ENDPOINT):]
 123
 124
 125
 126   param_dict = urlparse.parse_qs(query, True)
 127   for k in param_dict:
 128     param_dict[k] = urllib.unquote(param_dict[k][0])
 129
 130   headers = dict((k.lower(), v) for k, v in headers.iteritems())
 131   return method, headers, urllib.unquote(filename), param_dict
 132
 133
 134 def _handle_post(gcs_stub, filename, headers):
 135   """Handle POST that starts object creation."""
 136   content_type = _ContentType(headers)
 137   token = gcs_stub.post_start_creation(filename, headers)
 138   response_headers = {
 139       'location': 'https://storage.googleapis.com/%s?%s' % (
 140           filename,
 141           urllib.urlencode({'upload_id': token})),
 142       'content-type': content_type.value,
 143       'content-length': 0
 144   }
 145   return _FakeUrlFetchResult(httplib.CREATED, response_headers, '')
 146
 147
 148 def _handle_put(gcs_stub, filename, param_dict, headers, payload):
 149   """Handle PUT."""
 150   if _iscopy(headers):
 151     return _copy(gcs_stub, filename, headers)
 152
 153
 154   token = _get_param('upload_id', param_dict)
 155   content_range = _ContentRange(headers)
 156
 157   if _is_query_progress(content_range):
 158     return _find_progress(gcs_stub, filename, token)
 159
 160   if not content_range.value:
 161     raise ValueError('Missing header content-range.', httplib.BAD_REQUEST)
 162
 163
 164
 165
 166   if (headers.get('x-goog-if-generation-match', None) == '0' and
 167       gcs_stub.head_object(filename) is not None):
 168     return _FakeUrlFetchResult(httplib.PRECONDITION_FAILED, {}, '')
 169
 170
 171
 172   if not token:
 173
 174     if content_range.length is None:
 175       raise ValueError('Content-Range must have a final length.',
 176                        httplib.BAD_REQUEST)
 177     elif not content_range.no_data and content_range.range[0] != 0:
 178       raise ValueError('Content-Range must specify complete object.',
 179                        httplib.BAD_REQUEST)
 180     else:
 181
 182       token = gcs_stub.post_start_creation(filename, headers)
 183
 184   try:
 185     gcs_stub.put_continue_creation(token,
 186                                    payload,
 187                                    content_range.range,
 188                                    content_range.length)
 189   except ValueError, e:
 190     return _FakeUrlFetchResult(e.args[1], {}, e.args[0])
 191
 192   if content_range.length is not None:
 193
 194
 195     response_headers = {
 196         'content-length': 0,
 197     }
 198     response_status = httplib.OK
 199   else:
 200     response_headers = {}
 201     response_status = 308
 202
 203   return _FakeUrlFetchResult(response_status, response_headers, '')
 204
 205
 206 def _is_query_progress(content_range):
 207   """Empty put to query upload status."""
 208   return content_range.no_data and content_range.length is None
 209
 210
 211 def _find_progress(gcs_stub, filename, token):
 212
 213   if gcs_stub.head_object(filename) is not None:
 214     return _FakeUrlFetchResult(httplib.OK, {}, '')
 215   last_offset = gcs_stub.put_empty(token)
 216   if last_offset == -1:
 217     return _FakeUrlFetchResult(308, {}, '')
 218   return _FakeUrlFetchResult(308, {'range': 'bytes=0-%s' % last_offset}, '')
 219
 220
 221 def _iscopy(headers):
 222   copysource = _XGoogCopySource(headers)
 223   return copysource.value is not None
 224
 225
 226 def _copy(gcs_stub, filename, headers):
 227   """Copy file.
 228
 229   Args:
 230     gcs_stub: an instance of gcs stub.
 231     filename: dst filename of format /bucket/filename
 232     headers: a dict of request headers. Must contain _XGoogCopySource header.
 233
 234   Returns:
 235     An _FakeUrlFetchResult instance.
 236   """
 237   source = _XGoogCopySource(headers).value
 238   result = _handle_head(gcs_stub, source)
 239   if result.status_code == httplib.NOT_FOUND:
 240     return result
 241   directive = headers.pop('x-goog-metadata-directive', 'COPY')
 242   if directive == 'REPLACE':
 243     gcs_stub.put_copy(source, filename, headers)
 244   else:
 245     gcs_stub.put_copy(source, filename, None)
 246   return _FakeUrlFetchResult(httplib.OK, {}, '')
 247
 248
 249 def _handle_get(gcs_stub, filename, param_dict, headers):
 250   """Handle GET object and GET bucket."""
 251   mo = re.match(BUCKET_ONLY_PATH, filename)
 252   if mo is not None:
 253
 254     return _handle_get_bucket(gcs_stub, mo.group(1), param_dict)
 255   else:
 256
 257     result = _handle_head(gcs_stub, filename)
 258     if result.status_code == httplib.NOT_FOUND:
 259       return result
 260
 261
 262
 263     start, end = _Range(headers).value
 264     st_size = result.headers['x-goog-stored-content-length']
 265     if end is not None:
 266       result.status_code = httplib.PARTIAL_CONTENT
 267       end = min(end, st_size - 1)
 268       result.headers['content-range'] = 'bytes %d-%d/%d' % (start, end, st_size)
 269
 270     result.content = gcs_stub.get_object(filename, start, end)
 271     result.headers['content-length'] = len(result.content)
 272     return result
 273
 274
 275 def _handle_get_bucket(gcs_stub, bucketpath, param_dict):
 276   """Handle get bucket request."""
 277   prefix = _get_param('prefix', param_dict, '')
 278
 279   max_keys = _get_param('max-keys', param_dict, common._MAX_GET_BUCKET_RESULT)
 280   marker = _get_param('marker', param_dict, '')
 281   delimiter = _get_param('delimiter', param_dict, '')
 282
 283   stats, last_filename, is_truncated = gcs_stub.get_bucket(
 284       bucketpath, prefix, marker, max_keys, delimiter)
 285
 286   builder = ET.TreeBuilder()
 287   builder.start('ListBucketResult', {'xmlns': common.CS_XML_NS})
 288   for stat in stats:
 289     filename = stat.filename[len(bucketpath) + 1:]
 290     if stat.is_dir:
 291       builder.start('CommonPrefixes', {})
 292       builder.start('Prefix', {})
 293       builder.data(filename)
 294       builder.end('Prefix')
 295       builder.end('CommonPrefixes')
 296     else:
 297       builder.start('Contents', {})
 298
 299       builder.start('Key', {})
 300       builder.data(filename)
 301       builder.end('Key')
 302
 303       builder.start('LastModified', {})
 304       builder.data(common.posix_to_dt_str(stat.st_ctime))
 305       builder.end('LastModified')
 306
 307       builder.start('ETag', {})
 308       builder.data(stat.etag)
 309       builder.end('ETag')
 310
 311       builder.start('Size', {})
 312       builder.data(str(stat.st_size))
 313       builder.end('Size')
 314
 315       builder.end('Contents')
 316
 317   if last_filename:
 318     builder.start('NextMarker', {})
 319     builder.data(last_filename[len(bucketpath) + 1:])
 320     builder.end('NextMarker')
 321
 322   builder.start('IsTruncated', {})
 323   builder.data(str(is_truncated))
 324   builder.end('IsTruncated')
 325
 326   max_keys = _get_param('max-keys', param_dict)
 327   if max_keys is not None:
 328     builder.start('MaxKeys', {})
 329     builder.data(str(max_keys))
 330     builder.end('MaxKeys')
 331
 332   builder.end('ListBucketResult')
 333   root = builder.close()
 334
 335   body = ET.tostring(root)
 336   response_headers = {'content-length': len(body),
 337                       'content-type': 'application/xml'}
 338   return _FakeUrlFetchResult(httplib.OK, response_headers, body)
 339
 340
 341 def _handle_head(gcs_stub, filename):
 342   """Handle HEAD request."""
 343   filestat = gcs_stub.head_object(filename)
 344   if not filestat:
 345     return _FakeUrlFetchResult(httplib.NOT_FOUND, {}, '')
 346
 347   http_time = common.posix_time_to_http(filestat.st_ctime)
 348
 349   response_headers = {
 350       'x-goog-stored-content-length': filestat.st_size,
 351       'content-length': 0,
 352       'content-type': filestat.content_type,
 353       'etag': filestat.etag,
 354       'last-modified': http_time
 355   }
 356
 357   if filestat.metadata:
 358     response_headers.update(filestat.metadata)
 359
 360   return _FakeUrlFetchResult(httplib.OK, response_headers, '')
 361
 362
 363 def _handle_delete(gcs_stub, filename):
 364   """Handle DELETE object."""
 365   if gcs_stub.delete_object(filename):
 366     return _FakeUrlFetchResult(httplib.NO_CONTENT, {}, '')
 367   else:
 368     return _FakeUrlFetchResult(httplib.NOT_FOUND, {}, '')
 369
 370
 371 class _Header(object):
 372   """Wrapper class for a header.
 373
 374   A subclass helps to parse a specific header.
 375   """
 376
 377   HEADER = ''
 378   DEFAULT = None
 379
 380   def __init__(self, headers):
 381     """Initialize.
 382
 383     Initializes self.value to the value in request header, or DEFAULT if
 384     not defined in headers.
 385
 386     Args:
 387       headers: request headers.
 388     """
 389     self.value = self.DEFAULT
 390     for k in headers:
 391       if k.lower() == self.HEADER.lower():
 392         self.value = headers[k]
 393         break
 394
 395
class _XGoogCopySource(_Header):
  """x-goog-copy-source: /bucket/filename.

  The value is None when the request has no such header, which is how a copy
  request is told apart from other PUT requests.
  """

  HEADER = 'x-goog-copy-source'
 400
 401
class _ContentType(_Header):
  """Content-type header.

  The value falls back to 'binary/octet-stream' when the request does not
  carry a content type.
  """

  HEADER = 'Content-Type'
  DEFAULT = 'binary/octet-stream'
 407
 408
 409 class _ContentRange(_Header):
 410   """Content-Range header.
 411
 412   Used by resumable upload of unknown size. Possible formats:
 413     Content-Range: bytes 1-3/* (for uploading of unknown size)
 414     Content-Range: bytes */5 (for finalizing with no data)
 415   """
 416
 417   HEADER = 'Content-Range'
 418   RE_PATTERN = re.compile(r'^bytes (([0-9]+)-([0-9]+)|\*)/([0-9]+|\*)$')
 419
 420   def __init__(self, headers):
 421     super(_ContentRange, self).__init__(headers)
 422     if self.value:
 423       result = self.RE_PATTERN.match(self.value)
 424       if not result:
 425         raise ValueError('Invalid content-range header %s' % self.value,
 426                          httplib.BAD_REQUEST)
 427
 428       self.no_data = result.group(1) == '*'
 429       last = result.group(4) != '*'
 430       self.length = None
 431       if last:
 432         self.length = long(result.group(4))
 433
 434       self.range = None
 435       if not self.no_data:
 436         self.range = (long(result.group(2)), long(result.group(3)))
 437
 438
 439 class _Range(_Header):
 440   """_Range header.
 441
 442   Used by read. Format: Range: bytes=1-3.
 443   """
 444
 445   HEADER = 'Range'
 446
 447   def __init__(self, headers):
 448     super(_Range, self).__init__(headers)
 449     if self.value:
 450       start, end = self.value.rsplit('=', 1)[-1].split('-')
 451       start, end = long(start), long(end)
 452     else:
 453       start, end = 0, None
 454     self.value = start, end
 455
 456
 457 def _get_param(param, param_dict, default=None):
 458   """Gets a parameter value from request query parameters.
 459
 460   Args:
 461     param: name of the parameter to get.
 462     param_dict: a dict of request query parameters.
 463     default: default value if not defined.
 464
 465   Returns:
 466     Value of the parameter or default if not defined.
 467   """
 468   result = param_dict.get(param, default)
 469   if param in ['max-keys'] and result:
 470     return long(result)
 471   return result