# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Stub for Google storage."""
import calendar
import datetime
import hashlib
import StringIO

from google.appengine.api import datastore
from google.appengine.api import namespace_manager
from google.appengine.api.blobstore import blobstore_stub
from google.appengine.ext import db
from google.appengine.ext.cloudstorage import common
class _AE_GCSFileInfo_(db.Model):
  """Store GCS specific info.

  GCS allows user to define arbitrary metadata via header x-goog-meta-foo: bar.
  These headers are returned when user does a GET or HEAD on the object.

  The key name is the file's blobkey.
  """

  # Full GCS path of the object, e.g. '/bucket/filename'.
  filename = db.StringProperty(required=True)
  # True once upload has finished and the content is in blob storage.
  finalized = db.BooleanProperty(required=True)

  # User-specified request headers, stored as 'lowercased-name:value' strings.
  raw_options = db.StringListProperty()

  # Object size in bytes; set when the file is finalized.
  size = db.IntegerProperty()

  # Creation time (UTC); set when the file is finalized.
  creation = db.DateTimeProperty()

  content_type = db.StringProperty()
  etag = db.ByteStringProperty()

  def get_options(self):
    """Return the user options as a {header_name: value} dict."""
    return dict(o.split(':', 1) for o in self.raw_options)

  def set_options(self, options_dict):
    """Store user options; mirror 'content-type' into content_type."""
    self.raw_options = [
        '%s:%s' % (k.lower(), v) for k, v in options_dict.iteritems()]
    if 'content-type' in options_dict:
      self.content_type = options_dict['content-type']

  options = property(get_options, set_options)

  @classmethod
  def kind(cls):
    # Store these entities under the same datastore kind the blobstore stub
    # uses for Google Storage files, so both stubs see the same metadata.
    return blobstore_stub._GS_INFO_KIND
class _AE_GCSPartialFile_(db.Model):
  """Store partial content for uploading files."""

  # End offset of this chunk within the file.
  end = db.IntegerProperty(required=True)

  # NOTE(review): despite the name, this holds the blobkey under which the
  # chunk's bytes are stored in blob storage, not the bytes themselves.
  partial_content = db.TextProperty(required=True)
class CloudStorageStub(object):
  """Google Cloud Storage stub implementation.

  We use blobstore stub to store files. All metadata are stored
  in _AE_GCSFileInfo_ entities in datastore.

  Note: this Google Cloud Storage stub is designed to work with
  apphosting.ext.cloudstorage.storage_api.py.
  It only implements the part of GCS storage_api.py uses, and its interface
  follows the GCS XML API.
  """
  def __init__(self, blob_storage):
    """Initialize the stub.

    Args:
      blob_storage: an
          apphosting.api.blobstore.blobstore_stub.BlobStorage instance
          used to store the actual file content.
    """
    self.blob_storage = blob_storage
111 def _filename_to_blobkey(self
, filename
):
112 """Get blobkey for filename.
115 filename: gcs filename of form /bucket/filename.
118 blobinfo's datastore's key name, aka, blobkey.
120 common
.validate_file_path(filename
)
122 return blobstore_stub
.BlobstoreServiceStub
.CreateEncodedGoogleStorageKey(
125 @db.non_transactional
126 def post_start_creation(self
, filename
, options
):
127 """Start object creation with a POST.
129 This implements the resumable upload XML API.
132 filename: gcs filename of form /bucket/filename.
133 options: a dict containing all user specified request headers.
134 e.g. {'content-type': 'foo', 'x-goog-meta-bar': 'bar'}.
137 a token (blobkey) used for continuing upload.
139 ns
= namespace_manager
.get_namespace()
141 namespace_manager
.set_namespace('')
142 common
.validate_file_path(filename
)
143 token
= self
._filename
_to
_blobkey
(filename
)
144 gcs_file
= _AE_GCSFileInfo_
.get_by_key_name(token
)
146 self
._cleanup
_old
_file
(gcs_file
)
147 new_file
= _AE_GCSFileInfo_(key_name
=token
,
150 new_file
.options
= options
154 namespace_manager
.set_namespace(ns
)
157 @db.non_transactional
158 def _cleanup_old_file(self
, gcs_file
):
159 """Clean up the old version of a file.
161 The old version may or may not be finalized yet. Either way,
162 when user tries to create a file that already exists, we delete the
166 gcs_file: an instance of _AE_GCSFileInfo_.
169 if gcs_file
.finalized
:
170 blobkey
= gcs_file
.key().name()
171 self
.blob_storage
.DeleteBlob(blobkey
)
173 db
.delete(_AE_GCSPartialFile_
.all().ancestor(gcs_file
))
176 @db.non_transactional
177 def put_continue_creation(self
, token
, content
, content_range
,
179 _upload_filename
=None):
180 """Continue object upload with PUTs.
182 This implements the resumable upload XML API.
185 token: upload token returned by post_start_creation.
186 content: object content.
187 content_range: a (start, end) tuple specifying the content range of this
188 chunk. Both are inclusive according to XML API.
189 last: True if this is the last chunk of file content.
190 _upload_filename: internal use. Might be removed any time! This is
191 used by blobstore to pass in the upload filename from user.
194 _AE_GCSFileInfo entity for this file if the file is finalized.
197 ValueError: if token is invalid.
199 ns
= namespace_manager
.get_namespace()
201 namespace_manager
.set_namespace('')
202 gcs_file
= _AE_GCSFileInfo_
.get_by_key_name(token
)
204 raise ValueError('Invalid token')
206 start
, end
= content_range
207 if len(content
) != (end
- start
+ 1):
208 raise ValueError('Invalid content range %d-%d' % content_range
)
209 blobkey
= '%s-%d-%d' % (token
, content_range
[0], content_range
[1])
210 self
.blob_storage
.StoreBlob(blobkey
, StringIO
.StringIO(content
))
211 new_content
= _AE_GCSPartialFile_(parent
=gcs_file
,
213 key_name
='%020d' % start
,
214 partial_content
=blobkey
,
219 return self
._end
_creation
(token
, _upload_filename
)
221 namespace_manager
.set_namespace(ns
)
223 @db.non_transactional
224 def put_copy(self
, src
, dst
):
225 """Copy file from src to dst.
230 src: /bucket/filename. This file must exist.
231 dst: /bucket/filename
233 common
.validate_file_path(src
)
234 common
.validate_file_path(dst
)
237 src_blobkey
= self
._filename
_to
_blobkey
(src
)
238 source
= _AE_GCSFileInfo_
.get_by_key_name(src_blobkey
)
239 ns
= namespace_manager
.get_namespace()
241 namespace_manager
.set_namespace('')
242 token
= self
._filename
_to
_blobkey
(dst
)
243 new_file
= _AE_GCSFileInfo_(key_name
=token
,
246 new_file
.options
= source
.options
247 new_file
.etag
= source
.etag
248 new_file
.size
= source
.size
249 new_file
.creation
= datetime
.datetime
.utcnow()
252 namespace_manager
.set_namespace(ns
)
255 local_file
= self
.blob_storage
.OpenBlob(src_blobkey
)
256 self
.blob_storage
.StoreBlob(token
, local_file
)
258 @db.non_transactional
259 def _end_creation(self
, token
, _upload_filename
):
260 """End object upload.
263 token: upload token returned by post_start_creation.
266 _AE_GCSFileInfo Entity for this file.
269 ValueError: if token is invalid. Or file is corrupted during upload.
271 Save file content to blobstore. Save blobinfo and _AE_GCSFileInfo.
273 gcs_file
= _AE_GCSFileInfo_
.get_by_key_name(token
)
275 raise ValueError('Invalid token')
277 error_msg
, content
= self
._get
_content
(gcs_file
)
279 raise ValueError(error_msg
)
281 gcs_file
.etag
= hashlib
.md5(content
).hexdigest()
282 gcs_file
.creation
= datetime
.datetime
.utcnow()
283 gcs_file
.size
= len(content
)
287 blob_info
= datastore
.Entity('__BlobInfo__', name
=str(token
), namespace
='')
288 blob_info
['content_type'] = gcs_file
.content_type
289 blob_info
['creation'] = gcs_file
.creation
290 blob_info
['filename'] = _upload_filename
291 blob_info
['md5_hash'] = gcs_file
.etag
292 blob_info
['size'] = gcs_file
.size
293 datastore
.Put(blob_info
)
295 self
.blob_storage
.StoreBlob(token
, StringIO
.StringIO(content
))
297 gcs_file
.finalized
= True
301 @db.transactional(propagation
=db
.INDEPENDENT
)
302 def _get_content(self
, gcs_file
):
303 """Aggregate all partial content of the gcs_file.
306 gcs_file: an instance of _AE_GCSFileInfo_.
309 (error_msg, content) tuple. error_msg is set if the file is
310 corrupted during upload. Otherwise content is set to the
311 aggregation of all partial contents.
316 for partial
in (_AE_GCSPartialFile_
.all(namespace
='').ancestor(gcs_file
).
318 start
= int(partial
.key().name())
320 if start
< previous_end
:
321 error_msg
= 'File is corrupted due to missing chunks.'
322 elif start
> previous_end
:
323 error_msg
= 'File is corrupted due to overlapping chunks'
324 previous_end
= partial
.end
325 content
+= self
.blob_storage
.OpenBlob(partial
.partial_content
).read()
326 self
.blob_storage
.DeleteBlob(partial
.partial_content
)
331 return error_msg
, content
333 @db.non_transactional
339 """Get bucket listing with a GET.
342 bucketpath: gcs bucket path of form '/bucket'
343 prefix: prefix to limit listing.
344 marker: a str after which to start listing.
345 max_keys: max size of listing.
347 See https://developers.google.com/storage/docs/reference-methods#getbucket
351 A list of GCSFileStat sorted by filename.
353 common
.validate_bucket_path(bucketpath
)
354 q
= _AE_GCSFileInfo_
.all(namespace
='')
355 fully_qualified_prefix
= '/'.join([bucketpath
, prefix
])
357 q
.filter('filename >', '/'.join([bucketpath
, marker
]))
359 q
.filter('filename >=', fully_qualified_prefix
)
361 for info
in q
.run(limit
=max_keys
):
362 if not info
.filename
.startswith(fully_qualified_prefix
):
365 info
= db
.get(info
.key())
367 result
.append(common
.GCSFileStat(
368 filename
=info
.filename
,
370 st_ctime
=calendar
.timegm(info
.creation
.utctimetuple()),
374 @db.non_transactional
375 def get_object(self
, filename
, start
=0, end
=None):
376 """Get file content with a GET.
379 filename: gcs filename of form '/bucket/filename'.
380 start: start offset to request. Inclusive.
381 end: end offset to request. Inclusive.
384 The segment of file content requested.
387 ValueError: if file doesn't exist.
389 common
.validate_file_path(filename
)
390 blobkey
= self
._filename
_to
_blobkey
(filename
)
391 key
= blobstore_stub
.BlobstoreServiceStub
.ToDatastoreBlobKey(blobkey
)
392 gcsfileinfo
= db
.get(key
)
393 if not gcsfileinfo
or not gcsfileinfo
.finalized
:
394 raise ValueError('File does not exist.')
395 local_file
= self
.blob_storage
.OpenBlob(blobkey
)
396 local_file
.seek(start
)
398 return local_file
.read(end
- start
+ 1)
400 return local_file
.read()
402 @db.non_transactional
403 def head_object(self
, filename
):
404 """Get file stat with a HEAD.
407 filename: gcs filename of form '/bucket/filename'
410 A GCSFileStat object containing file stat. None if file doesn't exist.
412 common
.validate_file_path(filename
)
413 blobkey
= self
._filename
_to
_blobkey
(filename
)
414 key
= blobstore_stub
.BlobstoreServiceStub
.ToDatastoreBlobKey(blobkey
)
416 if info
and info
.finalized
:
417 metadata
= common
.get_metadata(info
.options
)
418 filestat
= common
.GCSFileStat(
419 filename
=info
.filename
,
422 st_ctime
=calendar
.timegm(info
.creation
.utctimetuple()),
423 content_type
=info
.content_type
,
428 @db.non_transactional
429 def delete_object(self
, filename
):
430 """Delete file with a DELETE.
433 filename: gcs filename of form '/bucket/filename'
436 True if file is deleted. False if file doesn't exist.
438 common
.validate_file_path(filename
)
439 blobkey
= self
._filename
_to
_blobkey
(filename
)
440 key
= blobstore_stub
.BlobstoreServiceStub
.ToDatastoreBlobKey(blobkey
)
441 gcsfileinfo
= db
.get(key
)
445 blobstore_stub
.BlobstoreServiceStub
.DeleteBlob(blobkey
, self
.blob_storage
)