1.9.30 sync.
[gae.git] / python / google / appengine / tools / dev_appserver_upload.py
blob0bb1b60225ddfd3b7924116431aae3142e3159c7
1 #!/usr/bin/env python
3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 """Helper CGI for POST uploads.
19 Utility library contains the main logic behind simulating the blobstore
20 uploading mechanism.
22 Contents:
23 GenerateBlobKey: Function for generating unique blob keys.
24 UploadCGIHandler: Main CGI handler class for post uploads.
25 """
29 import base64
30 import cStringIO
31 import datetime
32 import random
33 import time
34 import hashlib
36 from google.appengine.api import datastore
37 from google.appengine.api import datastore_errors
38 from google.appengine.api.blobstore import blobstore
42 try:
43 from email.mime import base
44 from email.mime import multipart
45 from email import generator
46 except ImportError:
47 from email import Generator as generator
48 from email import MIMEBase as base
49 from email import MIMEMultipart as multipart
# Lower-cased names of request headers that are NOT copied from the original
# upload form onto the rewritten multipart message.
STRIPPED_HEADERS = frozenset([
    'content-length',
    'content-md5',
    'content-type',
])

# Longest filename / content-type string accepted for an uploaded blob.
MAX_STRING_NAME_LENGTH = 500
class Error(Exception):
  """Common ancestor of every exception raised while processing an upload."""
class InvalidMIMETypeFormatError(Error):
  """Raised when a MIME type string is not of the form "main/sub"."""
class UploadEntityTooLargeError(Error):
  """Raised when uploaded data is larger than the configured size limits."""
class FilenameOrContentTypeTooLargeError(Error):
  """Raised when an upload's filename or content type is too long."""

  def __init__(self, invalid_field):
    """Builds the error message for the offending field.

    Args:
      invalid_field: Human-readable name of the field that is too long; it is
        interpolated into the error message.
    """
    message = 'The %s exceeds the maximum allowed length of %s.' % (
        invalid_field, MAX_STRING_NAME_LENGTH)
    Error.__init__(self, message)
class InvalidMetadataError(Error):
  """Raised when an upload's filename or content type is not valid UTF-8."""
def GenerateBlobKey(time_func=time.time, random_func=random.random):
  """Produce a blob key that does not yet exist in the BlobInfo datastore.

  A candidate key is the url-safe base64 encoding of the md5 digest of a
  timestamp followed by a random number.  The timestamp is taken once; a fresh
  random number is drawn for every candidate.  A candidate is accepted as soon
  as looking it up in the datastore raises EntityNotFoundError.

  Args:
    time_func: Callable returning the timestamp to hash.  Injectable so that
      tests can produce predictable keys.  Must return a floating point UTC
      timestamp.
    random_func: Callable returning the random number to hash.  Injectable so
      that tests can produce predictable keys.

  Returns:
    The blob key as a string, or None when ten candidates in a row were
    already present in the datastore.
  """
  stamp = str(time_func())
  for _ in range(10):
    salt = str(random_func())
    md5 = hashlib.md5()
    md5.update(stamp)
    md5.update(salt)
    candidate = base64.urlsafe_b64encode(md5.digest())
    lookup_key = datastore.Key.from_path(
        blobstore.BLOB_INFO_KIND, candidate, namespace='')
    try:
      # A successful Get means the candidate is taken; try another one.
      datastore.Get(lookup_key)
    except datastore_errors.EntityNotFoundError:
      return candidate
  return None
127 def _SplitMIMEType(mime_type):
128 """Split MIME-type in to main and sub type.
130 Args:
131 mime_type: full MIME type string.
133 Returns:
134 (main, sub):
135 main: Main part of mime type (application, image, text, etc).
136 sub: Subtype part of mime type (pdf, png, html, etc).
138 Raises:
139 InvalidMIMETypeFormatError: If form item has incorrectly formatted MIME
140 type.
142 if mime_type:
143 mime_type_array = mime_type.split('/')
145 if len(mime_type_array) == 1:
146 raise InvalidMIMETypeFormatError('Missing MIME sub-type.')
147 elif len(mime_type_array) == 2:
148 main_type, sub_type = mime_type_array
149 if not(main_type and sub_type):
150 raise InvalidMIMETypeFormatError(
151 'Incorrectly formatted MIME type: %s' % mime_type)
152 return main_type, sub_type
153 else:
154 raise InvalidMIMETypeFormatError(
155 'Incorrectly formatted MIME type: %s' % mime_type)
156 else:
157 return 'application', 'octet-stream'
class UploadCGIHandler(object):
  """Stores uploaded blobs and rewrites the upload post that carried them.

  GenerateMIMEMessageString is the public entry point: it walks the fields of
  an upload form, stores every uploaded file through StoreBlob, and renders a
  replacement multipart post in which each file is represented by a reference
  to its blob key instead of by its content.
  """

  def __init__(self,
               blob_storage,
               generate_blob_key=GenerateBlobKey,
               now_func=datetime.datetime.now):
    """Constructor.

    Args:
      blob_storage: BlobStorage instance where the actual blob content is
        stored; its StoreBlob(blob_key, file) method is called per upload.
      generate_blob_key: Zero-argument callable returning a new unique blob
        key.
      now_func: Zero-argument callable returning the current timestamp.
    """
    self.__now_func = now_func
    self.__generate_blob_key = generate_blob_key
    self.__blob_storage = blob_storage

  def StoreBlob(self, form_item, creation):
    """Store one uploaded file and record its BlobInfo entity.

    Args:
      form_item: FieldStorage instance that represents a specific form field.
        It should have a non-empty filename attribute, meaning that it is an
        uploaded blob rather than a normal form field.
      creation: Timestamp to record as the blob's creation time.  Passed in so
        that all blobs of one upload form share the same creation date.

    Returns:
      The datastore.Entity of kind '__BlobInfo__' written for the upload.

    Raises:
      InvalidMIMETypeFormatError: If form_item.type is malformed.
      InvalidMetadataError: If the content type or filename is not valid
        UTF-8.
    """
    main_type, sub_type = _SplitMIMEType(form_item.type)
    new_key = self.__generate_blob_key()

    content = form_item.file
    item_headers = form_item.headers
    if ('Content-Transfer-Encoding' in item_headers and
        item_headers['Content-Transfer-Encoding'] == 'base64'):
      # Base64 transfer-encoded uploads are decoded in memory so that the
      # decoded bytes are what gets stored, hashed and measured below.
      content = cStringIO.StringIO(base64.urlsafe_b64decode(content.read()))
    self.__blob_storage.StoreBlob(new_key, content)

    # MIMEBase is used here only to render the content type, including its
    # options, as header text.
    type_header = base.MIMEBase(main_type, sub_type, **form_item.type_options)

    info = datastore.Entity('__BlobInfo__', name=str(new_key), namespace='')
    try:
      info['content_type'] = type_header['content-type'].decode('utf-8')
      info['creation'] = creation
      info['filename'] = form_item.filename.decode('utf-8')
    except UnicodeDecodeError:
      raise InvalidMetadataError(
          'The uploaded entity contained invalid UTF-8 metadata. This may be '
          'because the page containing the upload form was served with a '
          'charset other than "utf-8".')

    # Hash the content in 1 MiB blocks; the read position after the final
    # read doubles as the content size.
    content.seek(0)
    md5 = hashlib.md5()
    chunk = content.read(1 << 20)
    while chunk:
      md5.update(chunk)
      chunk = content.read(1 << 20)

    info['md5_hash'] = md5.hexdigest()
    info['size'] = content.tell()
    content.seek(0)

    datastore.Put(info)
    return info

  def _GenerateMIMEMessage(self,
                           form,
                           boundary=None,
                           max_bytes_per_blob=None,
                           max_bytes_total=None,
                           bucket_name=None):
    """Build the replacement post for an upload form.

    Uploaded files are stored (see StoreBlob) as a side effect.

    Args:
      form: Instance of cgi.FieldStorage representing the whole form
        derived from original post data.
      boundary: Boundary to use for resulting form.  Used only in tests so
        that the boundary is always consistent.
      max_bytes_per_blob: The maximum size in bytes that any single blob
        in the form is allowed to be.  None disables the check.
      max_bytes_total: The maximum size in bytes that the total of all blobs
        in the form is allowed to be.  None disables the check.
      bucket_name: The name of the Google Storage bucket to upload the file
        to.  When set, a fake cloud storage object name is added to the
        headers of each blob part.

    Returns:
      A MIMEMultipart instance representing the new HTTP post which should be
      forwarded to the developer's actual CGI handler.  DO NOT use the return
      value of this method to generate a string unless you know what you're
      doing and properly handle folding whitespace (from rfc822).

    Raises:
      UploadEntityTooLargeError: The upload exceeds either the
        max_bytes_per_blob or max_bytes_total limits.
      FilenameOrContentTypeTooLargeError: The filename or the content_type of
        the upload is larger than the allowed size for a string type in the
        datastore.
    """
    message = multipart.MIMEMultipart('form-data', boundary)
    for name, value in form.headers.items():
      if name.lower() in STRIPPED_HEADERS:
        continue
      message.add_header(name, value)

    def IterateForm():
      """Flattens form in to single sequence of cgi.FieldStorage instances.

      A field name that occurs several times in the form maps to a list of
      FieldStorage objects, while a name that occurs once maps directly to a
      single FieldStorage object.  Both shapes are flattened here, visiting
      field names in sorted order.

      Yields:
        cgi.FieldStorage instances irrespective of their nesting level.
      """
      for field_name in sorted(form):
        entry = form[field_name]
        entries = entry if isinstance(entry, list) else [entry]
        for item in entries:
          yield item

    creation = self.__now_func()
    total_bytes_uploaded = 0
    created_blobs = []
    # Which limit aborted the loop: 'upload', 'filename' or 'content-type'.
    # Stays None while every item is within limits.
    violation = None

    for form_item in IterateForm():
      disposition = {'name': form_item.name}

      if form_item.filename is None:
        # Ordinary form field: forwarded as a text/plain part.
        part = base.MIMEBase('text', 'plain')
        part.set_payload(form_item.value)
      elif not form_item.filename:
        # File field with an empty filename: nothing to forward.
        continue
      else:
        disposition['filename'] = form_item.filename
        main_type, sub_type = _SplitMIMEType(form_item.type)

        # Measure the upload by seeking to its end (whence=2), then rewind.
        upload = form_item.file
        upload.seek(0, 2)
        content_length = upload.tell()
        upload.seek(0)
        total_bytes_uploaded += content_length

        if (max_bytes_per_blob is not None and
            max_bytes_per_blob < content_length):
          violation = 'upload'
          break
        if (max_bytes_total is not None and
            max_bytes_total < total_bytes_uploaded):
          violation = 'upload'
          break
        if MAX_STRING_NAME_LENGTH < len(form_item.filename):
          violation = 'filename'
          break
        if (form_item.type is not None and
            MAX_STRING_NAME_LENGTH < len(form_item.type)):
          violation = 'content-type'
          break

        blob_entity = self.StoreBlob(form_item, creation)
        # Remembered so the entity can be deleted if a later item is rejected.
        created_blobs.append(blob_entity)

        part = base.MIMEBase('message',
                             'external-body',
                             access_type=blobstore.BLOB_KEY_HEADER,
                             blob_key=blob_entity.key().name())

        # Digest of the raw uploaded bytes, read in 1 MiB blocks.  The header
        # value is the url-safe base64 encoding of the *hex* digest.
        upload.seek(0)
        md5 = hashlib.md5()
        chunk = upload.read(1 << 20)
        while chunk:
          md5.update(chunk)
          chunk = upload.read(1 << 20)
        content_md5 = base64.urlsafe_b64encode(md5.hexdigest())
        upload.seek(0)

        external = base.MIMEBase(main_type, sub_type, **form_item.type_options)
        headers = dict(form_item.headers)
        headers['Content-Length'] = str(content_length)
        headers[blobstore.UPLOAD_INFO_CREATION_HEADER] = (
            blobstore._format_creation(creation))
        if bucket_name:
          headers[blobstore.CLOUD_STORAGE_OBJECT_HEADER] = (
              '/gs/%s/fake-%s-%s' % (bucket_name, blob_entity.key().name(),
                                     content_md5))
        headers['Content-MD5'] = content_md5
        for header_name, header_value in headers.iteritems():
          external.add_header(header_name, header_value)

        # Add a disposition only if the copied headers did not supply one.
        if not external.get('Content-Disposition'):
          external.add_header('Content-Disposition',
                              'form-data',
                              **disposition)
        part.set_payload([external])

      part.add_header('Content-Disposition', 'form-data', **disposition)
      message.attach(part)

    if violation is not None:
      # Remove the BlobInfo entities written before the offending item.
      for blob in created_blobs:
        datastore.Delete(blob)
      if violation == 'upload':
        raise UploadEntityTooLargeError()
      raise FilenameOrContentTypeTooLargeError(violation)

    return message

  def GenerateMIMEMessageString(self,
                                form,
                                boundary=None,
                                max_bytes_per_blob=None,
                                max_bytes_total=None,
                                bucket_name=None):
    """Render the replacement post for an upload form as a string.

    Args:
      form: Instance of cgi.FieldStorage representing the whole form
        derived from original post data.
      boundary: Boundary to use for resulting form.  Used only in tests so
        that the boundary is always consistent.
      max_bytes_per_blob: The maximum size in bytes that any single blob
        in the form is allowed to be.
      max_bytes_total: The maximum size in bytes that the total of all blobs
        in the form is allowed to be.
      bucket_name: The name of the Google Storage bucket to upload the file
        to.

    Returns:
      A string rendering of a MIMEMultipart instance.

    Raises:
      UploadEntityTooLargeError: Propagated from _GenerateMIMEMessage when a
        size limit is exceeded.
      FilenameOrContentTypeTooLargeError: Propagated from
        _GenerateMIMEMessage when a filename or content type is too long.
    """
    message = self._GenerateMIMEMessage(
        form,
        boundary=boundary,
        max_bytes_per_blob=max_bytes_per_blob,
        max_bytes_total=max_bytes_total,
        bucket_name=bucket_name)
    rendered = cStringIO.StringIO()
    # maxheaderlen=0 turns off header line wrapping in the output.
    writer = generator.Generator(rendered, maxheaderlen=0)
    writer.flatten(message, unixfrom=False)
    return rendered.getvalue()