# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper CGI for POST uploads.

Utility library containing the main logic behind simulating the blobstore
upload mechanism.

Contents:
  GenerateBlobKey: Function for generating unique blob-keys.
  UploadCGIHandler: Main CGI handler class for post uploads.
"""
import base64
import cStringIO
import datetime
import hashlib
import random
import time

from google.appengine.api import datastore
from google.appengine.api import datastore_errors
from google.appengine.api.blobstore import blobstore

try:
    from email.mime import base
    from email.mime import multipart
    from email import generator
except ImportError:
    # Older Python releases only ship the capitalised legacy module names.
    from email import Generator as generator
    from email import MIMEBase as base
    from email import MIMEMultipart as multipart
# Headers of the original POST that are not copied onto the rewritten message.
# Lookups are done on the lower-cased header name.
# NOTE(review): the extracted source is truncated after 'content-length'.
# 'content-md5' and 'content-type' are assumed here because the rewritten
# multipart message carries its own Content-Type and a different body --
# confirm against the upstream file.
STRIPPED_HEADERS = frozenset((
    'content-length',
    'content-md5',
    'content-type',
))

# Longest filename / content-type string accepted for an uploaded blob.
MAX_STRING_NAME_LENGTH = 500
class Error(Exception):
    """Base class for upload processing errors."""
class InvalidMIMETypeFormatError(Error):
    """MIME type was formatted incorrectly."""
class UploadEntityTooLargeError(Error):
    """Entity being uploaded exceeded the allowed size."""
class FilenameOrContentTypeTooLargeError(Error):
    """The filename or content type exceeded the allowed size."""

    def __init__(self, invalid_field):
        """Build the error with a message naming the offending field.

        Args:
          invalid_field: Name of the field that was too long. It is
            interpolated verbatim into the error message.
        """
        # NOTE(review): the line that opens this call is missing from the
        # extracted source; forwarding the message to the base initialiser
        # is assumed.
        Error.__init__(
            self,
            'The %s exceeds the maximum allowed length of %s.' % (
                invalid_field, MAX_STRING_NAME_LENGTH))
class InvalidMetadataError(Error):
    """The filename or content type of the entity was not a valid UTF-8 string."""
def GenerateBlobKey(time_func=time.time, random_func=random.random):
    """Generate a unique BlobKey.

    BlobKey is generated using the current time stamp combined with a random
    number. The two values are subject to an md5 digest and base64 url-safe
    encoded. The new key is checked against the possibility of existence
    within the datastore and the random number is regenerated until there is
    no match.

    Args:
      time_func: Function used for generating the timestamp. Used for
        dependency injection. Allows for predictable results during tests.
        Must return a floating point UTC timestamp.
      random_func: Function used for generating the random number. Used for
        dependency injection. Allows for predictable results during tests.

    Returns:
      String version of BlobKey that is unique within the BlobInfo datastore.
      None if there are too many name conflicts.
    """
    # NOTE(review): the retry loop header is missing from the extracted
    # source; a limit of 10 attempts is assumed -- confirm upstream.
    max_attempts = 10

    # The timestamp is taken once; only the random component changes between
    # attempts.
    timestamp = str(time_func())
    for _ in range(max_attempts):
        number = str(random_func())
        digester = hashlib.md5()
        digester.update(timestamp)
        digester.update(number)
        blob_key = base64.urlsafe_b64encode(digester.digest())
        # NOTE(review): the trailing arguments of from_path are missing from
        # the extracted source; keying by blob_key in the empty namespace is
        # assumed -- confirm upstream.
        datastore_key = datastore.Key.from_path(blobstore.BLOB_INFO_KIND,
                                                blob_key,
                                                namespace='')
        try:
            datastore.Get(datastore_key)
        except datastore_errors.EntityNotFoundError:
            # No BlobInfo entity uses this key yet, so it is free.
            return blob_key
    return None
127 def _SplitMIMEType(mime_type
):
128 """Split MIME-type in to main and sub type.
131 mime_type: full MIME type string.
135 main: Main part of mime type (application, image, text, etc).
136 sub: Subtype part of mime type (pdf, png, html, etc).
139 InvalidMIMETypeFormatError: If form item has incorrectly formatted MIME
143 mime_type_array
= mime_type
.split('/')
145 if len(mime_type_array
) == 1:
146 raise InvalidMIMETypeFormatError('Missing MIME sub-type.')
147 elif len(mime_type_array
) == 2:
148 main_type
, sub_type
= mime_type_array
149 if not(main_type
and sub_type
):
150 raise InvalidMIMETypeFormatError(
151 'Incorrectly formatted MIME type: %s' % mime_type
)
152 return main_type
, sub_type
154 raise InvalidMIMETypeFormatError(
155 'Incorrectly formatted MIME type: %s' % mime_type
)
157 return 'application', 'octet-stream'
class UploadCGIHandler(object):
    """Class used for handling an upload post.

    The public entry point is GenerateMIMEMessageString. It receives the
    upload form, stores the blobs contained in the post and rewrites the
    blob parts to contain BlobKeys instead of blobs.

    NOTE(review): this class was rebuilt from an extraction that dropped
    several source lines. Statements marked NOTE(review) below were inferred
    from surrounding code and should be confirmed against the upstream file.
    """

    def __init__(self,
                 blob_storage,
                 generate_blob_key=GenerateBlobKey,
                 now_func=datetime.datetime.now):
        """Constructor.

        Args:
          blob_storage: BlobStorage instance where actual blobs are stored.
          generate_blob_key: Function used for generating unique blob keys.
          now_func: Function that returns the current timestamp.
        """
        self.__blob_storage = blob_storage
        self.__generate_blob_key = generate_blob_key
        self.__now_func = now_func

    @staticmethod
    def _Md5OfStream(stream):
        """Return an md5 object fed with the rest of stream, read in 1 MiB blocks.

        The stream is read from its current position and is left at
        end-of-file.
        """
        digester = hashlib.md5()
        while True:
            block = stream.read(1 << 20)
            if not block:
                break
            digester.update(block)
        return digester

    def StoreBlob(self, form_item, creation):
        """Store form-item to blob storage.

        Args:
          form_item: FieldStorage instance that represents a specific form
            field. This instance should have a non-empty filename attribute,
            meaning that it is an uploaded blob rather than a normal form
            field.
          creation: Timestamp to associate with new blobs creation time. This
            parameter is provided so that all blobs in the same upload form
            can have the same creation date.

        Returns:
          datastore.Entity('__BlobInfo__') associated with the upload.

        Raises:
          InvalidMetadataError: The filename or content type is not valid
            UTF-8.
          InvalidMIMETypeFormatError: form_item.type is malformed.
        """
        main_type, sub_type = _SplitMIMEType(form_item.type)

        blob_key = self.__generate_blob_key()
        blob_file = form_item.file
        # A base64 transfer-encoded part is decoded before it is stored.
        if 'Content-Transfer-Encoding' in form_item.headers:
            if form_item.headers['Content-Transfer-Encoding'] == 'base64':
                blob_file = cStringIO.StringIO(
                    base64.urlsafe_b64decode(blob_file.read()))
        self.__blob_storage.StoreBlob(blob_key, blob_file)

        # MIMEBase is used only to render "main/sub; option=value" into a
        # single Content-Type header value.
        content_type_formatter = base.MIMEBase(main_type, sub_type,
                                               **form_item.type_options)

        # NOTE(review): the trailing Entity() arguments are missing from the
        # extracted source. Keying by blob_key is implied by callers reading
        # blob_entity.key().name(); the empty namespace is assumed.
        blob_entity = datastore.Entity('__BlobInfo__',
                                       name=str(blob_key),
                                       namespace='')
        try:
            blob_entity['content_type'] = (
                content_type_formatter['content-type'].decode('utf-8'))
            blob_entity['creation'] = creation
            blob_entity['filename'] = form_item.filename.decode('utf-8')
        except UnicodeDecodeError:
            raise InvalidMetadataError(
                'The uploaded entity contained invalid UTF-8 metadata. This may be '
                'because the page containing the upload form was served with a '
                'charset other than "utf-8".')

        # NOTE(review): the rewind before hashing is inferred; the stream has
        # already been handed to the blob storage above.
        blob_file.seek(0)
        digester = self._Md5OfStream(blob_file)

        blob_entity['md5_hash'] = digester.hexdigest()
        # The stream is at end-of-file here, so tell() is the total size.
        blob_entity['size'] = blob_file.tell()
        # NOTE(review): trailing rewind inferred from a gap in the extract.
        blob_file.seek(0)

        datastore.Put(blob_entity)
        return blob_entity

    def _GenerateMIMEMessage(self,
                             form,
                             boundary=None,
                             max_bytes_per_blob=None,
                             max_bytes_total=None,
                             bucket_name=None):
        """Generate a new post from original form.

        Also responsible for storing blobs in the datastore.

        Args:
          form: Instance of cgi.FieldStorage representing the whole form
            derived from original post data.
          boundary: Boundary to use for resulting form. Used only in tests so
            that the boundary is always consistent.
          max_bytes_per_blob: The maximum size in bytes that any single blob
            in the form is allowed to be.
          max_bytes_total: The maximum size in bytes that the total of all
            blobs in the form is allowed to be.
          bucket_name: The name of the Google Storage bucket to upload the
            file.

        Returns:
          A MIMEMultipart instance representing the new HTTP post which should
          be forwarded to the developers actual CGI handler. DO NOT use the
          return value of this method to generate a string unless you know
          what you're doing and properly handle folding whitespace (from
          rfc822) properly.

        Raises:
          UploadEntityTooLargeError: The upload exceeds either the
            max_bytes_per_blob or max_bytes_total limits.
          FilenameOrContentTypeTooLargeError: The filename or the content_type
            of the upload is longer than MAX_STRING_NAME_LENGTH.
        """
        message = multipart.MIMEMultipart('form-data', boundary)
        for name, value in form.headers.items():
            if name.lower() not in STRIPPED_HEADERS:
                message.add_header(name, value)

        def IterateForm():
            """Flattens form in to single sequence of cgi.FieldStorage instances.

            The resulting cgi.FieldStorage objects are a little bit irregular
            in their structure. A single name can have multiple sub-items. In
            this case, the root FieldStorage object has a list associated with
            that field name. Otherwise, the root FieldStorage object just
            refers to a single nested instance.

            Lists of FieldStorage instances occur when a form has multiple
            values for the same name.

            Yields:
              cgi.FieldStorage irrespective of their nesting level.
            """
            # Sorted so parts are emitted in a deterministic order.
            for key in sorted(form):
                form_item = form[key]
                if isinstance(form_item, list):
                    for list_item in form_item:
                        yield list_item
                else:
                    yield form_item

        # One timestamp is shared by every blob of this upload.
        creation = self.__now_func()
        total_bytes_uploaded = 0
        created_blobs = []
        upload_too_large = False
        filename_too_large = False
        content_type_too_large = False

        for form_item in IterateForm():
            disposition_parameters = {'name': form_item.name}

            if form_item.filename is None:
                # Ordinary form field: forwarded as a text/plain part.
                variable = base.MIMEBase('text', 'plain')
                variable.set_payload(form_item.value)
            else:
                # NOTE(review): the statement under this check is missing from
                # the extract; skipping file inputs with an empty filename is
                # assumed.
                if not form_item.filename:
                    continue

                disposition_parameters['filename'] = form_item.filename

                main_type, sub_type = _SplitMIMEType(form_item.type)

                # Measure the upload by seeking to the end of its file.
                form_item.file.seek(0, 2)
                content_length = form_item.file.tell()
                form_item.file.seek(0)

                total_bytes_uploaded += content_length

                # NOTE(review): the statement after each flag assignment is
                # missing from the extract; aborting the loop is assumed, so
                # an oversized item is never stored.
                if max_bytes_per_blob is not None:
                    if max_bytes_per_blob < content_length:
                        upload_too_large = True
                        break
                if max_bytes_total is not None:
                    if max_bytes_total < total_bytes_uploaded:
                        upload_too_large = True
                        break
                if form_item.filename is not None:
                    if MAX_STRING_NAME_LENGTH < len(form_item.filename):
                        filename_too_large = True
                        break
                if form_item.type is not None:
                    if MAX_STRING_NAME_LENGTH < len(form_item.type):
                        content_type_too_large = True
                        break

                blob_entity = self.StoreBlob(form_item, creation)

                # Remembered so the entities can be deleted if a later item
                # fails validation.
                created_blobs.append(blob_entity)

                # The blob part is replaced by a reference to the stored blob.
                # NOTE(review): the 'external-body' subtype is missing from
                # the extract and assumed from the access_type parameter.
                variable = base.MIMEBase('message',
                                         'external-body',
                                         access_type=blobstore.BLOB_KEY_HEADER,
                                         blob_key=blob_entity.key().name())

                form_item.file.seek(0)
                digester = self._Md5OfStream(form_item.file)

                # Content-MD5 is the url-safe base64 of the *hex* digest.
                blob_key = base64.urlsafe_b64encode(digester.hexdigest())
                form_item.file.seek(0)

                external = base.MIMEBase(main_type,
                                         sub_type,
                                         **form_item.type_options)
                headers = dict(form_item.headers)
                headers['Content-Length'] = str(content_length)
                headers[blobstore.UPLOAD_INFO_CREATION_HEADER] = (
                    blobstore._format_creation(creation))
                # NOTE(review): the guard and the third format argument are
                # missing from the extract; both are assumed.
                if bucket_name:
                    headers[blobstore.CLOUD_STORAGE_OBJECT_HEADER] = (
                        '/gs/%s/fake-%s-%s' % (bucket_name,
                                               blob_entity.key().name(),
                                               blob_key))
                headers['Content-MD5'] = blob_key
                for key, value in headers.items():
                    external.add_header(key, value)

                external_disposition_parameters = dict(disposition_parameters)

                external_disposition_parameters['filename'] = form_item.filename
                # NOTE(review): the 'form-data' disposition value is missing
                # from the extract (here and below) and is assumed.
                if not external.get('Content-Disposition'):
                    external.add_header('Content-Disposition',
                                        'form-data',
                                        **external_disposition_parameters)
                variable.set_payload([external])

            variable.add_header('Content-Disposition',
                                'form-data',
                                **disposition_parameters)
            message.attach(variable)

        if upload_too_large or filename_too_large or content_type_too_large:
            # Roll back the BlobInfo entities created before the failure.
            for blob in created_blobs:
                datastore.Delete(blob)
            if upload_too_large:
                raise UploadEntityTooLargeError()
            elif filename_too_large:
                raise FilenameOrContentTypeTooLargeError('filename')
            else:
                raise FilenameOrContentTypeTooLargeError('content-type')

        return message

    def GenerateMIMEMessageString(self,
                                  form,
                                  boundary=None,
                                  max_bytes_per_blob=None,
                                  max_bytes_total=None,
                                  bucket_name=None):
        """Generate a new post string from original form.

        Args:
          form: Instance of cgi.FieldStorage representing the whole form
            derived from original post data.
          boundary: Boundary to use for resulting form. Used only in tests so
            that the boundary is always consistent.
          max_bytes_per_blob: The maximum size in bytes that any single blob
            in the form is allowed to be.
          max_bytes_total: The maximum size in bytes that the total of all
            blobs in the form is allowed to be.
          bucket_name: The name of the Google Storage bucket to upload the
            file.

        Returns:
          A string rendering of a MIMEMultipart instance.
        """
        message = self._GenerateMIMEMessage(
            form,
            boundary=boundary,
            max_bytes_per_blob=max_bytes_per_blob,
            max_bytes_total=max_bytes_total,
            bucket_name=bucket_name)
        message_out = cStringIO.StringIO()
        # maxheaderlen=0 disables header folding in the rendered output.
        gen = generator.Generator(message_out, maxheaderlen=0)
        gen.flatten(message, unixfrom=False)
        return message_out.getvalue()