import struct
import time

import cherrypy
from cherrypy._cpcompat import basestring, BytesIO, ntob, set, unicodestr
from cherrypy.lib import file_generator
from cherrypy.lib import set_vary_header
def decode(encoding=None, default_encoding='utf-8'):
    """Replace or extend the list of charsets used to decode a request entity.

    Either argument may be a single string or a list of strings.

    encoding
        If not None, restricts the set of charsets attempted while decoding
        a request entity to the given set (even if a different charset is
        given in the Content-Type request header).

    default_encoding
        Only in effect if the 'encoding' argument is not given.
        If given, the set of charsets attempted while decoding a request
        entity is *extended* with the given value(s).
    """
    body = cherrypy.request.body
    if encoding is not None:
        # Normalise a single charset name to a one-element list so that
        # attempt_charsets is always a list, mirroring the branch below.
        if not isinstance(encoding, list):
            encoding = [encoding]
        body.attempt_charsets = encoding
    elif default_encoding:
        if not isinstance(default_encoding, list):
            default_encoding = [default_encoding]
        # Build a new list rather than extending in place, so the list that
        # was previously bound to attempt_charsets is left untouched.
        body.attempt_charsets = body.attempt_charsets + default_encoding
class ResponseEncoder:
    """Handler wrapper that encodes the response body to an acceptable charset.

    On construction the instance replaces ``request.handler`` with itself
    (remembering the previous handler).  When called, it invokes the previous
    handler, normalises the returned body to an iterable, and, depending on
    the response Content-Type, encodes unicode chunks using a charset picked
    from the request's Accept-Charset header.
    """

    default_encoding = 'utf-8'
    failmsg = "Response body could not be encoded with %r."
    # NOTE(review): the five defaults below were not visible in the listing
    # this block was restored from; they are inferred from how the methods
    # read them (self.encoding is compared with None, self.errors is passed
    # to unicode.encode, text_only/add_charset/debug gate branches).
    # Confirm the exact values against the upstream file.
    encoding = None
    errors = 'strict'
    text_only = True
    add_charset = True
    debug = False

    def __init__(self, **kwargs):
        """Store every keyword argument as an attribute and hook the handler.

        If the current request has a handler, it is saved as
        ``self.oldhandler`` and this instance is installed in its place.
        """
        for k, v in kwargs.items():
            setattr(self, k, v)

        self.attempted_charsets = set()
        request = cherrypy.serving.request
        if request.handler is not None:
            # Replace request.handler with self
            if self.debug:
                cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE')
            self.oldhandler = request.handler
            request.handler = self

    def encode_stream(self, encoding):
        """Encode a streaming response body.

        Use a generator wrapper, and just pray it works as the stream is
        being written out.

        Returns False if ``encoding`` was already attempted, True otherwise.
        Encoding errors, if any, surface later while the stream is consumed.
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)

        def encoder(body):
            for chunk in body:
                if isinstance(chunk, unicodestr):
                    chunk = chunk.encode(encoding, self.errors)
                yield chunk
        self.body = encoder(self.body)
        return True

    def encode_string(self, encoding):
        """Encode a buffered response body.

        Returns True on success.  Returns False if ``encoding`` was already
        attempted, is unknown (LookupError), or cannot represent the body
        (UnicodeError); in the failure cases ``self.body`` is left as it was.
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)

        try:
            # Collect into a fresh list so a failure part-way through does
            # not leave self.body half-encoded.
            body = []
            for chunk in self.body:
                if isinstance(chunk, unicodestr):
                    chunk = chunk.encode(encoding, self.errors)
                body.append(chunk)
            self.body = body
        except (LookupError, UnicodeError):
            return False
        else:
            return True

    def find_acceptable_charset(self):
        """Encode the body with the first workable charset and return its name.

        Raises cherrypy.HTTPError(500) if no Accept-Charset header was sent
        and the default encoding fails, and cherrypy.HTTPError(406) if no
        candidate charset succeeds.
        """
        request = cherrypy.serving.request
        response = cherrypy.serving.response

        if self.debug:
            cherrypy.log('response.stream %r' % response.stream, 'TOOLS.ENCODE')
        if response.stream:
            encoder = self.encode_stream
        else:
            encoder = self.encode_string
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                # Encoded strings may be of different lengths from their
                # unicode equivalents, and even from each other. For example:
                # >>> t = u"\u7007\u3040"
                # >>> len(t)
                # 2
                # >>> len(t.encode("UTF-8"))
                # 6
                # >>> len(t.encode("utf7"))
                # 8
                del response.headers["Content-Length"]

        # Parse the Accept-Charset request header, and try to provide one
        # of the requested charsets (in order of user preference).
        encs = request.headers.elements('Accept-Charset')
        charsets = [enc.value.lower() for enc in encs]
        if self.debug:
            cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE')

        if self.encoding is not None:
            # If specified, force this encoding to be used, or fail.
            encoding = self.encoding.lower()
            if self.debug:
                cherrypy.log('Specified encoding %r' % encoding, 'TOOLS.ENCODE')
            if (not charsets) or "*" in charsets or encoding in charsets:
                if self.debug:
                    cherrypy.log('Attempting encoding %r' % encoding, 'TOOLS.ENCODE')
                if encoder(encoding):
                    return encoding
        else:
            if not encs:
                if self.debug:
                    cherrypy.log('Attempting default encoding %r' %
                                 self.default_encoding, 'TOOLS.ENCODE')
                # Any character-set is acceptable.
                if encoder(self.default_encoding):
                    return self.default_encoding
                else:
                    raise cherrypy.HTTPError(500, self.failmsg % self.default_encoding)
            else:
                for element in encs:
                    if element.qvalue > 0:
                        if element.value == "*":
                            # Matches any charset. Try our default.
                            if self.debug:
                                cherrypy.log('Attempting default encoding due '
                                             'to %r' % element, 'TOOLS.ENCODE')
                            if encoder(self.default_encoding):
                                return self.default_encoding
                        else:
                            encoding = element.value
                            if self.debug:
                                cherrypy.log('Attempting encoding %s (qvalue >'
                                             '0)' % element, 'TOOLS.ENCODE')
                            if encoder(encoding):
                                return encoding

                if "*" not in charsets:
                    # If no "*" is present in an Accept-Charset field, then all
                    # character sets not explicitly mentioned get a quality
                    # value of 0, except for ISO-8859-1, which gets a quality
                    # value of 1 if not explicitly mentioned.
                    iso = 'iso-8859-1'
                    if iso not in charsets:
                        if self.debug:
                            cherrypy.log('Attempting ISO-8859-1 encoding',
                                         'TOOLS.ENCODE')
                        if encoder(iso):
                            return iso

        # No suitable encoding found.
        ac = request.headers.get('Accept-Charset')
        if ac is None:
            msg = "Your client did not send an Accept-Charset header."
        else:
            msg = "Your client sent this Accept-Charset header: %s." % ac
        msg += " We tried these charsets: %s." % ", ".join(self.attempted_charsets)
        raise cherrypy.HTTPError(406, msg)

    def __call__(self, *args, **kwargs):
        """Run the wrapped handler and return its body, encoded if applicable.

        The body is normalised to an iterable first.  Charset negotiation is
        only performed when the response carries a Content-Type header and,
        if ``text_only`` is set, that type starts with "text/".
        """
        response = cherrypy.serving.response
        self.body = self.oldhandler(*args, **kwargs)

        if isinstance(self.body, basestring):
            # strings get wrapped in a list because iterating over a single
            # item list is much faster than iterating over every character
            # in a long string.
            if self.body:
                self.body = [self.body]
            else:
                # [''] doesn't evaluate to False, so replace it with [].
                self.body = []
        elif hasattr(self.body, 'read'):
            self.body = file_generator(self.body)
        elif self.body is None:
            self.body = []

        ct = response.headers.elements("Content-Type")
        if self.debug:
            cherrypy.log('Content-Type: %r' % [str(h) for h in ct], 'TOOLS.ENCODE')
        if ct:
            # Only the first Content-Type element is considered.
            ct = ct[0]
            if self.text_only:
                if ct.value.lower().startswith("text/"):
                    if self.debug:
                        cherrypy.log('Content-Type %s starts with "text/"' % ct,
                                     'TOOLS.ENCODE')
                    do_find = True
                else:
                    if self.debug:
                        cherrypy.log('Not finding because Content-Type %s does '
                                     'not start with "text/"' % ct,
                                     'TOOLS.ENCODE')
                    do_find = False
            else:
                if self.debug:
                    cherrypy.log('Finding because not text_only', 'TOOLS.ENCODE')
                do_find = True

            if do_find:
                # Set "charset=..." param on response Content-Type header
                ct.params['charset'] = self.find_acceptable_charset()
                if self.add_charset:
                    if self.debug:
                        cherrypy.log('Setting Content-Type %s' % ct,
                                     'TOOLS.ENCODE')
                    response.headers["Content-Type"] = str(ct)

        return self.body
def compress(body, compress_level):
    """Compress 'body' at the given compress_level.

    'body' is an iterable of byte strings.  This is a generator that yields
    the pieces of a gzip member: the 10-byte header, the raw deflate stream,
    then the CRC32 and uncompressed-size trailer fields.
    """
    import zlib

    # See http://www.gzip.org/zlib/rfc-gzip.html
    yield ntob('\x1f\x8b')       # ID1 and ID2: gzip marker
    yield ntob('\x08')           # CM: compression method
    yield ntob('\x00')           # FLG: none set
    # MTIME: 4 bytes
    yield struct.pack("<L", int(time.time()) & int('FFFFFFFF', 16))
    yield ntob('\x02')           # XFL: max compression, slowest algo
    yield ntob('\xff')           # OS: unknown

    crc = zlib.crc32(ntob(""))
    size = 0
    # Negative wbits selects a raw deflate stream (no zlib header/trailer);
    # the gzip framing is emitted by hand around it.
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()

    # CRC32: 4 bytes
    yield struct.pack("<L", crc & int('FFFFFFFF', 16))
    # ISIZE: 4 bytes
    yield struct.pack("<L", size & int('FFFFFFFF', 16))
def decompress(body):
    """Return the gunzipped contents of the byte string 'body'."""
    # Imported locally: at module level the name 'gzip' is bound to the
    # gzip() tool function defined in this file, not the stdlib module.
    import gzip

    zbuf = BytesIO(body)
    zfile = gzip.GzipFile(mode='rb', fileobj=zbuf)
    try:
        return zfile.read()
    finally:
        # Close even if the payload is not valid gzip data and read() raises.
        zfile.close()
def _mime_type_allowed(ct, mime_types):
    """Return True if content type 'ct' matches an entry of 'mime_types'.

    Entries may be exact ('type/subtype') or wildcard patterns of the form
    'type/*' or 'type/*+subtype'.  Parameters are not supported.
    """
    if ct in mime_types:
        return True
    if '/' not in ct:
        return False

    # maxsplit=1 so a malformed value with extra separators cannot raise
    # ValueError on unpacking.
    ct_media_type, ct_sub_type = ct.split('/', 1)
    for mime_type in mime_types:
        if '/' not in mime_type:
            continue
        media_type, sub_type = mime_type.split('/', 1)
        if ct_media_type != media_type:
            continue
        if sub_type == '*':
            return True
        if '+' in sub_type and '+' in ct_sub_type:
            ct_right = ct_sub_type.rsplit('+', 1)[1]
            left, right = sub_type.split('+', 1)
            if left == '*' and ct_right == right:
                return True
    return False


def gzip(compress_level=5, mime_types=['text/html', 'text/plain'], debug=False):
    """Try to gzip the response body if Content-Type in mime_types.

    cherrypy.response.headers['Content-Type'] must be set to one of the
    values in the mime_types arg before calling this function.

    The provided list of mime-types must be of one of the following form:
        * type/subtype
        * type/*
        * type/*+subtype

    No compression is performed if any of the following hold:
        * The client sends no Accept-Encoding request header
        * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header
        * No 'gzip' or 'x-gzip' with a qvalue > 0 is present
        * The 'identity' value is given with a qvalue > 0.

    If Accept-Encoding is present but offers neither a usable gzip coding
    nor identity, a 406 response is set.  'mime_types' is only read, never
    mutated, so the shared list default is safe.
    """
    request = cherrypy.serving.request
    response = cherrypy.serving.response

    set_vary_header(response, "Accept-Encoding")

    if not response.body:
        # Response body is empty (might be a 304 for instance)
        if debug:
            cherrypy.log('No response body', context='TOOLS.GZIP')
        return

    # If returning cached content (which should already have been gzipped),
    # don't re-zip.
    if getattr(request, "cached", False):
        if debug:
            cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP')
        return

    acceptable = request.headers.elements('Accept-Encoding')
    if not acceptable:
        # If no Accept-Encoding field is present in a request,
        # the server MAY assume that the client will accept any
        # content coding. In this case, if "identity" is one of
        # the available content-codings, then the server SHOULD use
        # the "identity" content-coding, unless it has additional
        # information that a different content-coding is meaningful
        # to the client.
        if debug:
            cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP')
        return

    # Drop parameters and surrounding whitespace ("text/html ; charset=...").
    ct = response.headers.get('Content-Type', '').split(';')[0].strip()
    for coding in acceptable:
        if coding.value == 'identity' and coding.qvalue != 0:
            if debug:
                cherrypy.log('Non-zero identity qvalue: %s' % coding,
                             context='TOOLS.GZIP')
            return
        if coding.value in ('gzip', 'x-gzip'):
            if coding.qvalue == 0:
                if debug:
                    cherrypy.log('Zero gzip qvalue: %s' % coding,
                                 context='TOOLS.GZIP')
                return

            # If the list of provided mime-types contains tokens
            # such as 'text/*' or 'application/*+xml',
            # we go through them and find the most appropriate one
            # based on the given content-type.
            # The pattern matching is only caring about the most
            # common cases, as stated above, and doesn't support
            # for extra parameters.
            if not _mime_type_allowed(ct, mime_types):
                if debug:
                    cherrypy.log('Content-Type %s not in mime_types %r' %
                                 (ct, mime_types), context='TOOLS.GZIP')
                return

            if debug:
                cherrypy.log('Gzipping', context='TOOLS.GZIP')
            # Return a generator that compresses the page
            response.headers['Content-Encoding'] = 'gzip'
            response.body = compress(response.body, compress_level)
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                del response.headers["Content-Length"]

            return

    if debug:
        cherrypy.log('No acceptable encoding found.', context='GZIP')
    cherrypy.HTTPError(406, "identity, gzip").set_response()