Fixed python_path problem.
[smonitor.git] / lib / cherrypy / lib / encoding.py
blob6459746509fba22dfdda1819898a9017b3823c41
1 import struct
2 import time
4 import cherrypy
5 from cherrypy._cpcompat import basestring, BytesIO, ntob, set, unicodestr
6 from cherrypy.lib import file_generator
7 from cherrypy.lib import set_vary_header
def decode(encoding=None, default_encoding='utf-8'):
    """Configure which charsets are tried when decoding the request entity.

    Both arguments accept either one charset name or a list of names.

    encoding
        When not None, the request body's ``attempt_charsets`` is *replaced*
        by this value, so only these charsets are tried (even if the
        Content-Type request header names a different one).

    default_encoding
        Consulted only when ``encoding`` is None. When truthy, its value(s)
        are *appended* to the request body's existing ``attempt_charsets``.

    """
    entity = cherrypy.request.body

    if encoding is None:
        # No forced charset: optionally extend the existing candidates.
        if default_encoding:
            extra = default_encoding
            if not isinstance(extra, list):
                extra = [extra]
            entity.attempt_charsets = entity.attempt_charsets + extra
        return

    # Forced charset(s): discard whatever candidates were there before.
    forced = encoding
    if not isinstance(forced, list):
        forced = [forced]
    entity.attempt_charsets = forced
class ResponseEncoder:
    """Handler wrapper that encodes the response body into a charset.

    On construction the instance installs itself as ``request.handler``
    (remembering the previous handler). When called, it runs the previous
    handler, normalizes the result into an iterable body, and, depending on
    the response Content-Type, encodes unicode chunks using a charset
    chosen from the request's Accept-Charset header.
    """

    # Charset tried when the client states no preference or accepts "*".
    default_encoding = 'utf-8'
    # Template for the 500 error raised when the default charset fails and
    # the client sent no Accept-Charset header.
    failmsg = "Response body could not be encoded with %r."
    # If not None, the only charset that will be attempted.
    encoding = None
    # Error-handling scheme passed to ``chunk.encode()``.
    errors = 'strict'
    # If True, only bodies whose Content-Type starts with "text/" are encoded.
    text_only = True
    # If True, the Content-Type response header is rewritten to carry the
    # chosen charset.
    add_charset = True
    # If True, log each decision under the 'TOOLS.ENCODE' context.
    debug = False

    def __init__(self, **kwargs):
        """Apply ``kwargs`` as attributes and wrap the current handler."""
        for k, v in kwargs.items():
            setattr(self, k, v)

        self.attempted_charsets = set()
        request = cherrypy.serving.request
        if request.handler is not None:
            # Replace request.handler with self
            if self.debug:
                cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE')
            self.oldhandler = request.handler
            request.handler = self

    def encode_stream(self, encoding):
        """Encode a streaming response body.

        Use a generator wrapper, and just pray it works as the stream is
        being written out.

        Returns False if ``encoding`` was already attempted, True otherwise.
        Nothing is encoded until the wrapped body is iterated, so encoding
        errors are not detected here.
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)

        def encoder(body):
            for chunk in body:
                if isinstance(chunk, unicodestr):
                    chunk = chunk.encode(encoding, self.errors)
                yield chunk
        self.body = encoder(self.body)
        return True

    def encode_string(self, encoding):
        """Encode a buffered response body.

        Returns True and replaces ``self.body`` with a list of encoded
        chunks on success. Returns False if ``encoding`` was already
        attempted, is unknown, or cannot represent the body.
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)

        try:
            # Materialize one-shot iterators before encoding. Otherwise a
            # failed attempt would consume chunks, and the next charset
            # tried would silently encode only the remainder of the body.
            if not isinstance(self.body, list):
                self.body = list(self.body)

            body = []
            for chunk in self.body:
                if isinstance(chunk, unicodestr):
                    chunk = chunk.encode(encoding, self.errors)
                body.append(chunk)
            self.body = body
        except (LookupError, UnicodeError):
            return False
        else:
            return True

    def find_acceptable_charset(self):
        """Encode the body with the first workable charset and return its name.

        Raises ``cherrypy.HTTPError`` 500 when the client sent no
        Accept-Charset header and ``default_encoding`` fails, and 406 when
        no attempted charset succeeds.
        """
        request = cherrypy.serving.request
        response = cherrypy.serving.response

        if self.debug:
            cherrypy.log('response.stream %r' % response.stream, 'TOOLS.ENCODE')
        if response.stream:
            encoder = self.encode_stream
        else:
            encoder = self.encode_string
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                # Encoded strings may be of different lengths from their
                # unicode equivalents, and even from each other. For example:
                #   >>> t = u"\u7007\u3040"
                #   >>> len(t)
                #   2
                #   >>> len(t.encode("UTF-8"))
                #   6
                #   >>> len(t.encode("utf7"))
                #   8
                del response.headers["Content-Length"]

        # Parse the Accept-Charset request header, and try to provide one
        # of the requested charsets (in order of user preference).
        encs = request.headers.elements('Accept-Charset')
        charsets = [enc.value.lower() for enc in encs]
        if self.debug:
            cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE')

        if self.encoding is not None:
            # If specified, force this encoding to be used, or fail.
            encoding = self.encoding.lower()
            if self.debug:
                cherrypy.log('Specified encoding %r' % encoding, 'TOOLS.ENCODE')
            if (not charsets) or "*" in charsets or encoding in charsets:
                if self.debug:
                    cherrypy.log('Attempting encoding %r' % encoding, 'TOOLS.ENCODE')
                if encoder(encoding):
                    return encoding
        else:
            if not encs:
                if self.debug:
                    cherrypy.log('Attempting default encoding %r' %
                                 self.default_encoding, 'TOOLS.ENCODE')
                # Any character-set is acceptable.
                if encoder(self.default_encoding):
                    return self.default_encoding
                else:
                    raise cherrypy.HTTPError(500, self.failmsg % self.default_encoding)
            else:
                for element in encs:
                    if element.qvalue > 0:
                        if element.value == "*":
                            # Matches any charset. Try our default.
                            if self.debug:
                                cherrypy.log('Attempting default encoding due '
                                             'to %r' % element, 'TOOLS.ENCODE')
                            if encoder(self.default_encoding):
                                return self.default_encoding
                        else:
                            encoding = element.value
                            if self.debug:
                                cherrypy.log('Attempting encoding %s (qvalue >'
                                             '0)' % element, 'TOOLS.ENCODE')
                            if encoder(encoding):
                                return encoding

                if "*" not in charsets:
                    # If no "*" is present in an Accept-Charset field, then all
                    # character sets not explicitly mentioned get a quality
                    # value of 0, except for ISO-8859-1, which gets a quality
                    # value of 1 if not explicitly mentioned.
                    iso = 'iso-8859-1'
                    if iso not in charsets:
                        if self.debug:
                            cherrypy.log('Attempting ISO-8859-1 encoding',
                                         'TOOLS.ENCODE')
                        if encoder(iso):
                            return iso

        # No suitable encoding found.
        ac = request.headers.get('Accept-Charset')
        if ac is None:
            msg = "Your client did not send an Accept-Charset header."
        else:
            msg = "Your client sent this Accept-Charset header: %s." % ac
        msg += " We tried these charsets: %s." % ", ".join(self.attempted_charsets)
        raise cherrypy.HTTPError(406, msg)

    def __call__(self, *args, **kwargs):
        """Run the wrapped handler and return its (possibly encoded) body."""
        response = cherrypy.serving.response
        self.body = self.oldhandler(*args, **kwargs)

        if isinstance(self.body, basestring):
            # strings get wrapped in a list because iterating over a single
            # item list is much faster than iterating over every character
            # in a long string.
            if self.body:
                self.body = [self.body]
            else:
                # [''] doesn't evaluate to False, so replace it with [].
                self.body = []
        elif hasattr(self.body, 'read'):
            self.body = file_generator(self.body)
        elif self.body is None:
            self.body = []

        ct = response.headers.elements("Content-Type")
        if self.debug:
            cherrypy.log('Content-Type: %r' % [str(h) for h in ct], 'TOOLS.ENCODE')
        if ct:
            # Only the first Content-Type element is considered.
            ct = ct[0]
            if self.text_only:
                if ct.value.lower().startswith("text/"):
                    if self.debug:
                        cherrypy.log('Content-Type %s starts with "text/"' % ct,
                                     'TOOLS.ENCODE')
                    do_find = True
                else:
                    if self.debug:
                        cherrypy.log('Not finding because Content-Type %s does '
                                     'not start with "text/"' % ct,
                                     'TOOLS.ENCODE')
                    do_find = False
            else:
                if self.debug:
                    cherrypy.log('Finding because not text_only', 'TOOLS.ENCODE')
                do_find = True

            if do_find:
                # Set "charset=..." param on response Content-Type header
                ct.params['charset'] = self.find_acceptable_charset()
                if self.add_charset:
                    if self.debug:
                        cherrypy.log('Setting Content-Type %s' % ct,
                                     'TOOLS.ENCODE')
                    response.headers["Content-Type"] = str(ct)

        return self.body
237 # GZIP
def compress(body, compress_level):
    """Yield 'body' as a gzip stream compressed at the given compress_level.

    ``body`` is iterated chunk by chunk; each chunk is fed to a raw-deflate
    compressor and whatever it produces is yielded immediately. The gzip
    header is yielded before the data, and the CRC32 and size trailer after.
    """
    import zlib

    uint32_mask = 0xFFFFFFFF

    # Header layout: see http://www.gzip.org/zlib/rfc-gzip.html
    yield ntob('\x1f\x8b')  # ID1 and ID2: gzip marker
    yield ntob('\x08')      # CM: compression method
    yield ntob('\x00')      # FLG: none set
    # MTIME: 4 bytes, little-endian, truncated to 32 bits
    yield struct.pack("<L", int(time.time()) & uint32_mask)
    yield ntob('\x02')      # XFL: max compression, slowest algo
    yield ntob('\xff')      # OS: unknown

    checksum = zlib.crc32(ntob(""))
    total_len = 0
    # Negative wbits: raw deflate data, since the gzip framing is written
    # by hand around it.
    deflater = zlib.compressobj(
        compress_level,
        zlib.DEFLATED,
        -zlib.MAX_WBITS,
        zlib.DEF_MEM_LEVEL,
        0,
    )
    for piece in body:
        total_len += len(piece)
        checksum = zlib.crc32(piece, checksum)
        yield deflater.compress(piece)
    yield deflater.flush()

    # CRC32: 4 bytes
    yield struct.pack("<L", checksum & uint32_mask)
    # ISIZE: 4 bytes
    yield struct.pack("<L", total_len & uint32_mask)
def decompress(body):
    """Return the decompressed contents of the gzip byte string ``body``.

    Any exception raised by ``gzip.GzipFile`` while reading (for example on
    corrupt input) propagates to the caller; the GzipFile is closed either
    way.
    """
    # Local import: the module-level name ``gzip`` in this file is the
    # gzip() tool function, not the standard-library module.
    import gzip

    zbuf = BytesIO()
    zbuf.write(body)
    zbuf.seek(0)
    zfile = gzip.GzipFile(mode='rb', fileobj=zbuf)
    try:
        return zfile.read()
    finally:
        # Close even when read() raises, so the GzipFile is never left open.
        zfile.close()
def _mime_type_matches(ct, mime_types):
    """Return True if content type ``ct`` matches an entry of ``mime_types``.

    An entry matches when it equals ``ct`` exactly, when it is ``type/*``
    with the same type, or when it is ``type/*+suffix`` with the same type
    and ``ct``'s subtype ends in ``+suffix``. Parameters are not handled.
    """
    if ct in mime_types:
        return True
    if '/' not in ct:
        return False

    # Bounded splits: a value with extra '/' or '+' characters must simply
    # fail to match rather than raise ValueError while unpacking.
    ct_media_type, ct_sub_type = ct.split('/', 1)
    for mime_type in mime_types:
        if '/' not in mime_type:
            continue
        media_type, sub_type = mime_type.split('/', 1)
        if ct_media_type != media_type:
            continue
        if sub_type == '*':
            return True
        if '+' in sub_type and '+' in ct_sub_type:
            left, right = sub_type.split('+', 1)
            if left == '*' and ct_sub_type.endswith('+' + right):
                return True
    return False


def gzip(compress_level=5, mime_types=['text/html', 'text/plain'], debug=False):
    """Try to gzip the response body if Content-Type in mime_types.

    cherrypy.response.headers['Content-Type'] must be set to one of the
    values in the mime_types arg before calling this function.

    The provided list of mime-types must be of one of the following form:
        * type/subtype
        * type/*
        * type/*+subtype

    No compression is performed if any of the following hold:
        * The client sends no Accept-Encoding request header
        * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header
        * No 'gzip' or 'x-gzip' with a qvalue > 0 is present
        * The 'identity' value is given with a qvalue > 0.

    compress_level
        Passed through to compress() for the response body.

    mime_types
        Content types eligible for compression. The default list is only
        read here, never mutated.

    debug
        If True, log each decision.

    """
    request = cherrypy.serving.request
    response = cherrypy.serving.response

    set_vary_header(response, "Accept-Encoding")

    if not response.body:
        # Response body is empty (might be a 304 for instance)
        if debug:
            cherrypy.log('No response body', context='TOOLS.GZIP')
        return

    # If returning cached content (which should already have been gzipped),
    # don't re-zip.
    if getattr(request, "cached", False):
        if debug:
            cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP')
        return

    acceptable = request.headers.elements('Accept-Encoding')
    if not acceptable:
        # If no Accept-Encoding field is present in a request,
        # the server MAY assume that the client will accept any
        # content coding. In this case, if "identity" is one of
        # the available content-codings, then the server SHOULD use
        # the "identity" content-coding, unless it has additional
        # information that a different content-coding is meaningful
        # to the client.
        if debug:
            cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP')
        return

    # Media type only: drop any ";param=value" suffix.
    ct = response.headers.get('Content-Type', '').split(';')[0]
    for coding in acceptable:
        if coding.value == 'identity' and coding.qvalue != 0:
            if debug:
                cherrypy.log('Non-zero identity qvalue: %s' % coding,
                             context='TOOLS.GZIP')
            return
        if coding.value in ('gzip', 'x-gzip'):
            if coding.qvalue == 0:
                if debug:
                    cherrypy.log('Zero gzip qvalue: %s' % coding,
                                 context='TOOLS.GZIP')
                return

            # The list of provided mime-types may contain tokens such as
            # 'text/*' or 'application/*+xml'; the helper handles exact
            # matches and those common wildcard forms.
            if not _mime_type_matches(ct, mime_types):
                if debug:
                    cherrypy.log('Content-Type %s not in mime_types %r' %
                                 (ct, mime_types), context='TOOLS.GZIP')
                return

            if debug:
                cherrypy.log('Gzipping', context='TOOLS.GZIP')
            # Return a generator that compresses the page
            response.headers['Content-Encoding'] = 'gzip'
            response.body = compress(response.body, compress_level)
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                del response.headers["Content-Length"]

            return

    # NOTE(review): this log call uses context 'GZIP' while every other call
    # in this function uses 'TOOLS.GZIP'; left unchanged.
    if debug:
        cherrypy.log('No acceptable encoding found.', context='GZIP')
    # The HTTPError is not raised; set_response() is called on the instance.
    cherrypy.HTTPError(406, "identity, gzip").set_response()