Fixed python_path problem.
[smonitor.git] / lib / cherrypy / test / test_encoding.py
blob67b28ede20da2ffe387d5d3dacd35300e7123a54
2 import gzip
3 import sys
5 import cherrypy
6 from cherrypy._cpcompat import BytesIO, IncompleteRead, ntob, ntou
# Text fixtures shared by the handlers and the assertions in this module.
# ntou() is imported from cherrypy._cpcompat; presumably it turns the native
# string (escapes \x80 and \xa3) into unicode text -- confirm against _cpcompat.
europoundUnicode = ntou('\x80\xa3')

# Three non-ASCII characters followed by an ASCII phrase, together with the
# same text encoded as UTF-8 and as UTF-16.
sing = u"\u6bdb\u6cfd\u4e1c: Sing, Little Birdie?"
sing8, sing16 = sing.encode('utf-8'), sing.encode('utf-16')
14 from cherrypy.test import helper
17 class EncodingTests(helper.CPWebCase):
def setup_server():
    """Define the handler classes for this test module and mount them.

    ``Root`` is mounted through ``cherrypy.tree.mount``.  A ``GZIP``
    instance is attached as ``root.gzip`` (the mount config switches
    ``tools.gzip.on`` for ``/gzip``) and a ``Decode`` instance is attached
    as ``root.decode``.
    """

    def describe_params():
        # "key: value" for every request parameter, joined with ", ".
        pairs = [": ".join((name, value))
                 for name, value in cherrypy.request.params.items()]
        return ', '.join(pairs)

    class Root:

        def index(self, param):
            # Generator handler: the check only runs once the body is iterated.
            assert param == europoundUnicode, "%r != %r" % (param, europoundUnicode)
            yield europoundUnicode
        index.exposed = True

        def mao_zedong(self):
            return sing
        mao_zedong.exposed = True

        def utf8(self):
            # Returns the already-encoded UTF-8 fixture; the encode tool is
            # configured with a fixed 'utf-8' encoding for this handler.
            return sing8
        utf8.exposed = True
        utf8._cp_config = {'tools.encode.encoding': 'utf-8'}

        def cookies_and_headers(self):
            # if the headers have non-ascii characters and a cookie has
            # any part which is unicode (even ascii), the response
            # should not fail.
            response = cherrypy.response
            response.cookie['candy'] = 'bar'
            response.cookie['candy']['domain'] = 'cherrypy.org'
            response.headers['Some-Header'] = 'My d\xc3\xb6g has fleas'
            return 'Any content'
        cookies_and_headers.exposed = True

        def reqparams(self, *args, **kwargs):
            # Same "key: value" listing as describe_params(), but each pair
            # is encoded to utf8 and the result is joined with a bytes comma.
            encoded_pairs = []
            for name, value in cherrypy.request.params.items():
                encoded_pairs.append(": ".join((name, value)).encode('utf8'))
            return ntob(', ').join(encoded_pairs)
        reqparams.exposed = True

        def nontext(self, *args, **kwargs):
            # Non-text content type; the config below tells the encode tool
            # not to restrict itself to text and to add a charset.
            cherrypy.response.headers['Content-Type'] = 'application/binary'
            return '\x00\x01\x02\x03'
        nontext.exposed = True
        nontext._cp_config = {
            'tools.encode.text_only': False,
            'tools.encode.add_charset': True,
        }

    class GZIP:

        def index(self):
            yield "Hello, world"
        index.exposed = True

        def noshow(self):
            # Test for ticket #147, where yield showed no exceptions (content-
            # encoding was still gzip even though traceback wasn't zipped).
            # The unreachable yield is what makes this handler a generator.
            raise IndexError()
            yield "Here be dragons"
        noshow.exposed = True
        # Turn encoding off so the gzip tool is the one doing the collapse.
        noshow._cp_config = {'tools.encode.on': False}

        def noshow_stream(self):
            # Test for ticket #147, where yield showed no exceptions (content-
            # encoding was still gzip even though traceback wasn't zipped).
            # Same as noshow, but with response streaming switched on.
            raise IndexError()
            yield "Here be dragons"
        noshow_stream.exposed = True
        noshow_stream._cp_config = {'response.stream': True}

    class Decode:

        def extra_charset(self, *args, **kwargs):
            return describe_params()
        extra_charset.exposed = True
        extra_charset._cp_config = {
            'tools.decode.on': True,
            'tools.decode.default_encoding': ['utf-16'],
        }

        def force_charset(self, *args, **kwargs):
            return describe_params()
        force_charset.exposed = True
        force_charset._cp_config = {
            'tools.decode.on': True,
            'tools.decode.encoding': 'utf-16',
        }

    root = Root()
    root.gzip = GZIP()
    root.decode = Decode()
    mount_config = {'/gzip': {'tools.gzip.on': True}}
    cherrypy.tree.mount(root, config=mount_config)
setup_server = staticmethod(setup_server)
def test_query_string_decoding(self):
    # Raw utf-8 bytes in the query string come back unchanged in the body.
    pound_utf8 = europoundUnicode.encode('utf-8')
    url = ntob('/?param=') + pound_utf8
    self.getPage(url)
    self.assertBody(pound_utf8)

    # Encoded utf8 query strings MUST be parsed correctly.
    # Here, q is the POUND SIGN U+00A3 encoded in utf8 and then %HEX
    self.getPage("/reqparams?q=%C2%A3")
    # The return value will be encoded as utf8.
    expected_body = ntob("q: \xc2\xa3")
    self.assertBody(expected_body)

    # Query strings that are incorrectly encoded MUST raise 404.
    # Here, q is the POUND SIGN U+00A3 encoded in latin1 and then %HEX
    self.getPage("/reqparams?q=%A3")
    self.assertStatus(404)
    not_found_text = (
        "The given query string could not be processed. Query "
        "strings for this resource must be encoded with 'utf8'.")
    self.assertErrorPage(404, not_found_text)
def test_urlencoded_decoding(self):
    def post(path, payload, content_type):
        # POST payload to path with the given Content-Type and a
        # Content-Length computed from the payload.
        request_headers = [("Content-Type", content_type),
                           ("Content-Length", str(len(payload)))]
        self.getPage(path, method='POST', headers=request_headers,
                     body=payload)

    # Test the decoding of an application/x-www-form-urlencoded entity.
    pound_utf8 = europoundUnicode.encode('utf-8')
    post('/', ntob("param=") + pound_utf8,
         "application/x-www-form-urlencoded")
    self.assertBody(pound_utf8)

    # Encoded utf8 entities MUST be parsed and decoded correctly.
    # Here, q is the POUND SIGN U+00A3 encoded in utf8
    post('/reqparams', ntob("q=\xc2\xa3"),
         "application/x-www-form-urlencoded")
    self.assertBody(ntob("q: \xc2\xa3"))

    # ...and in utf16, which is not in the default attempt_charsets list:
    post('/reqparams', ntob("\xff\xfeq\x00=\xff\xfe\xa3\x00"),
         "application/x-www-form-urlencoded;charset=utf-16")
    self.assertBody(ntob("q: \xc2\xa3"))

    # Entities that are incorrectly encoded MUST raise 400.
    # Here, q is the POUND SIGN U+00A3 encoded in utf16, but
    # the Content-Type incorrectly labels it utf-8.
    post('/reqparams', ntob("\xff\xfeq\x00=\xff\xfe\xa3\x00"),
         "application/x-www-form-urlencoded;charset=utf-8")
    self.assertStatus(400)
    bad_entity_text = (
        "The request entity could not be decoded. The following charsets "
        "were attempted: ['utf-8']")
    self.assertErrorPage(400, bad_entity_text)
def test_decode_tool(self):
    def post_form(path, payload):
        # POST payload as a urlencoded form whose Content-Type carries no
        # charset parameter.
        request_headers = [
            ("Content-Type", "application/x-www-form-urlencoded"),
            ("Content-Length", str(len(payload))),
        ]
        self.getPage(path, method='POST', headers=request_headers,
                     body=payload)

    # An extra charset should be tried first, and succeed if it matches.
    # Here, we add utf-16 as a charset and pass a utf-16 body.
    post_form('/decode/extra_charset',
              ntob("\xff\xfeq\x00=\xff\xfe\xa3\x00"))
    self.assertBody(ntob("q: \xc2\xa3"))

    # An extra charset should be tried first, and continue to other default
    # charsets if it doesn't match.
    # Here, we add utf-16 as a charset but still pass a utf-8 body.
    post_form('/decode/extra_charset', ntob("q=\xc2\xa3"))
    self.assertBody(ntob("q: \xc2\xa3"))

    # An extra charset should error if force is True and it doesn't match.
    # Here, we force utf-16 as a charset but still pass a utf-8 body.
    post_form('/decode/force_charset', ntob("q=\xc2\xa3"))
    forced_failure_text = (
        "The request entity could not be decoded. The following charsets "
        "were attempted: ['utf-16']")
    self.assertErrorPage(400, forced_failure_text)
def test_multipart_decoding(self):
    # Test the decoding of a multipart entity when the charset (utf16) is
    # explicitly given.
    lines = [
        '--X',
        'Content-Type: text/plain;charset=utf-16',
        'Content-Disposition: form-data; name="text"',
        '',  # empty line ends the part headers
        '\xff\xfea\x00b\x00\x1c c\x00',
        '--X',
        'Content-Type: text/plain;charset=utf-16',
        'Content-Disposition: form-data; name="submit"',
        '',  # empty line ends the part headers
        '\xff\xfeC\x00r\x00e\x00a\x00t\x00e\x00',
        '--X--',
    ]
    payload = ntob('\r\n'.join(lines))
    request_headers = [("Content-Type", "multipart/form-data;boundary=X"),
                       ("Content-Length", str(len(payload)))]
    self.getPage('/reqparams', method='POST', headers=request_headers,
                 body=payload)
    self.assertBody(ntob("text: ab\xe2\x80\x9cc, submit: Create"))
def test_multipart_decoding_no_charset(self):
    # Test the decoding of a multipart entity when the charset (utf8) is
    # NOT explicitly given, but is in the list of charsets to attempt.
    lines = [
        '--X',
        'Content-Disposition: form-data; name="text"',
        '',  # empty line ends the part headers
        '\xe2\x80\x9c',
        '--X',
        'Content-Disposition: form-data; name="submit"',
        '',  # empty line ends the part headers
        'Create',
        '--X--',
    ]
    payload = ntob('\r\n'.join(lines))
    request_headers = [("Content-Type", "multipart/form-data;boundary=X"),
                       ("Content-Length", str(len(payload)))]
    self.getPage('/reqparams', method='POST', headers=request_headers,
                 body=payload)
    self.assertBody(ntob("text: \xe2\x80\x9c, submit: Create"))
def test_multipart_decoding_no_successful_charset(self):
    # Test the decoding of a multipart entity when the charset (utf16) is
    # NOT explicitly given, and is NOT in the list of charsets to attempt.
    lines = [
        '--X',
        'Content-Disposition: form-data; name="text"',
        '',  # empty line ends the part headers
        '\xff\xfea\x00b\x00\x1c c\x00',
        '--X',
        'Content-Disposition: form-data; name="submit"',
        '',  # empty line ends the part headers
        '\xff\xfeC\x00r\x00e\x00a\x00t\x00e\x00',
        '--X--',
    ]
    payload = ntob('\r\n'.join(lines))
    request_headers = [("Content-Type", "multipart/form-data;boundary=X"),
                       ("Content-Length", str(len(payload)))]
    self.getPage('/reqparams', method='POST', headers=request_headers,
                 body=payload)
    self.assertStatus(400)
    undecodable_text = (
        "The request entity could not be decoded. The following charsets "
        "were attempted: ['us-ascii', 'utf-8']")
    self.assertErrorPage(400, undecodable_text)
def test_nontext(self):
    # Fetch /nontext and check both the Content-Type header (with the
    # charset appended) and the raw body.
    self.getPage('/nontext')
    expected_type = 'application/binary;charset=utf-8'
    self.assertHeader('Content-Type', expected_type)
    self.assertBody('\x00\x01\x02\x03')
def testEncoding(self):
    def fetch(path, accept_charset):
        # Request path with a single Accept-Charset header.
        self.getPage(path, [('Accept-Charset', accept_charset)])

    # Default encoding should be utf-8
    self.getPage('/mao_zedong')
    self.assertBody(sing8)

    # Ask for utf-16.
    fetch('/mao_zedong', 'utf-16')
    self.assertHeader('Content-Type', 'text/html;charset=utf-16')
    self.assertBody(sing16)

    # Ask for multiple encodings. ISO-8859-1 should fail, and utf-16
    # should be produced.
    fetch('/mao_zedong', 'iso-8859-1;q=1, utf-16;q=0.5')
    self.assertBody(sing16)

    # The "*" value should default to our default_encoding, utf-8
    fetch('/mao_zedong', '*;q=1, utf-7;q=.2')
    self.assertBody(sing8)

    # Only allow iso-8859-1, which should fail and raise 406.
    fetch('/mao_zedong', 'iso-8859-1, *;q=0')
    self.assertStatus("406 Not Acceptable")
    latin1_only_text = ("Your client sent this Accept-Charset header: "
                        "iso-8859-1, *;q=0. We tried these charsets: "
                        "iso-8859-1.")
    self.assertInBody(latin1_only_text)

    # Ask for x-mac-ce, which should be unknown. See ticket #569.
    fetch('/mao_zedong', 'us-ascii, ISO-8859-1, x-mac-ce')
    self.assertStatus("406 Not Acceptable")
    unknown_charset_text = ("Your client sent this Accept-Charset header: "
                            "us-ascii, ISO-8859-1, x-mac-ce. We tried these "
                            "charsets: ISO-8859-1, us-ascii, x-mac-ce.")
    self.assertInBody(unknown_charset_text)

    # Test the 'encoding' arg to encode.
    self.getPage('/utf8')
    self.assertBody(sing8)
    fetch('/utf8', 'us-ascii, ISO-8859-1')
    self.assertStatus("406 Not Acceptable")
def testGzip(self):
    # Compress the expected page text locally so its leading bytes can be
    # looked for in the gzipped response.
    buf = BytesIO()
    compressor = gzip.GzipFile(mode='wb', fileobj=buf, compresslevel=9)
    compressor.write(ntob("Hello, world"))
    compressor.close()

    self.getPage('/gzip/', headers=[("Accept-Encoding", "gzip")])
    self.assertInBody(buf.getvalue()[:3])
    self.assertHeader("Vary", "Accept-Encoding")
    self.assertHeader("Content-Encoding", "gzip")

    # Test when gzip is denied.
    for refusal in ("identity", "gzip;q=0"):
        self.getPage('/gzip/', headers=[("Accept-Encoding", refusal)])
        self.assertHeader("Vary", "Accept-Encoding")
        self.assertNoHeader("Content-Encoding")
        self.assertBody("Hello, world")

    self.getPage('/gzip/', headers=[("Accept-Encoding", "*;q=0")])
    self.assertStatus(406)
    self.assertNoHeader("Content-Encoding")
    self.assertErrorPage(406, "identity, gzip")

    # Test for ticket #147
    self.getPage('/gzip/noshow', headers=[("Accept-Encoding", "gzip")])
    self.assertNoHeader('Content-Encoding')
    self.assertStatus(500)
    self.assertErrorPage(500, pattern="IndexError\n")

    # In this case, there's nothing we can do to deliver a
    # readable page, since 1) the gzip header is already set,
    # and 2) we may have already written some of the body.
    # The fix is to never stream yields when using gzip.
    gzip_request = [("Accept-Encoding", "gzip")]
    response_is_readable = (
        cherrypy.server.protocol_version == "HTTP/1.0" or
        getattr(cherrypy.server, "using_apache", False))
    if response_is_readable:
        self.getPage('/gzip/noshow_stream', headers=gzip_request)
        self.assertHeader('Content-Encoding', 'gzip')
        self.assertInBody('\x1f\x8b\x08\x00')
    else:
        # The wsgiserver will simply stop sending data, and the HTTP client
        # will error due to an incomplete chunk-encoded stream.
        expected_errors = (ValueError, IncompleteRead)
        self.assertRaises(expected_errors, self.getPage,
                          '/gzip/noshow_stream', headers=gzip_request)
def test_UnicodeHeaders(self):
    # Request the handler that sets a cookie and a non-ASCII header, then
    # check that the body still arrives.
    target = '/cookies_and_headers'
    self.getPage(target)
    self.assertBody('Any content')