Switch to Ragel/C-based chunk/trailer parser
[unicorn.git] / test / unit / test_http_parser.rb
blob707257187501707b8a1c9bfcdf6ff17d1a9d4347
1 # Copyright (c) 2005 Zed A. Shaw 
2 # You can redistribute it and/or modify it under the same terms as Ruby.
4 # Additional work donated by contributors.  See http://mongrel.rubyforge.org/attributions.html
5 # for more information.
7 require 'test/test_helper'
9 include Unicorn
11 class HttpParserTest < Test::Unit::TestCase
12     
13   def test_parse_simple
14     parser = HttpParser.new
15     req = {}
16     http = "GET / HTTP/1.1\r\n\r\n"
17     assert_equal req, parser.headers(req, http)
18     assert_equal '', http
20     assert_equal 'HTTP/1.1', req['SERVER_PROTOCOL']
21     assert_equal '/', req['REQUEST_PATH']
22     assert_equal 'HTTP/1.1', req['HTTP_VERSION']
23     assert_equal '/', req['REQUEST_URI']
24     assert_equal 'GET', req['REQUEST_METHOD']
25     assert_nil req['FRAGMENT']
26     assert_equal '', req['QUERY_STRING']
28     parser.reset
29     req.clear
31     http = "G"
32     assert_nil parser.headers(req, http)
33     assert_equal "G", http
34     assert req.empty?
36     # try parsing again to ensure we were reset correctly
37     http = "GET /hello-world HTTP/1.1\r\n\r\n"
38     assert parser.headers(req, http)
40     assert_equal 'HTTP/1.1', req['SERVER_PROTOCOL']
41     assert_equal '/hello-world', req['REQUEST_PATH']
42     assert_equal 'HTTP/1.1', req['HTTP_VERSION']
43     assert_equal '/hello-world', req['REQUEST_URI']
44     assert_equal 'GET', req['REQUEST_METHOD']
45     assert_nil req['FRAGMENT']
46     assert_equal '', req['QUERY_STRING']
47     assert_equal '', http
48   end
50   def test_parse_server_host_default_port
51     parser = HttpParser.new
52     req = {}
53     tmp = "GET / HTTP/1.1\r\nHost: foo\r\n\r\n"
54     assert_equal req, parser.headers(req, tmp)
55     assert_equal 'foo', req['SERVER_NAME']
56     assert_equal '80', req['SERVER_PORT']
57     assert_equal '', tmp
58   end
60   def test_parse_server_host_alt_port
61     parser = HttpParser.new
62     req = {}
63     tmp = "GET / HTTP/1.1\r\nHost: foo:999\r\n\r\n"
64     assert_equal req, parser.headers(req, tmp)
65     assert_equal 'foo', req['SERVER_NAME']
66     assert_equal '999', req['SERVER_PORT']
67     assert_equal '', tmp
68   end
70   def test_parse_server_host_empty_port
71     parser = HttpParser.new
72     req = {}
73     tmp = "GET / HTTP/1.1\r\nHost: foo:\r\n\r\n"
74     assert_equal req, parser.headers(req, tmp)
75     assert_equal 'foo', req['SERVER_NAME']
76     assert_equal '80', req['SERVER_PORT']
77     assert_equal '', tmp
78   end
80   def test_parse_server_host_xfp_https
81     parser = HttpParser.new
82     req = {}
83     tmp = "GET / HTTP/1.1\r\nHost: foo:\r\n" \
84           "X-Forwarded-Proto: https\r\n\r\n"
85     assert_equal req, parser.headers(req, tmp)
86     assert_equal 'foo', req['SERVER_NAME']
87     assert_equal '443', req['SERVER_PORT']
88     assert_equal '', tmp
89   end
91   def test_parse_strange_headers
92     parser = HttpParser.new
93     req = {}
94     should_be_good = "GET / HTTP/1.1\r\naaaaaaaaaaaaa:++++++++++\r\n\r\n"
95     assert_equal req, parser.headers(req, should_be_good)
96     assert_equal '', should_be_good
98     # ref: http://thread.gmane.org/gmane.comp.lang.ruby.mongrel.devel/37/focus=45
99     # (note we got 'pen' mixed up with 'pound' in that thread,
100     # but the gist of it is still relevant: these nasty headers are irrelevant
101     #
102     # nasty_pound_header = "GET / HTTP/1.1\r\nX-SSL-Bullshit:   -----BEGIN CERTIFICATE-----\r\n\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgEBBAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n\twTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n\tRA==\r\n\t-----END CERTIFICATE-----\r\n\r\n"
103     # parser = HttpParser.new
104     # req = {}
105     # assert parser.execute(req, nasty_pound_header, 0)
106   end
108   def test_parse_ie6_urls
109     %w(/some/random/path"
110        /some/random/path>
111        /some/random/path<
112        /we/love/you/ie6?q=<"">
113        /url?<="&>="
114        /mal"formed"?
115     ).each do |path|
116       parser = HttpParser.new
117       req = {}
118       sorta_safe = %(GET #{path} HTTP/1.1\r\n\r\n)
119       assert_equal req, parser.headers(req, sorta_safe)
120       assert_equal path, req['REQUEST_URI']
121       assert_equal '', sorta_safe
122     end
123   end
124   
125   def test_parse_error
126     parser = HttpParser.new
127     req = {}
128     bad_http = "GET / SsUTF/1.1"
130     assert_raises(HttpParserError) { parser.headers(req, bad_http) }
132     # make sure we can recover
133     parser.reset
134     req.clear
135     assert_equal req, parser.headers(req, "GET / HTTP/1.0\r\n\r\n")
136   end
138   def test_piecemeal
139     parser = HttpParser.new
140     req = {}
141     http = "GET"
142     assert_nil parser.headers(req, http)
143     assert_nil parser.headers(req, http)
144     assert_nil parser.headers(req, http << " / HTTP/1.0")
145     assert_equal '/', req['REQUEST_PATH']
146     assert_equal '/', req['REQUEST_URI']
147     assert_equal 'GET', req['REQUEST_METHOD']
148     assert_nil parser.headers(req, http << "\r\n")
149     assert_equal 'HTTP/1.0', req['HTTP_VERSION']
150     assert_nil parser.headers(req, http << "\r")
151     assert_equal req, parser.headers(req, http << "\n")
152     assert_equal 'HTTP/1.1', req['SERVER_PROTOCOL']
153     assert_nil req['FRAGMENT']
154     assert_equal '', req['QUERY_STRING']
155     assert_equal "", http
156   end
158   # not common, but underscores do appear in practice
159   def test_absolute_uri_underscores
160     parser = HttpParser.new
161     req = {}
162     http = "GET http://under_score.example.com/foo?q=bar HTTP/1.0\r\n\r\n"
163     assert_equal req, parser.headers(req, http)
164     assert_equal 'http', req['rack.url_scheme']
165     assert_equal '/foo?q=bar', req['REQUEST_URI']
166     assert_equal '/foo', req['REQUEST_PATH']
167     assert_equal 'q=bar', req['QUERY_STRING']
169     assert_equal 'under_score.example.com', req['HTTP_HOST']
170     assert_equal 'under_score.example.com', req['SERVER_NAME']
171     assert_equal '80', req['SERVER_PORT']
172     assert_equal "", http
173   end
175   def test_absolute_uri
176     parser = HttpParser.new
177     req = {}
178     http = "GET http://example.com/foo?q=bar HTTP/1.0\r\n\r\n"
179     assert_equal req, parser.headers(req, http)
180     assert_equal 'http', req['rack.url_scheme']
181     assert_equal '/foo?q=bar', req['REQUEST_URI']
182     assert_equal '/foo', req['REQUEST_PATH']
183     assert_equal 'q=bar', req['QUERY_STRING']
185     assert_equal 'example.com', req['HTTP_HOST']
186     assert_equal 'example.com', req['SERVER_NAME']
187     assert_equal '80', req['SERVER_PORT']
188     assert_equal "", http
189   end
191   # X-Forwarded-Proto is not in rfc2616, absolute URIs are, however...
192   def test_absolute_uri_https
193     parser = HttpParser.new
194     req = {}
195     http = "GET https://example.com/foo?q=bar HTTP/1.1\r\n" \
196            "X-Forwarded-Proto: http\r\n\r\n"
197     assert_equal req, parser.headers(req, http)
198     assert_equal 'https', req['rack.url_scheme']
199     assert_equal '/foo?q=bar', req['REQUEST_URI']
200     assert_equal '/foo', req['REQUEST_PATH']
201     assert_equal 'q=bar', req['QUERY_STRING']
203     assert_equal 'example.com', req['HTTP_HOST']
204     assert_equal 'example.com', req['SERVER_NAME']
205     assert_equal '443', req['SERVER_PORT']
206     assert_equal "", http
207   end
209   # Host: header should be ignored for absolute URIs
210   def test_absolute_uri_with_port
211     parser = HttpParser.new
212     req = {}
213     http = "GET http://example.com:8080/foo?q=bar HTTP/1.2\r\n" \
214            "Host: bad.example.com\r\n\r\n"
215     assert_equal req, parser.headers(req, http)
216     assert_equal 'http', req['rack.url_scheme']
217     assert_equal '/foo?q=bar', req['REQUEST_URI']
218     assert_equal '/foo', req['REQUEST_PATH']
219     assert_equal 'q=bar', req['QUERY_STRING']
221     assert_equal 'example.com:8080', req['HTTP_HOST']
222     assert_equal 'example.com', req['SERVER_NAME']
223     assert_equal '8080', req['SERVER_PORT']
224     assert_equal "", http
225   end
227   def test_absolute_uri_with_empty_port
228     parser = HttpParser.new
229     req = {}
230     http = "GET https://example.com:/foo?q=bar HTTP/1.1\r\n" \
231            "Host: bad.example.com\r\n\r\n"
232     assert_equal req, parser.headers(req, http)
233     assert_equal 'https', req['rack.url_scheme']
234     assert_equal '/foo?q=bar', req['REQUEST_URI']
235     assert_equal '/foo', req['REQUEST_PATH']
236     assert_equal 'q=bar', req['QUERY_STRING']
238     assert_equal 'example.com:', req['HTTP_HOST']
239     assert_equal 'example.com', req['SERVER_NAME']
240     assert_equal '443', req['SERVER_PORT']
241     assert_equal "", http
242   end
244   def test_put_body_oneshot
245     parser = HttpParser.new
246     req = {}
247     http = "PUT / HTTP/1.0\r\nContent-Length: 5\r\n\r\nabcde"
248     assert_equal req, parser.headers(req, http)
249     assert_equal '/', req['REQUEST_PATH']
250     assert_equal '/', req['REQUEST_URI']
251     assert_equal 'PUT', req['REQUEST_METHOD']
252     assert_equal 'HTTP/1.0', req['HTTP_VERSION']
253     assert_equal 'HTTP/1.1', req['SERVER_PROTOCOL']
254     assert_equal "abcde", http
255   end
257   def test_put_body_later
258     parser = HttpParser.new
259     req = {}
260     http = "PUT /l HTTP/1.0\r\nContent-Length: 5\r\n\r\n"
261     assert_equal req, parser.headers(req, http)
262     assert_equal '/l', req['REQUEST_PATH']
263     assert_equal '/l', req['REQUEST_URI']
264     assert_equal 'PUT', req['REQUEST_METHOD']
265     assert_equal 'HTTP/1.0', req['HTTP_VERSION']
266     assert_equal 'HTTP/1.1', req['SERVER_PROTOCOL']
267     assert_equal "", http
268   end
270   def test_unknown_methods
271     %w(GETT HEADR XGET XHEAD).each { |m|
272       parser = HttpParser.new
273       req = {}
274       s = "#{m} /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n\r\n"
275       ok = false
276       assert_nothing_raised do
277         ok = parser.headers(req, s)
278       end
279       assert ok
280       assert_equal '/forums/1/topics/2375?page=1', req['REQUEST_URI']
281       assert_equal 'posts-17408', req['FRAGMENT']
282       assert_equal 'page=1', req['QUERY_STRING']
283       assert_equal "", s
284       assert_equal m, req['REQUEST_METHOD']
285     }
286   end
288   def test_fragment_in_uri
289     parser = HttpParser.new
290     req = {}
291     get = "GET /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n\r\n"
292     ok = false
293     assert_nothing_raised do
294       ok = parser.headers(req, get)
295     end
296     assert ok
297     assert_equal '/forums/1/topics/2375?page=1', req['REQUEST_URI']
298     assert_equal 'posts-17408', req['FRAGMENT']
299     assert_equal 'page=1', req['QUERY_STRING']
300     assert_equal '', get
301   end
303   # lame random garbage maker
304   def rand_data(min, max, readable=true)
305     count = min + ((rand(max)+1) *10).to_i
306     res = count.to_s + "/"
307     
308     if readable
309       res << Digest::SHA1.hexdigest(rand(count * 100).to_s) * (count / 40)
310     else
311       res << Digest::SHA1.digest(rand(count * 100).to_s) * (count / 20)
312     end
314     return res
315   end
316   
318   def test_horrible_queries
319     parser = HttpParser.new
321     # then that large header names are caught
322     10.times do |c|
323       get = "GET /#{rand_data(10,120)} HTTP/1.1\r\nX-#{rand_data(1024, 1024+(c*1024))}: Test\r\n\r\n"
324       assert_raises Unicorn::HttpParserError do
325         parser.headers({}, get)
326         parser.reset
327       end
328     end
330     # then that large mangled field values are caught
331     10.times do |c|
332       get = "GET /#{rand_data(10,120)} HTTP/1.1\r\nX-Test: #{rand_data(1024, 1024+(c*1024), false)}\r\n\r\n"
333       assert_raises Unicorn::HttpParserError do
334         parser.headers({}, get)
335         parser.reset
336       end
337     end
339     # then large headers are rejected too
340     get = "GET /#{rand_data(10,120)} HTTP/1.1\r\n"
341     get << "X-Test: test\r\n" * (80 * 1024)
342     assert_raises Unicorn::HttpParserError do
343       parser.headers({}, get)
344       parser.reset
345     end
347     # finally just that random garbage gets blocked all the time
348     10.times do |c|
349       get = "GET #{rand_data(1024, 1024+(c*1024), false)} #{rand_data(1024, 1024+(c*1024), false)}\r\n\r\n"
350       assert_raises Unicorn::HttpParserError do
351         parser.headers({}, get)
352         parser.reset
353       end
354     end
356   end