Optimize body-less GET/HEAD requests (again)
[unicorn.git] / test / unit / test_http_parser.rb
blob560f8d4c4ec53af229fcba8db875612b21e05c65
1 # Copyright (c) 2005 Zed A. Shaw 
2 # You can redistribute it and/or modify it under the same terms as Ruby.
4 # Additional work donated by contributors.  See http://mongrel.rubyforge.org/attributions.html
5 # for more information.
7 require 'test/test_helper'
9 include Unicorn
11 class HttpParserTest < Test::Unit::TestCase
12     
# A minimal body-less GET is expected to parse in one call, and the same
# parser is expected to handle a second request after reset.
def test_parse_simple
  http_parser = HttpParser.new
  env = {}

  # Parses "GET <path> HTTP/1.1" into env and checks every key this test
  # cares about; used once before and once after the reset.
  verify_get = lambda do |path|
    assert http_parser.execute(env, "GET #{path} HTTP/1.1\r\n\r\n")

    assert_equal 'HTTP/1.1', env['SERVER_PROTOCOL']
    assert_equal path, env['REQUEST_PATH']
    assert_equal 'HTTP/1.1', env['HTTP_VERSION']
    assert_equal path, env['REQUEST_URI']
    assert_equal 'GET', env['REQUEST_METHOD']
    assert_nil env['FRAGMENT']
    assert_equal '', env['QUERY_STRING']
    assert_nil env[:http_body]
  end

  verify_get.call('/')

  http_parser.reset
  env.clear

  # a single byte is not a complete request and must not populate env
  assert !http_parser.execute(env, "G")
  assert env.empty?

  # try parsing again to ensure we were reset correctly
  verify_get.call('/hello-world')
end
# A Host: header without a port is expected to yield SERVER_PORT "80".
def test_parse_server_host_default_port
  env = {}
  request = "GET / HTTP/1.1\r\nHost: foo\r\n\r\n"
  assert HttpParser.new.execute(env, request)

  [['SERVER_NAME', 'foo'], ['SERVER_PORT', '80']].each do |key, expected|
    assert_equal expected, env[key]
  end
  assert_nil env[:http_body]
end
# A Host: header with an explicit port is expected to be split into
# SERVER_NAME and SERVER_PORT.
def test_parse_server_host_alt_port
  env = {}
  request = "GET / HTTP/1.1\r\nHost: foo:999\r\n\r\n"
  assert HttpParser.new.execute(env, request)

  [['SERVER_NAME', 'foo'], ['SERVER_PORT', '999']].each do |key, expected|
    assert_equal expected, env[key]
  end
  assert_nil env[:http_body]
end
# A Host: header ending in a bare colon (empty port) is expected to be
# treated like a missing port: SERVER_PORT "80".
def test_parse_server_host_empty_port
  env = {}
  request = "GET / HTTP/1.1\r\nHost: foo:\r\n\r\n"
  assert HttpParser.new.execute(env, request)

  [['SERVER_NAME', 'foo'], ['SERVER_PORT', '80']].each do |key, expected|
    assert_equal expected, env[key]
  end
  assert_nil env[:http_body]
end
# With an empty port in Host: and "X-Forwarded-Proto: https", the expected
# SERVER_PORT is "443".
def test_parse_server_host_xfp_https
  env = {}
  request = "GET / HTTP/1.1\r\nHost: foo:\r\nX-Forwarded-Proto: https\r\n\r\n"
  assert HttpParser.new.execute(env, request)

  [['SERVER_NAME', 'foo'], ['SERVER_PORT', '443']].each do |key, expected|
    assert_equal expected, env[key]
  end
  assert_nil env[:http_body]
end
# A header whose name is a run of letters and whose value is a run of
# plus signs is expected to parse successfully.
def test_parse_strange_headers
  env = {}
  odd_but_valid = "GET / HTTP/1.1\r\naaaaaaaaaaaaa:++++++++++\r\n\r\n"
  assert HttpParser.new.execute(env, odd_but_valid)
  assert_nil env[:http_body]

  # ref: http://thread.gmane.org/gmane.comp.lang.ruby.mongrel.devel/37/focus=45
  # (note we got 'pen' mixed up with 'pound' in that thread,
  # but the gist of it is still relevant: these nasty headers are irrelevant)
  #
  # The multi-line certificate header below is kept for reference only;
  # the check that used it is disabled.
  #
  # nasty_pound_header = "GET / HTTP/1.1\r\nX-SSL-Bullshit:   -----BEGIN CERTIFICATE-----\r\n\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgEBBAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n\twTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n\tRA==\r\n\t-----END CERTIFICATE-----\r\n\r\n"
  # parser = HttpParser.new
  # req = {}
  # assert parser.execute(req, nasty_pound_header, 0)
end
# Request paths containing unescaped quotes and angle brackets are
# expected to be accepted rather than rejected.
def test_parse_ie6_urls
  ie6_paths = %w(/some/random/path"
                 /some/random/path>
                 /some/random/path<
                 /we/love/you/ie6?q=<"">
                 /url?<="&>="
                 /mal"formed"?)

  # every path gets its own parser and request hash
  ie6_paths.each do |path|
    env = {}
    request = "GET #{path} HTTP/1.1\r\n\r\n"
    assert HttpParser.new.execute(env, request)
    assert_nil env[:http_body]
  end
end
117   
# A malformed request line must raise HttpParserError, and after a reset
# the same parser must be able to parse a valid request.
def test_parse_error
  parser = HttpParser.new
  req = {}
  bad_http = "GET / SsUTF/1.1"

  assert_raises(HttpParserError) { parser.execute(req, bad_http) }

  # Make sure we can recover. The request hash is emptied and handed to
  # the parser again so that the final assertion inspects the hash the
  # successful parse actually filled in, not an untouched one.
  parser.reset
  req.clear
  assert(parser.execute(req, "GET / HTTP/1.0\r\n\r\n"))
  assert_nil req[:http_body]
end
# Feeds one request to the parser in fragments, growing the same buffer
# between calls, and checks which keys are expected after each fragment.
def test_piecemeal
  http_parser = HttpParser.new
  env = {}
  buffer = "GET"

  assert !http_parser.execute(env, buffer)
  # calling again with the unchanged buffer is expected to raise
  assert_raises(HttpParserError) { http_parser.execute(env, buffer) }

  buffer << " / HTTP/1.0"
  assert !http_parser.execute(env, buffer)
  assert_equal '/', env['REQUEST_PATH']
  assert_equal '/', env['REQUEST_URI']
  assert_equal 'GET', env['REQUEST_METHOD']

  buffer << "\r\n"
  assert !http_parser.execute(env, buffer)
  assert_equal 'HTTP/1.0', env['HTTP_VERSION']

  buffer << "\r"
  assert !http_parser.execute(env, buffer)

  # the final newline completes the request
  buffer << "\n"
  assert http_parser.execute(env, buffer)
  assert_equal 'HTTP/1.1', env['SERVER_PROTOCOL']
  assert_nil env['FRAGMENT']
  assert_equal '', env['QUERY_STRING']
  assert_nil env[:http_body]
end
# not common, but underscores do appear in practice
#
# An absolute request URI whose host contains an underscore is expected
# to be split into scheme, host, path and query like any other host.
def test_absolute_uri_underscores
  env = {}
  request = "GET http://under_score.example.com/foo?q=bar HTTP/1.0\r\n\r\n"
  assert HttpParser.new.execute(env, request)

  [
    ['rack.url_scheme', 'http'],
    ['REQUEST_URI', '/foo?q=bar'],
    ['REQUEST_PATH', '/foo'],
    ['QUERY_STRING', 'q=bar'],
    ['HTTP_HOST', 'under_score.example.com'],
    ['SERVER_NAME', 'under_score.example.com'],
    ['SERVER_PORT', '80']
  ].each { |key, expected| assert_equal expected, env[key] }

  assert_nil env[:http_body]
end
# An absolute "http://" request URI is expected to be split into scheme,
# host, path and query, with SERVER_PORT "80".
def test_absolute_uri
  parser = HttpParser.new
  req = {}
  http = "GET http://example.com/foo?q=bar HTTP/1.0\r\n\r\n"
  assert parser.execute(req, http)
  assert_equal 'http', req['rack.url_scheme']
  assert_equal '/foo?q=bar', req['REQUEST_URI']
  assert_equal '/foo', req['REQUEST_PATH']
  assert_equal 'q=bar', req['QUERY_STRING']

  assert_equal 'example.com', req['HTTP_HOST']
  assert_equal 'example.com', req['SERVER_NAME']
  assert_equal '80', req['SERVER_PORT']
  # a GET without a body is expected to leave :http_body unset
  assert_nil req[:http_body]
end
# X-Forwarded-Proto is not in rfc2616, absolute URIs are, however...
#
# The scheme of an absolute "https://" URI is expected to win over a
# conflicting "X-Forwarded-Proto: http" header, giving SERVER_PORT "443".
def test_absolute_uri_https
  parser = HttpParser.new
  req = {}
  http = "GET https://example.com/foo?q=bar HTTP/1.1\r\n" \
         "X-Forwarded-Proto: http\r\n\r\n"
  assert parser.execute(req, http)
  assert_equal 'https', req['rack.url_scheme']
  assert_equal '/foo?q=bar', req['REQUEST_URI']
  assert_equal '/foo', req['REQUEST_PATH']
  assert_equal 'q=bar', req['QUERY_STRING']

  assert_equal 'example.com', req['HTTP_HOST']
  assert_equal 'example.com', req['SERVER_NAME']
  assert_equal '443', req['SERVER_PORT']
  # a GET without a body is expected to leave :http_body unset
  assert_nil req[:http_body]
end
# Host: header should be ignored for absolute URIs
#
# Host name and port are expected to come from the absolute URI
# ("example.com:8080"), not from the conflicting Host: header.
def test_absolute_uri_with_port
  parser = HttpParser.new
  req = {}
  http = "GET http://example.com:8080/foo?q=bar HTTP/1.2\r\n" \
         "Host: bad.example.com\r\n\r\n"
  assert parser.execute(req, http)
  assert_equal 'http', req['rack.url_scheme']
  assert_equal '/foo?q=bar', req['REQUEST_URI']
  assert_equal '/foo', req['REQUEST_PATH']
  assert_equal 'q=bar', req['QUERY_STRING']

  assert_equal 'example.com:8080', req['HTTP_HOST']
  assert_equal 'example.com', req['SERVER_NAME']
  assert_equal '8080', req['SERVER_PORT']
  # a GET without a body is expected to leave :http_body unset
  assert_nil req[:http_body]
end
# An absolute "https://" URI with an empty port ("example.com:") is
# expected to keep the colon in HTTP_HOST, give SERVER_PORT "443", and
# ignore the conflicting Host: header.
def test_absolute_uri_with_empty_port
  parser = HttpParser.new
  req = {}
  http = "GET https://example.com:/foo?q=bar HTTP/1.1\r\n" \
         "Host: bad.example.com\r\n\r\n"
  assert parser.execute(req, http)
  assert_equal 'https', req['rack.url_scheme']
  assert_equal '/foo?q=bar', req['REQUEST_URI']
  assert_equal '/foo', req['REQUEST_PATH']
  assert_equal 'q=bar', req['QUERY_STRING']

  assert_equal 'example.com:', req['HTTP_HOST']
  assert_equal 'example.com', req['SERVER_NAME']
  assert_equal '443', req['SERVER_PORT']
  # a GET without a body is expected to leave :http_body unset
  assert_nil req[:http_body]
end
# A PUT whose five-byte body arrives in the same buffer as its headers is
# expected to expose that body through :http_body.
def test_put_body_oneshot
  env = {}
  request = "PUT / HTTP/1.0\r\nContent-Length: 5\r\n\r\nabcde"
  assert HttpParser.new.execute(env, request)

  [
    ['REQUEST_PATH', '/'],
    ['REQUEST_URI', '/'],
    ['REQUEST_METHOD', 'PUT'],
    ['HTTP_VERSION', 'HTTP/1.0'],
    ['SERVER_PROTOCOL', 'HTTP/1.1'],
    [:http_body, "abcde"]
  ].each { |key, expected| assert_equal expected, env[key] }
end
# A PUT whose headers arrive without any body bytes yet is expected to
# have an empty string (not nil) as :http_body.
def test_put_body_later
  env = {}
  request = "PUT /l HTTP/1.0\r\nContent-Length: 5\r\n\r\n"
  assert HttpParser.new.execute(env, request)

  [
    ['REQUEST_PATH', '/l'],
    ['REQUEST_URI', '/l'],
    ['REQUEST_METHOD', 'PUT'],
    ['HTTP_VERSION', 'HTTP/1.0'],
    ['SERVER_PROTOCOL', 'HTTP/1.1'],
    [:http_body, ""]
  ].each { |key, expected| assert_equal expected, env[key] }
end
# Method names that only resemble GET/HEAD are expected to parse without
# raising and to get an empty string (not nil) as :http_body.
def test_unknown_methods
  %w(GETT HEADR XGET XHEAD).each do |verb|
    http_parser = HttpParser.new
    env = {}
    request = "#{verb} /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n\r\n"

    parsed = false
    assert_nothing_raised { parsed = http_parser.execute(env, request) }
    assert parsed

    # the fragment is reported on its own, not as part of REQUEST_URI
    assert_equal '/forums/1/topics/2375?page=1', env['REQUEST_URI']
    assert_equal 'posts-17408', env['FRAGMENT']
    assert_equal 'page=1', env['QUERY_STRING']
    assert_equal "", env[:http_body]
    assert_equal verb, env['REQUEST_METHOD']
  end
end
# A "#fragment" in the request URI is expected to be reported under
# FRAGMENT and left out of REQUEST_URI.
def test_fragment_in_uri
  http_parser = HttpParser.new
  env = {}
  request = "GET /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n\r\n"

  parsed = false
  assert_nothing_raised { parsed = http_parser.execute(env, request) }
  assert parsed

  assert_equal '/forums/1/topics/2375?page=1', env['REQUEST_URI']
  assert_equal 'posts-17408', env['FRAGMENT']
  assert_equal 'page=1', env['QUERY_STRING']
  assert_nil env[:http_body]
end
# lame random garbage maker
#
# Returns "<count>/<filler>". For a positive integer +max+, count is
# +min+ plus a random multiple of ten between 10 and max * 10, so +max+
# scales the random part rather than capping the result. The filler is
# one SHA1 digest of a random number, repeated: the 40-character hex
# digest count / 40 times when +readable+ is true, otherwise the raw
# 20-byte digest count / 20 times.
def rand_data(min, max, readable=true)
  count = min + ((rand(max) + 1) * 10).to_i
  seed = rand(count * 100).to_s

  filler =
    if readable
      Digest::SHA1.hexdigest(seed) * (count / 40)
    else
      Digest::SHA1.digest(seed) * (count / 20)
    end

  "#{count}/" << filler
end
303   
# Sends oversized and random-garbage requests through a single parser and
# expects each of them to raise Unicorn::HttpParserError.
#
# NOTE(review): parser.reset comes after parser.execute inside the
# assert_raises block, so it is skipped whenever execute raises as
# expected -- confirm whether the parser was meant to be reset between
# requests.
def test_horrible_queries
  parser = HttpParser.new

  # runs one request through the shared parser and expects a parse error
  expect_rejection = lambda do |request|
    assert_raises Unicorn::HttpParserError do
      parser.execute({}, request)
      parser.reset
    end
  end

  # then that large header names are caught
  10.times do |c|
    expect_rejection.call("GET /#{rand_data(10,120)} HTTP/1.1\r\nX-#{rand_data(1024, 1024+(c*1024))}: Test\r\n\r\n")
  end

  # then that large mangled field values are caught
  10.times do |c|
    expect_rejection.call("GET /#{rand_data(10,120)} HTTP/1.1\r\nX-Test: #{rand_data(1024, 1024+(c*1024), false)}\r\n\r\n")
  end

  # then large headers are rejected too
  flood = "GET /#{rand_data(10,120)} HTTP/1.1\r\n"
  flood << "X-Test: test\r\n" * (80 * 1024)
  expect_rejection.call(flood)

  # finally just that random garbage gets blocked all the time
  10.times do |c|
    expect_rejection.call("GET #{rand_data(1024, 1024+(c*1024), false)} #{rand_data(1024, 1024+(c*1024), false)}\r\n\r\n")
  end
end