2 * Copyright (c) 2009 Eric Wong (all bugs are Eric's fault)
3 * Copyright (c) 2005 Zed A. Shaw
4 * You can redistribute it and/or modify it under the same terms as Ruby.
10 #include "http11_parser.h"
/*
 * Accessors for the byte buffer and byte length of a Ruby String.
 * Each definition is guarded so that a macro of the same name already
 * supplied by the Ruby headers is used as-is instead of being redefined.
 */
#ifndef RSTRING_PTR
#define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif
#ifndef RSTRING_LEN
#define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
19 static VALUE mUnicorn
;
20 static VALUE cHttpParser
;
21 static VALUE eHttpParserError
;
22 static VALUE sym_http_body
;
24 #define HTTP_PREFIX "HTTP_"
25 #define HTTP_PREFIX_LEN (sizeof(HTTP_PREFIX) - 1)
27 static VALUE global_rack_url_scheme
;
28 static VALUE global_request_method
;
29 static VALUE global_request_uri
;
30 static VALUE global_fragment
;
31 static VALUE global_query_string
;
32 static VALUE global_http_version
;
33 static VALUE global_request_path
;
34 static VALUE global_path_info
;
35 static VALUE global_server_name
;
36 static VALUE global_server_port
;
37 static VALUE global_server_protocol
;
38 static VALUE global_server_protocol_value
;
39 static VALUE global_http_host
;
40 static VALUE global_http_x_forwarded_proto
;
41 static VALUE global_port_80
;
42 static VALUE global_port_443
;
43 static VALUE global_localhost
;
44 static VALUE global_http
;
/**
 * Defines common length and error messages for input length validation.
 *
 * Expands to two file-local objects: MAX_<N>_LENGTH (the limit) and
 * MAX_<N>_LENGTH_ERR (the message raised when the limit is exceeded).
 * Both N and length are stringified verbatim into the message, so the
 * spelling of the length argument (including any parentheses) is visible
 * to users. Terminate the invocation with a semicolon.
 */
#define DEF_MAX_LENGTH(N,length) \
  static const size_t MAX_##N##_LENGTH = (length); \
  static const char *const MAX_##N##_LENGTH_ERR = \
    "HTTP element " # N " is longer than the " # length " allowed length."
/**
 * Validates the max length of given input and throws an HttpParserError
 * exception if over.
 *
 * len is compared against MAX_<N>_LENGTH; a value equal to the limit is
 * accepted. The message is passed through a "%s" format so that its
 * contents are never interpreted as format directives. Wrapped in
 * do/while(0) so the macro behaves as one statement, including in front
 * of an else.
 */
#define VALIDATE_MAX_LENGTH(len, N) do { \
  if ((len) > MAX_##N##_LENGTH) \
    rb_raise(eHttpParserError, "%s", MAX_##N##_LENGTH_ERR); \
} while (0)
/**
 * Defines global strings in the init method.
 *
 * Stores a frozen Ruby string built from the C string val in global_<N>
 * and then hands the address of global_<N> to rb_global_variable().
 * Both steps are wrapped in do/while(0) so they stay together when the
 * macro is used as the body of an unbraced if/else.
 */
#define DEF_GLOBAL(N, val) do { \
  global_##N = rb_obj_freeze(rb_str_new2(val)); \
  rb_global_variable(&global_##N); \
} while (0)
56 /* Defines the maximum allowed lengths for various input elements.*/
57 DEF_MAX_LENGTH(FIELD_NAME
, 256);
58 DEF_MAX_LENGTH(FIELD_VALUE
, 80 * 1024);
59 DEF_MAX_LENGTH(REQUEST_URI
, 1024 * 12);
60 DEF_MAX_LENGTH(FRAGMENT
, 1024); /* Don't know if this length is specified somewhere or not */
61 DEF_MAX_LENGTH(REQUEST_PATH
, 1024);
62 DEF_MAX_LENGTH(QUERY_STRING
, (1024 * 10));
63 DEF_MAX_LENGTH(HEADER
, (1024 * (80 + 32)));
66 const signed long len
;
72 * A list of common HTTP headers we expect to receive.
73 * This allows us to avoid repeatedly creating identical string
74 * objects to be used with rb_hash_aset().
76 static struct common_field common_http_fields
[] = {
77 # define f(N) { (sizeof(N) - 1), N, Qnil }
86 f("CONTENT_ENCODING"),
95 f("IF_MODIFIED_SINCE"),
98 f("IF_UNMODIFIED_SINCE"),
99 f("KEEP_ALIVE"), /* Firefox sends this */
102 f("PROXY_AUTHORIZATION"),
107 f("TRANSFER_ENCODING"),
111 f("X_FORWARDED_FOR"), /* common for proxies */
112 f("X_FORWARDED_PROTO"), /* common for proxies */
113 f("X_REAL_IP"), /* common for proxies */
118 /* this function is not performance-critical */
119 static void init_common_fields(void)
122 struct common_field
*cf
= common_http_fields
;
123 char tmp
[256]; /* MAX_FIELD_NAME_LENGTH */
124 memcpy(tmp
, HTTP_PREFIX
, HTTP_PREFIX_LEN
);
126 for(i
= 0; i
< ARRAY_SIZE(common_http_fields
); cf
++, i
++) {
127 /* Rack doesn't like certain headers prefixed with "HTTP_" */
128 if (!strcmp("CONTENT_LENGTH", cf
->name
) ||
129 !strcmp("CONTENT_TYPE", cf
->name
)) {
130 cf
->value
= rb_str_new(cf
->name
, cf
->len
);
132 memcpy(tmp
+ HTTP_PREFIX_LEN
, cf
->name
, cf
->len
+ 1);
133 cf
->value
= rb_str_new(tmp
, HTTP_PREFIX_LEN
+ cf
->len
);
135 cf
->value
= rb_obj_freeze(cf
->value
);
136 rb_global_variable(&cf
->value
);
140 static VALUE
find_common_field_value(const char *field
, size_t flen
)
143 struct common_field
*cf
= common_http_fields
;
144 for(i
= 0; i
< ARRAY_SIZE(common_http_fields
); i
++, cf
++) {
145 if (cf
->len
== flen
&& !memcmp(cf
->name
, field
, flen
))
151 static void http_field(void *data
, const char *field
,
152 size_t flen
, const char *value
, size_t vlen
)
154 VALUE req
= (VALUE
)data
;
158 VALIDATE_MAX_LENGTH(flen
, FIELD_NAME
);
159 VALIDATE_MAX_LENGTH(vlen
, FIELD_VALUE
);
161 v
= rb_str_new(value
, vlen
);
163 f
= find_common_field_value(field
, flen
);
167 * We got a strange header that we don't have a memoized value for.
168 * Fallback to creating a new string to use as a hash key.
170 * using rb_str_new(NULL, len) here is faster than rb_str_buf_new(len)
171 * in my testing, because: there's no minimum allocation length (and
172 * no check for it, either), RSTRING_LEN(f) does not need to be
173 * written twice, and and RSTRING_PTR(f) will already be
174 * null-terminated for us.
176 f
= rb_str_new(NULL
, HTTP_PREFIX_LEN
+ flen
);
177 memcpy(RSTRING_PTR(f
), HTTP_PREFIX
, HTTP_PREFIX_LEN
);
178 memcpy(RSTRING_PTR(f
) + HTTP_PREFIX_LEN
, field
, flen
);
179 assert(*(RSTRING_PTR(f
) + RSTRING_LEN(f
)) == '\0'); /* paranoia */
180 /* fprintf(stderr, "UNKNOWN HEADER <%s>\n", RSTRING_PTR(f)); */
183 rb_hash_aset(req
, f
, v
);
186 static void request_method(void *data
, const char *at
, size_t length
)
188 VALUE req
= (VALUE
)data
;
191 val
= rb_str_new(at
, length
);
192 rb_hash_aset(req
, global_request_method
, val
);
195 static void request_uri(void *data
, const char *at
, size_t length
)
197 VALUE req
= (VALUE
)data
;
200 VALIDATE_MAX_LENGTH(length
, REQUEST_URI
);
202 val
= rb_str_new(at
, length
);
203 rb_hash_aset(req
, global_request_uri
, val
);
205 /* "OPTIONS * HTTP/1.1\r\n" is a valid request */
206 if (length
== 1 && *at
== '*') {
207 val
= rb_str_new(NULL
, 0);
208 rb_hash_aset(req
, global_request_path
, val
);
209 rb_hash_aset(req
, global_path_info
, val
);
213 static void fragment(void *data
, const char *at
, size_t length
)
215 VALUE req
= (VALUE
)data
;
218 VALIDATE_MAX_LENGTH(length
, FRAGMENT
);
220 val
= rb_str_new(at
, length
);
221 rb_hash_aset(req
, global_fragment
, val
);
224 static void request_path(void *data
, const char *at
, size_t length
)
226 VALUE req
= (VALUE
)data
;
229 VALIDATE_MAX_LENGTH(length
, REQUEST_PATH
);
231 val
= rb_str_new(at
, length
);
232 rb_hash_aset(req
, global_request_path
, val
);
234 /* rack says PATH_INFO must start with "/" or be empty */
235 if (!(length
== 1 && *at
== '*'))
236 rb_hash_aset(req
, global_path_info
, val
);
239 static void query_string(void *data
, const char *at
, size_t length
)
241 VALUE req
= (VALUE
)data
;
244 VALIDATE_MAX_LENGTH(length
, QUERY_STRING
);
246 val
= rb_str_new(at
, length
);
247 rb_hash_aset(req
, global_query_string
, val
);
250 static void http_version(void *data
, const char *at
, size_t length
)
252 VALUE req
= (VALUE
)data
;
253 VALUE val
= rb_str_new(at
, length
);
254 rb_hash_aset(req
, global_http_version
, val
);
257 /** Finalizes the request header to have a bunch of stuff that's needed. */
258 static void header_done(void *data
, const char *at
, size_t length
)
260 VALUE req
= (VALUE
)data
;
261 VALUE server_name
= global_localhost
;
262 VALUE server_port
= global_port_80
;
265 /* rack requires QUERY_STRING */
266 if (rb_hash_aref(req
, global_query_string
) == Qnil
)
267 rb_hash_aset(req
, global_query_string
, rb_str_new(NULL
, 0));
269 /* set rack.url_scheme to "https" or "http", no others are allowed by Rack */
270 if ((temp
= rb_hash_aref(req
, global_http_x_forwarded_proto
)) != Qnil
&&
271 RSTRING_LEN(temp
) == 5 &&
272 !memcmp("https", RSTRING_PTR(temp
), 5))
273 server_port
= global_port_443
;
276 rb_hash_aset(req
, global_rack_url_scheme
, temp
);
278 /* parse and set the SERVER_NAME and SERVER_PORT variables */
279 if ((temp
= rb_hash_aref(req
, global_http_host
)) != Qnil
) {
280 char *colon
= memchr(RSTRING_PTR(temp
), ':', RSTRING_LEN(temp
));
282 server_name
= rb_str_substr(temp
, 0, colon
- RSTRING_PTR(temp
));
283 server_port
= rb_str_substr(temp
, colon
- RSTRING_PTR(temp
)+1,
289 rb_hash_aset(req
, global_server_name
, server_name
);
290 rb_hash_aset(req
, global_server_port
, server_port
);
292 /* grab the initial body and stuff it into the hash */
293 rb_hash_aset(req
, sym_http_body
, rb_str_new(at
, length
));
294 rb_hash_aset(req
, global_server_protocol
, global_server_protocol_value
);
297 static void HttpParser_free(void *data
) {
306 static VALUE
HttpParser_alloc(VALUE klass
)
309 http_parser
*hp
= ALLOC_N(http_parser
, 1);
311 hp
->http_field
= http_field
;
312 hp
->request_method
= request_method
;
313 hp
->request_uri
= request_uri
;
314 hp
->fragment
= fragment
;
315 hp
->request_path
= request_path
;
316 hp
->query_string
= query_string
;
317 hp
->http_version
= http_version
;
318 hp
->header_done
= header_done
;
319 http_parser_init(hp
);
321 obj
= Data_Wrap_Struct(klass
, NULL
, HttpParser_free
, hp
);
329 * parser.new -> parser
331 * Creates a new parser.
333 static VALUE
HttpParser_init(VALUE self
)
335 http_parser
*http
= NULL
;
336 DATA_GET(self
, http_parser
, http
);
337 http_parser_init(http
);
345 * parser.reset -> nil
347 * Resets the parser to it's initial state so that you can reuse it
348 * rather than making new ones.
350 static VALUE
HttpParser_reset(VALUE self
)
352 http_parser
*http
= NULL
;
353 DATA_GET(self
, http_parser
, http
);
354 http_parser_init(http
);
362 * parser.execute(req_hash, data) -> true/false
364 * Takes a Hash and a String of data, parses the String of data filling
365 * in the Hash returning a boolean to indicate whether or not parsing
368 * This function now throws an exception when there is a parsing error.
369 * This makes the logic for working with the parser much easier. You
370 * will need to wrap the parser with an exception handling block.
373 static VALUE
HttpParser_execute(VALUE self
, VALUE req_hash
, VALUE data
)
376 char *dptr
= RSTRING_PTR(data
);
377 long dlen
= RSTRING_LEN(data
);
379 DATA_GET(self
, http_parser
, http
);
381 if (http
->nread
< dlen
) {
382 http
->data
= (void *)req_hash
;
383 http_parser_execute(http
, dptr
, dlen
);
385 VALIDATE_MAX_LENGTH(http
->nread
, HEADER
);
387 if (!http_parser_has_error(http
))
388 return http_parser_is_finished(http
) ? Qtrue
: Qfalse
;
390 rb_raise(eHttpParserError
, "Invalid HTTP format, parsing fails.");
392 rb_raise(eHttpParserError
, "Requested start is after data buffer end.");
398 mUnicorn
= rb_define_module("Unicorn");
400 DEF_GLOBAL(rack_url_scheme
, "rack.url_scheme");
401 DEF_GLOBAL(request_method
, "REQUEST_METHOD");
402 DEF_GLOBAL(request_uri
, "REQUEST_URI");
403 DEF_GLOBAL(fragment
, "FRAGMENT");
404 DEF_GLOBAL(query_string
, "QUERY_STRING");
405 DEF_GLOBAL(http_version
, "HTTP_VERSION");
406 DEF_GLOBAL(request_path
, "REQUEST_PATH");
407 DEF_GLOBAL(path_info
, "PATH_INFO");
408 DEF_GLOBAL(server_name
, "SERVER_NAME");
409 DEF_GLOBAL(server_port
, "SERVER_PORT");
410 DEF_GLOBAL(server_protocol
, "SERVER_PROTOCOL");
411 DEF_GLOBAL(server_protocol_value
, "HTTP/1.1");
412 DEF_GLOBAL(http_host
, "HTTP_HOST");
413 DEF_GLOBAL(http_x_forwarded_proto
, "HTTP_X_FORWARDED_PROTO");
414 DEF_GLOBAL(port_80
, "80");
415 DEF_GLOBAL(port_443
, "443");
416 DEF_GLOBAL(localhost
, "localhost");
417 DEF_GLOBAL(http
, "http");
419 eHttpParserError
= rb_define_class_under(mUnicorn
, "HttpParserError", rb_eIOError
);
421 cHttpParser
= rb_define_class_under(mUnicorn
, "HttpParser", rb_cObject
);
422 rb_define_alloc_func(cHttpParser
, HttpParser_alloc
);
423 rb_define_method(cHttpParser
, "initialize", HttpParser_init
,0);
424 rb_define_method(cHttpParser
, "reset", HttpParser_reset
,0);
425 rb_define_method(cHttpParser
, "execute", HttpParser_execute
,2);
426 sym_http_body
= ID2SYM(rb_intern("http_body"));
427 init_common_fields();