2 * Copyright (c) 2009 Eric Wong (all bugs are Eric's fault)
3 * Copyright (c) 2005 Zed A. Shaw
4 * You can redistribute it and/or modify it under the same terms as Ruby.
10 #include "http11_parser.h"
/*
 * Extracts the http_parser pointer wrapped inside the Ruby object +self+
 * (Data_Get_Struct) and reports a NULL pointer with rb_eArgError.
 * NOTE(review): the declaration of `http`, the NULL test guarding the
 * rb_raise() and the return statement are not visible in this excerpt;
 * presumably `http` is returned on success -- confirm against the full file.
 */
12 static http_parser
*data_get(VALUE self
)
16 Data_Get_Struct(self
, http_parser
, http
);
18 rb_raise(rb_eArgError
, "NULL found for http when shouldn't be.");
/*
 * Accessors for a Ruby String's character buffer (->ptr) and length (->len),
 * both expressed through the RSTRING() struct view.
 */
23 #define RSTRING_PTR(s) (RSTRING(s)->ptr)
26 #define RSTRING_LEN(s) (RSTRING(s)->len)
/*
 * Handles assigned once in Init_http11(): the Unicorn module, the
 * Unicorn::HttpParser class, the Unicorn::HttpParserError exception class
 * and the :http_body symbol used as a request-hash key by header_done().
 */
29 static VALUE mUnicorn
;
30 static VALUE cHttpParser
;
31 static VALUE eHttpParserError
;
32 static VALUE sym_http_body
;
/*
 * Prefix prepended to header names when building request-hash keys, and
 * its length without the terminating NUL (hence the "- 1").
 */
34 #define HTTP_PREFIX "HTTP_"
35 #define HTTP_PREFIX_LEN (sizeof(HTTP_PREFIX) - 1)
/*
 * Shared Ruby String objects used as request-hash keys and default values.
 * All of them are created by DEF_GLOBAL() in Init_http11(), except
 * global_http_host, which Init_http11() takes from the common-field table
 * through find_common_field_value("HOST", 4).
 */
37 static VALUE global_rack_url_scheme
;
38 static VALUE global_request_method
;
39 static VALUE global_request_uri
;
40 static VALUE global_fragment
;
41 static VALUE global_query_string
;
42 static VALUE global_http_version
;
43 static VALUE global_request_path
;
44 static VALUE global_path_info
;
45 static VALUE global_server_name
;
46 static VALUE global_server_port
;
47 static VALUE global_server_protocol
;
48 static VALUE global_server_protocol_value
;
49 static VALUE global_http_host
;
50 static VALUE global_http_x_forwarded_proto
;
51 static VALUE global_port_80
;
52 static VALUE global_port_443
;
53 static VALUE global_localhost
;
54 static VALUE global_http
;
56 /**
 * Defines common length and error messages for input length validation.
 *
 * For an element name N this declares two file-scope constants:
 * MAX_<N>_LENGTH (a size_t holding `length`) and MAX_<N>_LENGTH_ERR (a
 * message in which both N and the `length` expression are stringified
 * with the # operator, so the text shows the limit exactly as written).
 */
57 #define DEF_MAX_LENGTH(N, length) \
58 static const size_t MAX_##N##_LENGTH = length; \
59 static const char * const MAX_##N##_LENGTH_ERR = \
60 "HTTP element " # N " is longer than the " # length " allowed length."
/**
 * Validates the max length of given input and throws an HttpParserError
 * exception if over.
 *
 * len - the length to check; it is parenthesized in the expansion so that
 *       an arbitrary expression (for example a conditional) is compared
 *       as a whole instead of being re-associated with the '>' operator
 * N   - element name previously passed to DEF_MAX_LENGTH(); selects the
 *       MAX_<N>_LENGTH limit and the MAX_<N>_LENGTH_ERR message
 *
 * The message is handed to rb_raise() as a "%s" argument rather than as
 * the format string itself, so a '%' appearing in the message text can
 * never be interpreted as a conversion specifier.
 */
#define VALIDATE_MAX_LENGTH(len, N) do { \
  if ((len) > MAX_##N##_LENGTH) \
    rb_raise(eHttpParserError, "%s", MAX_##N##_LENGTH_ERR); \
} while (0)
71 /**
 * Defines global strings in the init method.
 *
 * Assigns global_<N> a frozen Ruby String built from the string literal
 * `val` (sizeof(val) - 1 leaves out the terminating NUL) and then passes
 * the address of that variable to rb_global_variable().
 */
72 #define DEF_GLOBAL(N, val) do { \
73 global_##N = rb_obj_freeze(rb_str_new(val, sizeof(val) - 1)); \
74 rb_global_variable(&global_##N); \
77 /*
 * Defines the maximum allowed lengths for various input elements.
 * Each line expands to a MAX_<N>_LENGTH constant plus its
 * MAX_<N>_LENGTH_ERR message; VALIDATE_MAX_LENGTH(len, N) consults both.
 */
78 DEF_MAX_LENGTH(FIELD_NAME
, 256);
79 DEF_MAX_LENGTH(FIELD_VALUE
, 80 * 1024);
80 DEF_MAX_LENGTH(REQUEST_URI
, 1024 * 12);
81 DEF_MAX_LENGTH(FRAGMENT
, 1024); /* Don't know if this length is specified somewhere or not */
82 DEF_MAX_LENGTH(REQUEST_PATH
, 1024);
83 DEF_MAX_LENGTH(QUERY_STRING
, (1024 * 10));
84 DEF_MAX_LENGTH(HEADER
, (1024 * (80 + 32)));
/*
 * Length of the field name. NOTE(review): the enclosing struct header is
 * not visible in this excerpt; presumably this is the first member of
 * struct common_field, which the f() table macro fills with
 * sizeof(N) - 1 (the name length without its NUL) -- confirm.
 */
87 const signed long len
;
93 * A list of common HTTP headers we expect to receive.
94 * This allows us to avoid repeatedly creating identical string
95 * objects to be used with rb_hash_aset().
/*
 * Table of header names, written without the "HTTP_" prefix;
 * init_common_fields() fills in each entry's value with the hash key
 * String to use for that header.
 */
97 static struct common_field common_http_fields
[] = {
/*
 * f(N) builds one entry from a string literal: the name length without
 * the terminating NUL, the name itself, and a value slot that starts out
 * as Qnil.
 */
98 # define f(N) { (sizeof(N) - 1), N, Qnil }
101 f("ACCEPT_ENCODING"),
102 f("ACCEPT_LANGUAGE"),
107 f("CONTENT_ENCODING"),
116 f("IF_MODIFIED_SINCE"),
119 f("IF_UNMODIFIED_SINCE"),
120 f("KEEP_ALIVE"), /* Firefox sends this */
123 f("PROXY_AUTHORIZATION"),
128 f("TRANSFER_ENCODING"),
132 f("X_FORWARDED_FOR"), /* common for proxies */
133 f("X_FORWARDED_PROTO"), /* common for proxies */
134 f("X_REAL_IP"), /* common for proxies */
139 /* this function is not performance-critical */
140 static void init_common_fields(void)
143 struct common_field
*cf
= common_http_fields
;
144 char tmp
[256]; /* MAX_FIELD_NAME_LENGTH */
145 memcpy(tmp
, HTTP_PREFIX
, HTTP_PREFIX_LEN
);
147 for(i
= 0; i
< ARRAY_SIZE(common_http_fields
); cf
++, i
++) {
148 /* Rack doesn't like certain headers prefixed with "HTTP_" */
149 if (!strcmp("CONTENT_LENGTH", cf
->name
) ||
150 !strcmp("CONTENT_TYPE", cf
->name
)) {
151 cf
->value
= rb_str_new(cf
->name
, cf
->len
);
153 memcpy(tmp
+ HTTP_PREFIX_LEN
, cf
->name
, cf
->len
+ 1);
154 cf
->value
= rb_str_new(tmp
, HTTP_PREFIX_LEN
+ cf
->len
);
156 cf
->value
= rb_obj_freeze(cf
->value
);
157 rb_global_variable(&cf
->value
);
/*
 * Walks common_http_fields looking for an entry whose length equals +flen+
 * and whose name matches the first +flen+ bytes of +field+ (memcmp, so
 * +field+ need not be NUL-terminated).
 * NOTE(review): the declaration of `i` and the return statements are not
 * visible in this excerpt; presumably the matching entry's value is
 * returned and Qnil otherwise (Init_http11 asserts that the "HOST" lookup
 * is not Qnil) -- confirm against the full file.
 */
161 static VALUE
find_common_field_value(const char *field
, size_t flen
)
164 struct common_field
*cf
= common_http_fields
;
165 for(i
= 0; i
< ARRAY_SIZE(common_http_fields
); i
++, cf
++) {
166 if (cf
->len
== flen
&& !memcmp(cf
->name
, field
, flen
))
/*
 * Callback taking one header: +field+/+flen+ is the header name and
 * +value+/+vlen+ its value; +data+ carries the request Hash as a VALUE.
 *
 * Both lengths are checked against the FIELD_NAME / FIELD_VALUE limits
 * (VALIDATE_MAX_LENGTH raises HttpParserError when over).  The hash key is
 * looked up with find_common_field_value(); when a new key has to be built
 * it is "HTTP_" followed by the field name.  The value is stored as a new
 * String under that key.
 * NOTE(review): the declaration of `f`, the head of the first `if` and the
 * body of the `else if` branch are not visible in this excerpt.  Presumably
 * the first branch runs when the lookup found nothing, and the second one
 * skips the store when the hash already holds a value for the host key --
 * confirm against the full file.
 */
172 static void http_field(void *data
, const char *field
,
173 size_t flen
, const char *value
, size_t vlen
)
175 VALUE req
= (VALUE
)data
;
178 VALIDATE_MAX_LENGTH(flen
, FIELD_NAME
);
179 VALIDATE_MAX_LENGTH(vlen
, FIELD_VALUE
);
181 f
= find_common_field_value(field
, flen
);
185 * We got a strange header that we don't have a memoized value for.
186 * Fallback to creating a new string to use as a hash key.
188 * using rb_str_new(NULL, len) here is faster than rb_str_buf_new(len)
189 * in my testing, because: there's no minimum allocation length (and
190 * no check for it, either), RSTRING_LEN(f) does not need to be
191 * written twice, and and RSTRING_PTR(f) will already be
192 * null-terminated for us.
/* allocate the key at its final size, then fill in prefix and name */
194 f
= rb_str_new(NULL
, HTTP_PREFIX_LEN
+ flen
);
195 memcpy(RSTRING_PTR(f
), HTTP_PREFIX
, HTTP_PREFIX_LEN
);
196 memcpy(RSTRING_PTR(f
) + HTTP_PREFIX_LEN
, field
, flen
);
197 assert(*(RSTRING_PTR(f
) + RSTRING_LEN(f
)) == '\0'); /* paranoia */
198 /* fprintf(stderr, "UNKNOWN HEADER <%s>\n", RSTRING_PTR(f)); */
199 } else if (f
== global_http_host
&& rb_hash_aref(req
, f
) != Qnil
) {
/* store the header value under the chosen key */
203 rb_hash_aset(req
, f
, rb_str_new(value
, vlen
));
206 static void request_method(void *data
, const char *at
, size_t length
)
208 VALUE req
= (VALUE
)data
;
211 val
= rb_str_new(at
, length
);
212 rb_hash_aset(req
, global_request_method
, val
);
215 static void scheme(void *data
, const char *at
, size_t length
)
217 rb_hash_aset((VALUE
)data
, global_rack_url_scheme
, rb_str_new(at
, length
));
220 static void host(void *data
, const char *at
, size_t length
)
222 rb_hash_aset((VALUE
)data
, global_http_host
, rb_str_new(at
, length
));
225 static void request_uri(void *data
, const char *at
, size_t length
)
227 VALUE req
= (VALUE
)data
;
230 VALIDATE_MAX_LENGTH(length
, REQUEST_URI
);
232 val
= rb_str_new(at
, length
);
233 rb_hash_aset(req
, global_request_uri
, val
);
235 /* "OPTIONS * HTTP/1.1\r\n" is a valid request */
236 if (length
== 1 && *at
== '*') {
237 val
= rb_str_new(NULL
, 0);
238 rb_hash_aset(req
, global_request_path
, val
);
239 rb_hash_aset(req
, global_path_info
, val
);
243 static void fragment(void *data
, const char *at
, size_t length
)
245 VALUE req
= (VALUE
)data
;
248 VALIDATE_MAX_LENGTH(length
, FRAGMENT
);
250 val
= rb_str_new(at
, length
);
251 rb_hash_aset(req
, global_fragment
, val
);
254 static void request_path(void *data
, const char *at
, size_t length
)
256 VALUE req
= (VALUE
)data
;
259 VALIDATE_MAX_LENGTH(length
, REQUEST_PATH
);
261 val
= rb_str_new(at
, length
);
262 rb_hash_aset(req
, global_request_path
, val
);
264 /* rack says PATH_INFO must start with "/" or be empty */
265 if (!(length
== 1 && *at
== '*'))
266 rb_hash_aset(req
, global_path_info
, val
);
269 static void query_string(void *data
, const char *at
, size_t length
)
271 VALUE req
= (VALUE
)data
;
274 VALIDATE_MAX_LENGTH(length
, QUERY_STRING
);
276 val
= rb_str_new(at
, length
);
277 rb_hash_aset(req
, global_query_string
, val
);
280 static void http_version(void *data
, const char *at
, size_t length
)
282 VALUE req
= (VALUE
)data
;
283 VALUE val
= rb_str_new(at
, length
);
284 rb_hash_aset(req
, global_http_version
, val
);
287 /** Finalizes the request header to have a bunch of stuff that's needed. */
288 static void header_done(void *data
, const char *at
, size_t length
)
290 VALUE req
= (VALUE
)data
;
291 VALUE server_name
= global_localhost
;
292 VALUE server_port
= global_port_80
;
295 /* rack requires QUERY_STRING */
296 if (rb_hash_aref(req
, global_query_string
) == Qnil
)
297 rb_hash_aset(req
, global_query_string
, rb_str_new(NULL
, 0));
299 /* set rack.url_scheme to "https" or "http", no others are allowed by Rack */
300 if ((temp
= rb_hash_aref(req
, global_rack_url_scheme
)) == Qnil
) {
301 if ((temp
= rb_hash_aref(req
, global_http_x_forwarded_proto
)) != Qnil
&&
302 RSTRING_LEN(temp
) == 5 &&
303 !memcmp("https", RSTRING_PTR(temp
), 5))
304 server_port
= global_port_443
;
307 rb_hash_aset(req
, global_rack_url_scheme
, temp
);
308 } else if (RSTRING_LEN(temp
) == 5 && !memcmp("https", RSTRING_PTR(temp
), 5)) {
309 server_port
= global_port_443
;
312 /* parse and set the SERVER_NAME and SERVER_PORT variables */
313 if ((temp
= rb_hash_aref(req
, global_http_host
)) != Qnil
) {
314 char *colon
= memchr(RSTRING_PTR(temp
), ':', RSTRING_LEN(temp
));
316 long port_start
= colon
- RSTRING_PTR(temp
) + 1;
318 server_name
= rb_str_substr(temp
, 0, colon
- RSTRING_PTR(temp
));
319 if ((RSTRING_LEN(temp
) - port_start
) > 0)
320 server_port
= rb_str_substr(temp
, port_start
, RSTRING_LEN(temp
));
325 rb_hash_aset(req
, global_server_name
, server_name
);
326 rb_hash_aset(req
, global_server_port
, server_port
);
327 rb_hash_aset(req
, global_server_protocol
, global_server_protocol_value
);
329 /* grab the initial body and stuff it into the hash */
330 temp
= rb_hash_aref(req
, global_request_method
);
332 long len
= RSTRING_LEN(temp
);
333 char *ptr
= RSTRING_PTR(temp
);
335 if (memcmp(ptr
, "HEAD", len
) && memcmp(ptr
, "GET", len
))
336 rb_hash_aset(req
, sym_http_body
, rb_str_new(at
, length
));
/*
 * Free callback handed to Data_Wrap_Struct() in HttpParser_alloc(), which
 * wraps the http_parser it allocated; +data+ is that wrapped pointer.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * releases the struct obtained from ALLOC_N -- confirm.
 */
340 static void HttpParser_free(void *data
) {
/*
 * Allocation function registered for the HttpParser class in Init_http11()
 * (rb_define_alloc_func).  Allocates one http_parser with ALLOC_N, puts it
 * into its initial state with http_parser_init() and wraps it in an object
 * of class +klass+, with NULL as the mark function and HttpParser_free as
 * the free function.
 * NOTE(review): the declaration of `obj` and the return statement are not
 * visible in this excerpt; presumably `obj` is returned -- confirm.
 */
349 static VALUE
HttpParser_alloc(VALUE klass
)
352 http_parser
*hp
= ALLOC_N(http_parser
, 1);
354 http_parser_init(hp
);
356 obj
= Data_Wrap_Struct(klass
, NULL
, HttpParser_free
, hp
);
364 * parser.new -> parser
366 * Creates a new parser.
/*
 * Implements HttpParser#initialize (registered with zero arguments in
 * Init_http11): runs http_parser_init() on the parser wrapped in +self+.
 * NOTE(review): the return statement is not visible in this excerpt; the
 * call-seq fragment preceding this function reads "parser.new -> parser",
 * so presumably +self+ is returned -- confirm.
 */
368 static VALUE
HttpParser_init(VALUE self
)
370 http_parser_init(data_get(self
));
378 * parser.reset -> nil
380 * Resets the parser to its initial state so that you can reuse it
381 * rather than making new ones.
/*
 * Implements HttpParser#reset (registered with zero arguments in
 * Init_http11): runs http_parser_init() on the parser wrapped in +self+,
 * the same call HttpParser_init() makes.
 * NOTE(review): the return statement is not visible in this excerpt; the
 * call-seq fragment preceding this function reads "parser.reset -> nil",
 * so presumably Qnil is returned -- confirm.
 */
383 static VALUE
HttpParser_reset(VALUE self
)
385 http_parser_init(data_get(self
));
393 * parser.execute(req_hash, data) -> true/false
395 * Takes a Hash and a String of data, parses the String of data filling
396 * in the Hash returning a boolean to indicate whether or not parsing
399 * This function now throws an exception when there is a parsing error.
400 * This makes the logic for working with the parser much easier. You
401 * will need to wrap the parser with an exception handling block.
404 static VALUE
HttpParser_execute(VALUE self
, VALUE req_hash
, VALUE data
)
406 http_parser
*http
= data_get(self
);
407 char *dptr
= RSTRING_PTR(data
);
408 long dlen
= RSTRING_LEN(data
);
410 if (http
->nread
< dlen
) {
411 http
->data
= (void *)req_hash
;
412 http_parser_execute(http
, dptr
, dlen
);
414 VALIDATE_MAX_LENGTH(http
->nread
, HEADER
);
416 if (!http_parser_has_error(http
))
417 return http_parser_is_finished(http
) ? Qtrue
: Qfalse
;
419 rb_raise(eHttpParserError
, "Invalid HTTP format, parsing fails.");
421 rb_raise(eHttpParserError
, "Requested start is after data buffer end.");
424 void Init_http11(void)
426 mUnicorn
= rb_define_module("Unicorn");
428 DEF_GLOBAL(rack_url_scheme
, "rack.url_scheme");
429 DEF_GLOBAL(request_method
, "REQUEST_METHOD");
430 DEF_GLOBAL(request_uri
, "REQUEST_URI");
431 DEF_GLOBAL(fragment
, "FRAGMENT");
432 DEF_GLOBAL(query_string
, "QUERY_STRING");
433 DEF_GLOBAL(http_version
, "HTTP_VERSION");
434 DEF_GLOBAL(request_path
, "REQUEST_PATH");
435 DEF_GLOBAL(path_info
, "PATH_INFO");
436 DEF_GLOBAL(server_name
, "SERVER_NAME");
437 DEF_GLOBAL(server_port
, "SERVER_PORT");
438 DEF_GLOBAL(server_protocol
, "SERVER_PROTOCOL");
439 DEF_GLOBAL(server_protocol_value
, "HTTP/1.1");
440 DEF_GLOBAL(http_x_forwarded_proto
, "HTTP_X_FORWARDED_PROTO");
441 DEF_GLOBAL(port_80
, "80");
442 DEF_GLOBAL(port_443
, "443");
443 DEF_GLOBAL(localhost
, "localhost");
444 DEF_GLOBAL(http
, "http");
446 eHttpParserError
= rb_define_class_under(mUnicorn
, "HttpParserError", rb_eIOError
);
448 cHttpParser
= rb_define_class_under(mUnicorn
, "HttpParser", rb_cObject
);
449 rb_define_alloc_func(cHttpParser
, HttpParser_alloc
);
450 rb_define_method(cHttpParser
, "initialize", HttpParser_init
,0);
451 rb_define_method(cHttpParser
, "reset", HttpParser_reset
,0);
452 rb_define_method(cHttpParser
, "execute", HttpParser_execute
,2);
453 sym_http_body
= ID2SYM(rb_intern("http_body"));
454 init_common_fields();
455 global_http_host
= find_common_field_value("HOST", 4);
456 assert(global_http_host
!= Qnil
);