2 * Copyright (c) 2009 Eric Wong (all bugs are Eric's fault)
3 * Copyright (c) 2005 Zed A. Shaw
4 * You can redistribute it and/or modify it under the same terms as Ruby.
10 #include <sys/types.h>
11 #include "common_field_optimization.h"
12 #include "global_variables.h"
/*
 * Parser state bits.  They live in the "flags" member of the parser
 * struct and are read and written through the HP_FL_* macros.
 */
#define UH_FL_CHUNKED    (1 << 0) /* "chunked" transfer encoding was requested */
#define UH_FL_HASBODY    (1 << 1) /* a Content-Length or chunked body is expected */
#define UH_FL_INBODY     (1 << 2) /* header parsing finished and a body follows */
#define UH_FL_HASTRAILER (1 << 3) /* a Trailer header was seen */
#define UH_FL_INTRAILER  (1 << 4) /* the chunked body ended, trailers are being parsed */
#define UH_FL_INCHUNK    (1 << 5) /* the current chunk extends past the input buffer */
#define UH_FL_KAMETHOD   (1 << 6) /* request method allows keepalive (GET/HEAD) */
#define UH_FL_KAVERSION  (1 << 7) /* protocol version (or the client) allows keepalive */
#define UH_FL_HASHEADER  (1 << 8) /* a protocol version was seen in the request line */

/* both of these flags need to be set for keepalive to be supported */
#define UH_FL_KEEPALIVE (UH_FL_KAMETHOD | UH_FL_KAVERSION)
28 /* keep this small for Rainbows! since every client has one */
/* compared against http_parser_error and http_parser_first_final elsewhere in this file to tell failure from completion */
30 int cs; /* Ragel internal state */
34 union { /* these 2 fields don't nest */
/* length of the current header name: stored by the write_field action, read by write_value() */
39 size_t field_len; /* only used during header processing */
/* write position in the body destination string: advanced by the skip_chunk_data action, zeroed by HttpParser_filter_body() */
40 size_t dest_offset; /* only used during body processing */
/* string that header continuation lines are appended to; hp_mark() reports it to the GC */
42 VALUE cont; /* Qfalse: unset, Qnil: ignored header, T_STRING: append */
/* forward declaration: the parser action code calls this before its definition appears */
49 static void finalize_header(struct http_parser *hp, VALUE req);
/*
 * Helpers for the parser action code.  They expand in a scope that
 * provides "hp" (the parser state), "buffer" (start of the input being
 * parsed) and, for REMAINING, the "p" and "pe" cursor/end pointers.
 * Positions are stored in hp as offsets from buffer, not as pointers.
 *
 * Every macro argument and every expansion is parenthesized so that
 * compound arguments (e.g. a conditional expression) and surrounding
 * operators bind as the caller expects.
 */

/* number of bytes between the cursor and the end of the input */
#define REMAINING ((unsigned long)(pe - p))
/* distance from the offset saved in hp->AT up to the position FPC */
#define LEN(AT, FPC) ((FPC) - buffer - hp->AT)
/* save the position FPC into hp->M as an offset from buffer */
#define MARK(M,FPC) (hp->M = (FPC) - buffer)
/* turn the offset saved in hp->F back into a pointer */
#define PTR_TO(F) (buffer + hp->F)
/* new Ruby string holding the bytes from the saved offset M up to FPC */
#define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC))

/*
 * Flag accessors; "fl" is a flag name without its UH_FL_ prefix.
 * HP_FL_TEST is non-zero when any bit of the flag is set, HP_FL_ALL
 * only when every bit of a multi-bit flag is set.
 */
#define HP_FL_TEST(hp,fl) ((hp)->flags & (UH_FL_##fl))
#define HP_FL_SET(hp,fl) ((hp)->flags |= (UH_FL_##fl))
#define HP_FL_UNSET(hp,fl) ((hp)->flags &= ~(UH_FL_##fl))
#define HP_FL_ALL(hp,fl) (HP_FL_TEST(hp, fl) == (UH_FL_##fl))
63 * handles values of the "Connection:" header, keepalive is implied
64 * for HTTP/1.1 but needs to be explicitly enabled with HTTP/1.0
65 * Additionally, we require GET/HEAD requests to support keepalive.
67 static void hp_keepalive_connection(struct http_parser *hp, VALUE val)
69 /* REQUEST_METHOD is always set before any headers */
70 if (HP_FL_TEST(hp, KAMETHOD)) {
71 if (STR_CSTR_CASE_EQ(val, "keep-alive")) {
72 /* basically have HTTP/1.0 masquerade as HTTP/1.1+ */
73 HP_FL_SET(hp, KAVERSION);
74 } else if (STR_CSTR_CASE_EQ(val, "close")) {
76 * it doesn't matter what HTTP version or request method we have,
77 * if a client says "Connection: close", we disable keepalive
79 HP_FL_UNSET(hp, KEEPALIVE);
82 * client could've sent anything, ignore it for now. Maybe
83 * "HP_FL_UNSET(hp, KEEPALIVE);" just in case?
84 * Raising an exception might be too mean...
/*
 * Records the request method in the req hash and flags GET/HEAD requests
 * as keepalive-capable.  ptr/len describe the method text as a
 * pointer/length pair (the request_method action passes a slice of the
 * parse buffer).
 */
91 request_method(struct http_parser *hp, VALUE req, const char *ptr, size_t len)
96 * we only support keepalive for GET and HEAD requests for now other
97 * methods are too rarely seen to be worth optimizing. POST is unsafe
98 * since some clients send extra bytes after POST bodies.
/* GET and HEAD are the only methods that set the KAMETHOD flag */
100 if (CONST_MEM_EQ("GET", ptr, len)) {
101 HP_FL_SET(hp, KAMETHOD);
103 } else if (CONST_MEM_EQ("HEAD", ptr, len)) {
104 HP_FL_SET(hp, KAMETHOD);
/* presumably the fallback for other methods: a fresh Ruby string is built from the raw bytes -- confirm against the full branch structure */
107 v = rb_str_new(ptr, len);
/* publish the method string under the g_request_method key */
109 rb_hash_aset(req, g_request_method, v);
/*
 * Records the protocol version from the request line.  ptr/len describe
 * the version text as a pointer/length pair; the resulting string is
 * stored in req and the keepalive/header flags in hp are updated.
 */
113 http_version(struct http_parser *hp, VALUE req, const char *ptr, size_t len)
/* a version token was present, so this is not a header-less request (finalize_header() checks this flag) */
117 HP_FL_SET(hp, HASHEADER);
119 if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
120 /* HTTP/1.1 implies keepalive unless "Connection: close" is set */
121 HP_FL_SET(hp, KAVERSION);
123 } else if (CONST_MEM_EQ("HTTP/1.0", ptr, len)) {
/* presumably the fallback for other version strings: build a fresh Ruby string -- confirm against the full branch structure */
126 v = rb_str_new(ptr, len);
/* the same string object is stored under both keys */
128 rb_hash_aset(req, g_server_protocol, v);
129 rb_hash_aset(req, g_http_version, v);
132 static inline void hp_invalid_if_trailer(struct http_parser *hp)
134 if (HP_FL_TEST(hp, INTRAILER))
135 rb_raise(eHttpParserError, "invalid Trailer");
/*
 * Handles a header continuation line: the bytes between the offset saved
 * in hp->mark and p are appended to the string kept in hp->cont.
 * Raises HttpParserError when no header value is open for continuation.
 */
138 static void write_cont_value(struct http_parser *hp,
139 char *buffer, const char *p)
/* Qfalse means no header value has been stored yet, so there is nothing to continue */
143 if (hp->cont == Qfalse)
144 rb_raise(eHttpParserError, "invalid continuation line");
146 return; /* we're ignoring this header (probably Host:) */
148 assert(TYPE(hp->cont) == T_STRING && "continuation line is not a string");
149 assert(hp->mark > 0 && "impossible continuation line offset");
/* LEN(mark, p) is the number of bytes on the continuation line */
151 if (LEN(mark, p) == 0)
154 if (RSTRING_LEN(hp->cont) > 0)
159 if (RSTRING_LEN(hp->cont) > 0) {
160 assert((' ' == *vptr || '\t' == *vptr) && "invalid leading white space");
/* append the continuation bytes to the saved header value */
163 rb_str_buf_cat(hp->cont, vptr, LEN(mark, p));
/*
 * Stores one parsed header in the req hash.  The field name is the
 * hp->s.field_len bytes at the start.field offset, the value is the
 * bytes between the mark offset and p.  Connection, Content-Length,
 * Transfer-Encoding and Trailer additionally update the parser flags.
 * Raises HttpParserError for an invalid Content-Length.
 */
166 static void write_value(VALUE req, struct http_parser *hp,
167 const char *buffer, const char *p)
/* look the field name up among the pre-built common header keys */
169 VALUE f = find_common_field(PTR_TO(start.field), hp->s.field_len);
173 VALIDATE_MAX_LENGTH(LEN(mark, p), FIELD_VALUE);
/* an empty value gets a string buffer instead of a copy -- presumably so continuation lines can still be appended; confirm */
174 v = LEN(mark, p) == 0 ? rb_str_buf_new(128) : STR_NEW(mark, p);
176 VALIDATE_MAX_LENGTH(hp->s.field_len, FIELD_NAME);
/* presumably reached when find_common_field() found nothing -- the guarding condition is not visible here */
177 f = uncommon_field(PTR_TO(start.field), hp->s.field_len);
178 } else if (f == g_http_connection) {
179 hp_keepalive_connection(hp, v);
180 } else if (f == g_content_length) {
/* a negative parse_length() result is rejected just below */
181 hp->len.content = parse_length(RSTRING_PTR(v), RSTRING_LEN(v));
182 if (hp->len.content < 0)
183 rb_raise(eHttpParserError, "invalid Content-Length");
184 HP_FL_SET(hp, HASBODY);
/* body-describing headers raise when they show up while trailers are being parsed */
185 hp_invalid_if_trailer(hp);
186 } else if (f == g_http_transfer_encoding) {
187 if (STR_CSTR_CASE_EQ(v, "chunked")) {
188 HP_FL_SET(hp, CHUNKED);
189 HP_FL_SET(hp, HASBODY);
191 hp_invalid_if_trailer(hp);
192 } else if (f == g_http_trailer) {
193 HP_FL_SET(hp, HASTRAILER);
194 hp_invalid_if_trailer(hp);
196 assert(TYPE(f) == T_STRING && "memoized object is not a string");
/* fetch any value already stored for this header */
200 e = rb_hash_aref(req, f);
/* store the value and remember it in hp->cont so a continuation line can extend it */
202 hp->cont = rb_hash_aset(req, f, v);
203 } else if (f == g_http_host) {
205 * ignored, absolute URLs in REQUEST_URI take precedence over
206 * the Host: header (ref: rfc 2616, section 5.2.1)
/* join the new value onto the existing one with a comma; the combined string becomes the continuation target */
210 rb_str_buf_cat(e, ",", 1);
211 hp->cont = rb_str_buf_append(e, v);
# Ragel actions: embedded C that runs at points of the grammar, presumably the one included at the end of this section.
220 action mark {MARK(mark, fpc); }
222 action start_field { MARK(start.field, fpc); }
223 action snake_upcase_field { snake_upcase_char(deconst(fpc)); }
224 action downcase_char { downcase_char(deconst(fpc)); }
225 action write_field { hp->s.field_len = LEN(start.field, fpc); }
226 action start_value { MARK(mark, fpc); }
227 action write_value { write_value(req, hp, buffer, fpc); }
228 action write_cont_value { write_cont_value(hp, buffer, fpc); }
229 action request_method {
/* hand the bytes between the saved mark and the current position to request_method() */
230 request_method(hp, req, PTR_TO(mark), LEN(mark, fpc));
/* the marked bytes become the g_rack_url_scheme entry of the request hash */
233 rb_hash_aset(req, g_rack_url_scheme, STR_NEW(mark, fpc));
/* the marked bytes become the g_http_host entry of the request hash */
236 rb_hash_aset(req, g_http_host, STR_NEW(mark, fpc));
241 VALIDATE_MAX_LENGTH(LEN(mark, fpc), REQUEST_URI);
242 str = rb_hash_aset(req, g_request_uri, STR_NEW(mark, fpc));
244 * "OPTIONS * HTTP/1.1\r\n" is a valid request, but we can't have '*'
245 * in REQUEST_PATH or PATH_INFO or else Rack::Lint will complain
247 if (STR_CSTR_EQ(str, "*")) {
/* both path entries share one empty string instead of "*" */
248 str = rb_str_new(NULL, 0);
249 rb_hash_aset(req, g_path_info, str);
250 rb_hash_aset(req, g_request_path, str);
254 VALIDATE_MAX_LENGTH(LEN(mark, fpc), FRAGMENT);
255 rb_hash_aset(req, g_fragment, STR_NEW(mark, fpc));
257 action start_query {MARK(start.query, fpc); }
258 action query_string {
259 VALIDATE_MAX_LENGTH(LEN(start.query, fpc), QUERY_STRING);
260 rb_hash_aset(req, g_query_string, STR_NEW(start.query, fpc));
262 action http_version { http_version(hp, req, PTR_TO(mark), LEN(mark, fpc)); }
263 action request_path {
266 VALIDATE_MAX_LENGTH(LEN(mark, fpc), REQUEST_PATH);
267 val = rb_hash_aset(req, g_request_path, STR_NEW(mark, fpc));
269 /* rack says PATH_INFO must start with "/" or be empty */
270 if (!STR_CSTR_EQ(val, "*"))
271 rb_hash_aset(req, g_path_info, val);
273 action add_to_chunk_size {
/* presumably folds the current character into the running chunk size as a base-16 digit; a negative result is treated as invalid */
274 hp->len.chunk = step_incr(hp->len.chunk, fc, 16);
275 if (hp->len.chunk < 0)
276 rb_raise(eHttpParserError, "invalid chunk size");
/* fill in the request-hash entries that are derived once the headers are known */
279 finalize_header(hp, req);
/* default to the finished state; replaced by the chunked-body entry state below when a chunked body follows */
281 cs = http_parser_first_final;
282 if (HP_FL_TEST(hp, HASBODY)) {
283 HP_FL_SET(hp, INBODY);
284 if (HP_FL_TEST(hp, CHUNKED))
285 cs = http_parser_en_ChunkedBody;
287 assert(!HP_FL_TEST(hp, CHUNKED) && "chunked encoding without body!");
290 * go back to Ruby so we can call the Rack application, we'll reenter
291 * the parser iff the body needs to be processed.
296 action end_trailers {
/* put the machine into its finished state */
297 cs = http_parser_first_final;
301 action end_chunked_body {
/* switch to trailer parsing: set the flag and continue in the Trailers entry state */
302 HP_FL_SET(hp, INTRAILER);
303 cs = http_parser_en_Trailers;
305 assert(p <= pe && "buffer overflow after chunked body");
309 action skip_chunk_data {
/* also a goto target: http_parser_execute() jumps here when the INCHUNK flag is set */
310 skip_chunk_data_hack: {
/* copy the part of the chunk that is present in this buffer into the destination string (passed in as req) */
311 size_t nr = MIN((size_t)hp->len.chunk, REMAINING);
312 memcpy(RSTRING_PTR(req) + hp->s.dest_offset, fpc, nr);
313 hp->s.dest_offset += nr;
316 assert(hp->len.chunk >= 0 && "negative chunk length");
/* the chunk continues beyond this buffer: flag it so the next call resumes here */
317 if ((size_t)hp->len.chunk > REMAINING) {
318 HP_FL_SET(hp, INCHUNK);
326 include unicorn_http_common "unicorn_http_common.rl";
/* Puts the parser struct hp into its initial state. */
332 static void http_parser_init(struct http_parser *hp)
/* clear every member of the struct in one go */
335 memset(hp, 0, sizeof(struct http_parser));
336 hp->cont = Qfalse; /* zero on MRI, should be optimized away by above */
/*
 * Runs the parser over buffer (len bytes), starting at the offset saved
 * in hp->offset, and stores the stop position back into hp->offset.
 * req is the Ruby object the action code writes its results into.
 */
342 static void http_parser_execute(struct http_parser *hp,
343 VALUE req, char *buffer, size_t len)
/* resume from where the previous call stopped */
347 size_t off = hp->offset;
349 if (cs == http_parser_first_final)
352 assert(off <= len && "offset past end of buffer");
357 assert((void *)(pe - p) == (void *)(len - off) &&
358 "pointers aren't same distance");
/* a previous call stopped inside a chunk: clear the marker and jump straight back into the chunk-copy code */
360 if (HP_FL_TEST(hp, INCHUNK)) {
361 HP_FL_UNSET(hp, INCHUNK);
362 goto skip_chunk_data_hack;
365 post_exec: /* "_out:" also goes here */
366 if (hp->cs != http_parser_error)
/* remember the stop position as an offset from the start of buffer */
368 hp->offset = p - buffer;
370 assert(p <= pe && "buffer overflow after parsing execute");
371 assert(hp->offset <= len && "offset longer than length");
/* Extracts the struct http_parser wrapped by the Ruby object self. */
374 static struct http_parser *data_get(VALUE self)
376 struct http_parser *hp;
/* Data_Get_Struct stores the wrapped pointer into hp */
378 Data_Get_Struct(self, struct http_parser, hp);
379 assert(hp && "failed to extract http_parser struct");
/*
 * Fills in the request-hash entries that can only be decided once the
 * headers are known: the URL scheme, server name and port, the server
 * protocol for requests without a version, and an empty query string
 * when none was parsed.
 */
383 static void finalize_header(struct http_parser *hp, VALUE req)
385 VALUE temp = rb_hash_aref(req, g_rack_url_scheme);
/* defaults; replaced below when the scheme or the host entry says otherwise */
386 VALUE server_name = g_localhost;
387 VALUE server_port = g_port_80;
389 /* set rack.url_scheme to "https" or "http", no others are allowed by Rack */
/* consult the g_http_x_forwarded_proto entry; only the exact value "https" switches the port */
391 temp = rb_hash_aref(req, g_http_x_forwarded_proto);
392 if (!NIL_P(temp) && STR_CSTR_EQ(temp, "https"))
393 server_port = g_port_443;
396 rb_hash_aset(req, g_rack_url_scheme, temp);
397 } else if (STR_CSTR_EQ(temp, "https")) {
398 server_port = g_port_443;
400 assert(server_port == g_port_80 && "server_port not set");
403 /* parse and set the SERVER_NAME and SERVER_PORT variables */
404 temp = rb_hash_aref(req, g_http_host);
/* look for the first ':' in the host value */
406 char *colon = memchr(RSTRING_PTR(temp), ':', RSTRING_LEN(temp));
408 long port_start = colon - RSTRING_PTR(temp) + 1;
/* everything before the colon is the server name */
410 server_name = rb_str_substr(temp, 0, colon - RSTRING_PTR(temp));
/* only replace the port when at least one byte follows the colon */
411 if ((RSTRING_LEN(temp) - port_start) > 0)
412 server_port = rb_str_substr(temp, port_start, RSTRING_LEN(temp));
417 rb_hash_aset(req, g_server_name, server_name);
418 rb_hash_aset(req, g_server_port, server_port);
/* HASHEADER is set by http_version(); without it the request carried no version, so g_http_09 is recorded */
419 if (!HP_FL_TEST(hp, HASHEADER))
420 rb_hash_aset(req, g_server_protocol, g_http_09);
422 /* rack requires QUERY_STRING */
423 if (NIL_P(rb_hash_aref(req, g_query_string)))
424 rb_hash_aset(req, g_query_string, rb_str_new(NULL, 0));
427 static void hp_mark(void *ptr)
429 struct http_parser *hp = ptr;
431 rb_gc_mark(hp->cont);
434 static VALUE HttpParser_alloc(VALUE klass)
436 struct http_parser *hp;
437 return Data_Make_Struct(klass, struct http_parser, hp_mark, -1, hp);
443 * parser.new => parser
445 * Creates a new parser.
447 static VALUE HttpParser_init(VALUE self)
/* put the struct wrapped by self into its initial parser state */
449 http_parser_init(data_get(self));
456 * parser.reset => nil
458 * Resets the parser to its initial state so that you can reuse it
459 * rather than making new ones.
461 static VALUE HttpParser_reset(VALUE self)
/* same initialisation as a new parser gets: the wrapped struct is cleared in place */
463 http_parser_init(data_get(self));
/*
 * Discards bytes from the front of the Ruby string str in place: the
 * remaining bytes are moved to the start of the string and its length
 * is updated.  nr is the number of leading bytes to discard.
 */
468 static void advance_str(VALUE str, off_t nr)
470 long len = RSTRING_LEN(str);
477 assert(nr <= len && "trying to advance past end of buffer");
/* NOTE(review): len is used below as both the byte count to move and the new length, so it must already exclude the nr discarded bytes here -- confirm */
479 if (len > 0) /* unlikely, len is usually 0 */
480 memmove(RSTRING_PTR(str), RSTRING_PTR(str) + nr, len);
481 rb_str_set_len(str, len);
486 * parser.content_length => nil or Integer
488 * Returns the number of bytes left to run through HttpParser#filter_body.
489 * This will initially be the value of the "Content-Length" HTTP header
490 * after header parsing is complete and will decrease in value as
491 * HttpParser#filter_body is called for each chunk. This should return
492 * zero for requests with no body.
494 * This will return nil on "Transfer-Encoding: chunked" requests.
496 static VALUE HttpParser_content_length(VALUE self)
498 struct http_parser *hp = data_get(self);
500 return HP_FL_TEST(hp, CHUNKED) ? Qnil : OFFT2NUM(hp->len.content);
504 * Document-method: trailers
506 * parser.trailers(req, data) => req or nil
508 * This is an alias for HttpParser#headers
512 * Document-method: headers
514 * parser.headers(req, data) => req or nil
516 * Takes a Hash and a String of data, parses the String of data filling
517 * in the Hash returning the Hash if parsing is finished, nil otherwise
518 * When returning the req Hash, it may modify data to point to where
519 * body processing should begin.
521 * Raises HttpParserError if there are parsing errors.
523 static VALUE HttpParser_headers(VALUE self, VALUE req, VALUE data)
525 struct http_parser *hp = data_get(self);
/* run the parser over everything currently held in data */
529 http_parser_execute(hp, req, RSTRING_PTR(data), RSTRING_LEN(data));
530 VALIDATE_MAX_LENGTH(hp->offset, HEADER);
/* either state means the header section is complete; the second one is entered when a chunked body follows */
532 if (hp->cs == http_parser_first_final ||
533 hp->cs == http_parser_en_ChunkedBody) {
/* drop the consumed bytes so that data begins where body processing should start */
534 advance_str(data, hp->offset + 1);
540 if (hp->cs == http_parser_error)
541 rb_raise(eHttpParserError, "Invalid HTTP format, parsing fails.");
546 static int chunked_eof(struct http_parser *hp)
548 return ((hp->cs == http_parser_first_final) || HP_FL_TEST(hp, INTRAILER));
553 * parser.body_eof? => true or false
555 * Detects if we're done filtering the body or not. This can be used
556 * to detect when to stop calling HttpParser#filter_body.
558 static VALUE HttpParser_body_eof(VALUE self)
560 struct http_parser *hp = data_get(self);
562 if (HP_FL_TEST(hp, CHUNKED))
563 return chunked_eof(hp) ? Qtrue : Qfalse;
565 return hp->len.content == 0 ? Qtrue : Qfalse;
570 * parser.keepalive? => true or false
572 * This should be used to detect if a request can really handle
573 * keepalives and pipelining. Currently, the rules are:
575 * 1. MUST be a GET or HEAD request
576 * 2. MUST be HTTP/1.1 +or+ HTTP/1.0 with "Connection: keep-alive"
577 * 3. MUST NOT have "Connection: close" set
579 static VALUE HttpParser_keepalive(VALUE self)
581 struct http_parser *hp = data_get(self);
583 return HP_FL_ALL(hp, KEEPALIVE) ? Qtrue : Qfalse;
588 * parser.headers? => true or false
590 * This should be used to detect if a request has headers (and if
591 * the response will have headers as well). HTTP/0.9 requests
592 * should return false, all subsequent HTTP versions will return true
594 static VALUE HttpParser_has_headers(VALUE self)
596 struct http_parser *hp = data_get(self);
598 return HP_FL_TEST(hp, HASHEADER) ? Qtrue : Qfalse;
603 * parser.filter_body(buf, data) => nil/data
605 * Takes a String of +data+, will modify data if dechunking is done.
606 * Returns +nil+ if there is more data left to process. Returns
607 * +data+ if body processing is complete. When returning +data+,
608 * it may modify +data+ so the start of the string points to where
609 * the body ended so that trailer processing can begin.
611 * Raises HttpParserError if there are dechunking errors.
612 * Basically this is a glorified memcpy(3) that copies +data+
613 * into +buf+ while filtering it through the dechunker.
615 static VALUE HttpParser_filter_body(VALUE self, VALUE buf, VALUE data)
617 struct http_parser *hp = data_get(self);
622 dptr = RSTRING_PTR(data);
623 dlen = RSTRING_LEN(data);
626 rb_str_resize(buf, dlen); /* we can never copy more than dlen bytes */
627 OBJ_TAINT(buf); /* keep weirdo $SAFE users happy */
629 if (HP_FL_TEST(hp, CHUNKED)) {
630 if (!chunked_eof(hp)) {
/* every call writes into buf from its beginning */
631 hp->s.dest_offset = 0;
/* buf is passed in the req position: the chunk-copy action code writes through that argument */
632 http_parser_execute(hp, buf, dptr, dlen);
633 if (hp->cs == http_parser_error)
634 rb_raise(eHttpParserError, "Invalid HTTP format, parsing fails.");
636 assert(hp->s.dest_offset <= hp->offset &&
637 "destination buffer overflow");
/* discard the input bytes the parser consumed */
638 advance_str(data, hp->offset);
/* trim buf to the number of body bytes actually written */
639 rb_str_set_len(buf, hp->s.dest_offset);
641 if (RSTRING_LEN(buf) == 0 && chunked_eof(hp)) {
642 assert(hp->len.chunk == 0 && "chunk at EOF but more to parse");
648 /* no need to enter the Ragel machine for unchunked transfers */
649 assert(hp->len.content >= 0 && "negative Content-Length");
650 if (hp->len.content > 0) {
/* copy no more than what is left of the declared Content-Length */
651 long nr = MIN(dlen, hp->len.content);
653 memcpy(RSTRING_PTR(buf), dptr, nr);
654 hp->len.content -= nr;
/* nothing left to read: put the parser into its finished state */
655 if (hp->len.content == 0)
656 hp->cs = http_parser_first_final;
657 advance_str(data, nr);
658 rb_str_set_len(buf, nr);
662 hp->offset = 0; /* for trailer parsing */
/* Looks str up among the common header fields, stores the result in var and asserts that the lookup did not return nil. */
666 #define SET_GLOBAL(var,str) do { \
667 var = find_common_field(str, sizeof(str) - 1); \
668 assert(!NIL_P(var) && "missed global field"); \
671 void Init_unicorn_http(void)
673 VALUE mUnicorn, cHttpParser;
675 mUnicorn = rb_define_module("Unicorn");
676 cHttpParser = rb_define_class_under(mUnicorn, "HttpParser", rb_cObject);
678 rb_define_class_under(mUnicorn, "HttpParserError", rb_eIOError);
681 rb_define_alloc_func(cHttpParser, HttpParser_alloc);
682 rb_define_method(cHttpParser, "initialize", HttpParser_init,0);
683 rb_define_method(cHttpParser, "reset", HttpParser_reset,0);
684 rb_define_method(cHttpParser, "headers", HttpParser_headers, 2);
685 rb_define_method(cHttpParser, "filter_body", HttpParser_filter_body, 2);
686 rb_define_method(cHttpParser, "trailers", HttpParser_headers, 2);
687 rb_define_method(cHttpParser, "content_length", HttpParser_content_length, 0);
688 rb_define_method(cHttpParser, "body_eof?", HttpParser_body_eof, 0);
689 rb_define_method(cHttpParser, "keepalive?", HttpParser_keepalive, 0);
690 rb_define_method(cHttpParser, "headers?", HttpParser_has_headers, 0);
693 * The maximum size of a single chunk when using chunked transfer encoding.
694 * This is only a theoretical maximum used to detect errors in clients,
695 * it is highly unlikely to encounter clients that send more than
696 * several kilobytes at once.
698 rb_define_const(cHttpParser, "CHUNK_MAX", OFFT2NUM(UH_OFF_T_MAX));
701 * The maximum size of the body as specified by Content-Length.
702 * This is only a theoretical maximum, the actual limit is subject
703 * to the limits of the file system used for +Dir.tmpdir+.
705 rb_define_const(cHttpParser, "LENGTH_MAX", OFFT2NUM(UH_OFF_T_MAX));
707 init_common_fields();
708 SET_GLOBAL(g_http_host, "HOST");
709 SET_GLOBAL(g_http_trailer, "TRAILER");
710 SET_GLOBAL(g_http_transfer_encoding, "TRANSFER_ENCODING");
711 SET_GLOBAL(g_content_length, "CONTENT_LENGTH");
712 SET_GLOBAL(g_http_connection, "CONNECTION");