2 * Copyright (c) 2009 Eric Wong (all bugs are Eric's fault)
3 * Copyright (c) 2005 Zed A. Shaw
4 * You can redistribute it and/or modify it under the same terms as Ruby.
10 #include <sys/types.h>
11 #include "common_field_optimization.h"
12 #include "global_variables.h"
15 #define UH_FL_CHUNKED 0x1 /* a "Transfer-Encoding: chunked" header was seen */
16 #define UH_FL_HASBODY 0x2 /* a Content-Length or chunked Transfer-Encoding header was seen */
17 #define UH_FL_INBODY 0x4 /* set after finalize_header() runs, when HASBODY is set */
18 #define UH_FL_HASTRAILER 0x8 /* a "Trailer" header was seen */
19 #define UH_FL_INTRAILER 0x10 /* set by end_chunked_body before switching to the Trailers machine */
20 #define UH_FL_INCHUNK 0x20 /* chunk is longer than the buffered data; execute resumes at skip_chunk_data_hack */
21 #define UH_FL_KAMETHOD 0x40 /* request method is GET or HEAD */
22 #define UH_FL_KAVERSION 0x80 /* HTTP/1.1, or "Connection: keep-alive" on a GET/HEAD request */
23 #define UH_FL_HASHEADER 0x100 /* set by http_version(); when unset, finalize_header() stores g_http_09 as SERVER_PROTOCOL */
25 /* both of these flags need to be set for keepalive to be supported */
26 #define UH_FL_KEEPALIVE (UH_FL_KAMETHOD | UH_FL_KAVERSION)
28 /* keep this small for Rainbows! since every client has one */
30 int cs; /* Ragel internal state */
34 union { /* these 2 fields don't nest */
39 size_t field_len; /* only used during header processing */
40 size_t dest_offset; /* only used during body processing */
42 VALUE cont; /* Qfalse: unset, Qnil: ignored header, T_STRING: append */
49 static void finalize_header(struct http_parser *hp, VALUE req);
51 #define REMAINING (unsigned long)(pe - p) /* bytes from p up to pe; requires p and pe in scope at the use site */
52 #define LEN(AT, FPC) (FPC - buffer - hp->AT) /* distance from the offset stored in hp->AT to FPC; FPC is not parenthesized, so pass a plain pointer expression */
53 #define MARK(M,FPC) (hp->M = (FPC) - buffer) /* record FPC in hp->M as an offset from buffer */
54 #define PTR_TO(F) (buffer + hp->F) /* turn the offset stored in hp->F back into a pointer into buffer */
55 #define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC)) /* new Ruby String covering the offset hp->M up to FPC */
57 #define HP_FL_TEST(hp,fl) ((hp)->flags & (UH_FL_##fl)) /* non-zero if any bit of UH_FL_<fl> is set */
58 #define HP_FL_SET(hp,fl) ((hp)->flags |= (UH_FL_##fl)) /* turn on every bit of UH_FL_<fl> */
59 #define HP_FL_UNSET(hp,fl) ((hp)->flags &= ~(UH_FL_##fl)) /* clear every bit of UH_FL_<fl> */
60 #define HP_FL_ALL(hp,fl) (HP_FL_TEST(hp, fl) == (UH_FL_##fl)) /* true only when every bit of UH_FL_<fl> is set; needed for multi-bit masks such as KEEPALIVE */
63 * handles values of the "Connection:" header, keepalive is implied
64 * for HTTP/1.1 but needs to be explicitly enabled with HTTP/1.0.
65 * Additionally, we require GET/HEAD requests to support keepalive.
67 static void hp_keepalive_connection(struct http_parser *hp, VALUE val)
69 /* REQUEST_METHOD is always set before any headers */
70 if (HP_FL_TEST(hp, KAMETHOD)) {
71 if (STR_CSTR_CASE_EQ(val, "keep-alive")) {
72 /* basically have HTTP/1.0 masquerade as HTTP/1.1+ */
73 HP_FL_SET(hp, KAVERSION);
74 } else if (STR_CSTR_CASE_EQ(val, "close")) {
76 * it doesn't matter what HTTP version or request method we have,
77 * if a client says "Connection: close", we disable keepalive
79 HP_FL_UNSET(hp, KEEPALIVE);
82 * client could've sent anything, ignore it for now. Maybe
83 * "HP_FL_UNSET(hp, KEEPALIVE);" just in case?
84 * Raising an exception might be too mean...
91 request_method(struct http_parser *hp, VALUE req, const char *ptr, size_t len)
96 * we only support keepalive for GET and HEAD requests for now; other
97 * methods are too rarely seen to be worth optimizing. POST is unsafe
98 * since some clients send extra bytes after POST bodies.
100 if (CONST_MEM_EQ("GET", ptr, len)) {
101 HP_FL_SET(hp, KAMETHOD);
103 } else if (CONST_MEM_EQ("HEAD", ptr, len)) {
104 HP_FL_SET(hp, KAMETHOD);
107 v = rb_str_new(ptr, len);
109 rb_hash_aset(req, g_request_method, v);
113 http_version(struct http_parser *hp, VALUE req, const char *ptr, size_t len)
117 HP_FL_SET(hp, HASHEADER);
119 if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
120 /* HTTP/1.1 implies keepalive unless "Connection: close" is set */
121 HP_FL_SET(hp, KAVERSION);
123 } else if (CONST_MEM_EQ("HTTP/1.0", ptr, len)) {
126 v = rb_str_new(ptr, len);
128 rb_hash_aset(req, g_server_protocol, v);
129 rb_hash_aset(req, g_http_version, v);
132 static inline void hp_invalid_if_trailer(struct http_parser *hp)
134 if (HP_FL_TEST(hp, INTRAILER))
135 rb_raise(eHttpParserError, "invalid Trailer");
138 static void write_cont_value(struct http_parser *hp,
139 char *buffer, const char *p)
143 if (hp->cont == Qfalse)
144 rb_raise(eHttpParserError, "invalid continuation line");
146 return; /* we're ignoring this header (probably Host:) */
148 assert(TYPE(hp->cont) == T_STRING && "continuation line is not a string");
149 assert(hp->mark > 0 && "impossible continuation line offset");
151 if (LEN(mark, p) == 0)
154 if (RSTRING_LEN(hp->cont) > 0)
159 if (RSTRING_LEN(hp->cont) > 0) {
160 assert((' ' == *vptr || '\t' == *vptr) && "invalid leading white space");
163 rb_str_buf_cat(hp->cont, vptr, LEN(mark, p));
166 static void write_value(VALUE req, struct http_parser *hp,
167 const char *buffer, const char *p)
169 VALUE f = find_common_field(PTR_TO(start.field), hp->s.field_len);
173 VALIDATE_MAX_LENGTH(LEN(mark, p), FIELD_VALUE);
174 v = LEN(mark, p) == 0 ? rb_str_buf_new(128) : STR_NEW(mark, p);
176 VALIDATE_MAX_LENGTH(hp->s.field_len, FIELD_NAME);
177 f = uncommon_field(PTR_TO(start.field), hp->s.field_len);
178 } else if (f == g_http_connection) {
179 hp_keepalive_connection(hp, v);
180 } else if (f == g_content_length) {
181 hp->len.content = parse_length(RSTRING_PTR(v), RSTRING_LEN(v));
182 if (hp->len.content < 0)
183 rb_raise(eHttpParserError, "invalid Content-Length");
184 HP_FL_SET(hp, HASBODY);
185 hp_invalid_if_trailer(hp);
186 } else if (f == g_http_transfer_encoding) {
187 if (STR_CSTR_CASE_EQ(v, "chunked")) {
188 HP_FL_SET(hp, CHUNKED);
189 HP_FL_SET(hp, HASBODY);
191 hp_invalid_if_trailer(hp);
192 } else if (f == g_http_trailer) {
193 HP_FL_SET(hp, HASTRAILER);
194 hp_invalid_if_trailer(hp);
196 assert(TYPE(f) == T_STRING && "memoized object is not a string");
201 * ignore "Version" headers since they conflict with the HTTP_VERSION
204 if (rb_str_cmp(f, g_http_version) == 0) {
209 e = rb_hash_aref(req, f);
211 hp->cont = rb_hash_aset(req, f, v);
212 } else if (f == g_http_host) {
214 * ignored, absolute URLs in REQUEST_URI take precedence over
215 * the Host: header (ref: rfc 2616, section 5.2.1)
219 rb_str_buf_cat(e, ",", 1);
220 hp->cont = rb_str_buf_append(e, v);
229 action mark {MARK(mark, fpc); }
231 action start_field { MARK(start.field, fpc); }
232 action snake_upcase_field { snake_upcase_char(deconst(fpc)); }
233 action downcase_char { downcase_char(deconst(fpc)); }
234 action write_field { hp->s.field_len = LEN(start.field, fpc); }
235 action start_value { MARK(mark, fpc); }
236 action write_value { write_value(req, hp, buffer, fpc); }
237 action write_cont_value { write_cont_value(hp, buffer, fpc); }
238 action request_method {
239 request_method(hp, req, PTR_TO(mark), LEN(mark, fpc));
242 rb_hash_aset(req, g_rack_url_scheme, STR_NEW(mark, fpc));
245 rb_hash_aset(req, g_http_host, STR_NEW(mark, fpc));
250 VALIDATE_MAX_LENGTH(LEN(mark, fpc), REQUEST_URI);
251 str = rb_hash_aset(req, g_request_uri, STR_NEW(mark, fpc));
253 * "OPTIONS * HTTP/1.1\r\n" is a valid request, but we can't have '*'
254 * in REQUEST_PATH or PATH_INFO or else Rack::Lint will complain
256 if (STR_CSTR_EQ(str, "*")) {
257 str = rb_str_new(NULL, 0);
258 rb_hash_aset(req, g_path_info, str);
259 rb_hash_aset(req, g_request_path, str);
263 VALIDATE_MAX_LENGTH(LEN(mark, fpc), FRAGMENT);
264 rb_hash_aset(req, g_fragment, STR_NEW(mark, fpc));
266 action start_query {MARK(start.query, fpc); }
267 action query_string {
268 VALIDATE_MAX_LENGTH(LEN(start.query, fpc), QUERY_STRING);
269 rb_hash_aset(req, g_query_string, STR_NEW(start.query, fpc));
271 action http_version { http_version(hp, req, PTR_TO(mark), LEN(mark, fpc)); }
272 action request_path {
275 VALIDATE_MAX_LENGTH(LEN(mark, fpc), REQUEST_PATH);
276 val = rb_hash_aset(req, g_request_path, STR_NEW(mark, fpc));
278 /* rack says PATH_INFO must start with "/" or be empty */
279 if (!STR_CSTR_EQ(val, "*"))
280 rb_hash_aset(req, g_path_info, val);
282 action add_to_chunk_size {
283 hp->len.chunk = step_incr(hp->len.chunk, fc, 16);
284 if (hp->len.chunk < 0)
285 rb_raise(eHttpParserError, "invalid chunk size");
288 finalize_header(hp, req);
290 cs = http_parser_first_final;
291 if (HP_FL_TEST(hp, HASBODY)) {
292 HP_FL_SET(hp, INBODY);
293 if (HP_FL_TEST(hp, CHUNKED))
294 cs = http_parser_en_ChunkedBody;
296 assert(!HP_FL_TEST(hp, CHUNKED) && "chunked encoding without body!");
299 * go back to Ruby so we can call the Rack application, we'll reenter
300 * the parser iff the body needs to be processed.
305 action end_trailers {
306 cs = http_parser_first_final;
310 action end_chunked_body {
311 HP_FL_SET(hp, INTRAILER);
312 cs = http_parser_en_Trailers;
314 assert(p <= pe && "buffer overflow after chunked body");
318 action skip_chunk_data {
319 skip_chunk_data_hack: {
320 size_t nr = MIN((size_t)hp->len.chunk, REMAINING);
321 memcpy(RSTRING_PTR(req) + hp->s.dest_offset, fpc, nr);
322 hp->s.dest_offset += nr;
325 assert(hp->len.chunk >= 0 && "negative chunk length");
326 if ((size_t)hp->len.chunk > REMAINING) {
327 HP_FL_SET(hp, INCHUNK);
335 include unicorn_http_common "unicorn_http_common.rl";
341 static void http_parser_init(struct http_parser *hp)
344 memset(hp, 0, sizeof(struct http_parser));
345 hp->cont = Qfalse; /* zero on MRI, should be optimized away by above */
351 static void http_parser_execute(struct http_parser *hp,
352 VALUE req, char *buffer, size_t len)
356 size_t off = hp->offset;
358 if (cs == http_parser_first_final)
361 assert(off <= len && "offset past end of buffer");
366 assert((void *)(pe - p) == (void *)(len - off) &&
367 "pointers aren't same distance");
369 if (HP_FL_TEST(hp, INCHUNK)) {
370 HP_FL_UNSET(hp, INCHUNK);
371 goto skip_chunk_data_hack;
374 post_exec: /* "_out:" also goes here */
375 if (hp->cs != http_parser_error)
377 hp->offset = p - buffer;
379 assert(p <= pe && "buffer overflow after parsing execute");
380 assert(hp->offset <= len && "offset longer than length");
383 static struct http_parser *data_get(VALUE self)
385 struct http_parser *hp;
387 Data_Get_Struct(self, struct http_parser, hp);
388 assert(hp && "failed to extract http_parser struct");
392 static void finalize_header(struct http_parser *hp, VALUE req)
394 VALUE temp = rb_hash_aref(req, g_rack_url_scheme);
395 VALUE server_name = g_localhost;
396 VALUE server_port = g_port_80;
398 /* set rack.url_scheme to "https" or "http", no others are allowed by Rack */
400 temp = rb_hash_aref(req, g_http_x_forwarded_proto);
401 if (!NIL_P(temp) && STR_CSTR_EQ(temp, "https"))
402 server_port = g_port_443;
405 rb_hash_aset(req, g_rack_url_scheme, temp);
406 } else if (STR_CSTR_EQ(temp, "https")) {
407 server_port = g_port_443;
409 assert(server_port == g_port_80 && "server_port not set");
412 /* parse and set the SERVER_NAME and SERVER_PORT variables */
413 temp = rb_hash_aref(req, g_http_host);
415 char *colon = memchr(RSTRING_PTR(temp), ':', RSTRING_LEN(temp));
417 long port_start = colon - RSTRING_PTR(temp) + 1;
419 server_name = rb_str_substr(temp, 0, colon - RSTRING_PTR(temp));
420 if ((RSTRING_LEN(temp) - port_start) > 0)
421 server_port = rb_str_substr(temp, port_start, RSTRING_LEN(temp));
426 rb_hash_aset(req, g_server_name, server_name);
427 rb_hash_aset(req, g_server_port, server_port);
428 if (!HP_FL_TEST(hp, HASHEADER))
429 rb_hash_aset(req, g_server_protocol, g_http_09);
431 /* rack requires QUERY_STRING */
432 if (NIL_P(rb_hash_aref(req, g_query_string)))
433 rb_hash_aset(req, g_query_string, rb_str_new(NULL, 0));
436 static void hp_mark(void *ptr)
438 struct http_parser *hp = ptr;
440 rb_gc_mark(hp->cont);
443 static VALUE HttpParser_alloc(VALUE klass)
445 struct http_parser *hp;
446 return Data_Make_Struct(klass, struct http_parser, hp_mark, -1, hp);
452 * parser.new => parser
454 * Creates a new parser.
456 static VALUE HttpParser_init(VALUE self)
458 http_parser_init(data_get(self));
465 * parser.reset => nil
467 * Resets the parser to its initial state so that you can reuse it
468 * rather than making new ones.
470 static VALUE HttpParser_reset(VALUE self)
472 http_parser_init(data_get(self));
477 static void advance_str(VALUE str, off_t nr)
479 long len = RSTRING_LEN(str);
486 assert(nr <= len && "trying to advance past end of buffer");
488 if (len > 0) /* unlikely, len is usually 0 */
489 memmove(RSTRING_PTR(str), RSTRING_PTR(str) + nr, len);
490 rb_str_set_len(str, len);
495 * parser.content_length => nil or Integer
497 * Returns the number of bytes left to run through HttpParser#filter_body.
498 * This will initially be the value of the "Content-Length" HTTP header
499 * after header parsing is complete and will decrease in value as
500 * HttpParser#filter_body is called for each chunk. This should return
501 * zero for requests with no body.
503 * This will return nil on "Transfer-Encoding: chunked" requests.
505 static VALUE HttpParser_content_length(VALUE self)
507 struct http_parser *hp = data_get(self);
509 return HP_FL_TEST(hp, CHUNKED) ? Qnil : OFFT2NUM(hp->len.content);
513 * Document-method: trailers
515 * parser.trailers(req, data) => req or nil
517 * This is an alias for HttpParser#headers
521 * Document-method: headers
523 * parser.headers(req, data) => req or nil
525 * Takes a Hash and a String of data, parses the String of data filling
526 * in the Hash, returning the Hash if parsing is finished, nil otherwise.
527 * When returning the req Hash, it may modify data to point to where
528 * body processing should begin.
530 * Raises HttpParserError if there are parsing errors.
532 static VALUE HttpParser_headers(VALUE self, VALUE req, VALUE data)
534 struct http_parser *hp = data_get(self);
538 http_parser_execute(hp, req, RSTRING_PTR(data), RSTRING_LEN(data));
539 VALIDATE_MAX_LENGTH(hp->offset, HEADER);
541 if (hp->cs == http_parser_first_final ||
542 hp->cs == http_parser_en_ChunkedBody) {
543 advance_str(data, hp->offset + 1);
549 if (hp->cs == http_parser_error)
550 rb_raise(eHttpParserError, "Invalid HTTP format, parsing fails.");
555 static int chunked_eof(struct http_parser *hp)
557 return ((hp->cs == http_parser_first_final) || HP_FL_TEST(hp, INTRAILER));
562 * parser.body_eof? => true or false
564 * Detects if we're done filtering the body or not. This can be used
565 * to detect when to stop calling HttpParser#filter_body.
567 static VALUE HttpParser_body_eof(VALUE self)
569 struct http_parser *hp = data_get(self);
571 if (HP_FL_TEST(hp, CHUNKED))
572 return chunked_eof(hp) ? Qtrue : Qfalse;
574 return hp->len.content == 0 ? Qtrue : Qfalse;
579 * parser.keepalive? => true or false
581 * This should be used to detect if a request can really handle
582 * keepalives and pipelining. Currently, the rules are:
584 * 1. MUST be a GET or HEAD request
585 * 2. MUST be HTTP/1.1 +or+ HTTP/1.0 with "Connection: keep-alive"
586 * 3. MUST NOT have "Connection: close" set
588 static VALUE HttpParser_keepalive(VALUE self)
590 struct http_parser *hp = data_get(self);
592 return HP_FL_ALL(hp, KEEPALIVE) ? Qtrue : Qfalse;
597 * parser.headers? => true or false
599 * This should be used to detect if a request has headers (and if
600 * the response will have headers as well). HTTP/0.9 requests
601 * should return false; all subsequent HTTP versions will return true.
603 static VALUE HttpParser_has_headers(VALUE self)
605 struct http_parser *hp = data_get(self);
607 return HP_FL_TEST(hp, HASHEADER) ? Qtrue : Qfalse;
612 * parser.filter_body(buf, data) => nil/data
614 * Takes a String of +data+, will modify data if dechunking is done.
615 * Returns +nil+ if there is more data left to process. Returns
616 * +data+ if body processing is complete. When returning +data+,
617 * it may modify +data+ so the start of the string points to where
618 * the body ended so that trailer processing can begin.
620 * Raises HttpParserError if there are dechunking errors.
621 * Basically this is a glorified memcpy(3) that copies +data+
622 * into +buf+ while filtering it through the dechunker.
624 static VALUE HttpParser_filter_body(VALUE self, VALUE buf, VALUE data)
626 struct http_parser *hp = data_get(self);
631 dptr = RSTRING_PTR(data);
632 dlen = RSTRING_LEN(data);
635 rb_str_resize(buf, dlen); /* we can never copy more than dlen bytes */
636 OBJ_TAINT(buf); /* keep weirdo $SAFE users happy */
638 if (HP_FL_TEST(hp, CHUNKED)) {
639 if (!chunked_eof(hp)) {
640 hp->s.dest_offset = 0;
641 http_parser_execute(hp, buf, dptr, dlen);
642 if (hp->cs == http_parser_error)
643 rb_raise(eHttpParserError, "Invalid HTTP format, parsing fails.");
645 assert(hp->s.dest_offset <= hp->offset &&
646 "destination buffer overflow");
647 advance_str(data, hp->offset);
648 rb_str_set_len(buf, hp->s.dest_offset);
650 if (RSTRING_LEN(buf) == 0 && chunked_eof(hp)) {
651 assert(hp->len.chunk == 0 && "chunk at EOF but more to parse");
657 /* no need to enter the Ragel machine for unchunked transfers */
658 assert(hp->len.content >= 0 && "negative Content-Length");
659 if (hp->len.content > 0) {
660 long nr = MIN(dlen, hp->len.content);
662 memcpy(RSTRING_PTR(buf), dptr, nr);
663 hp->len.content -= nr;
664 if (hp->len.content == 0)
665 hp->cs = http_parser_first_final;
666 advance_str(data, nr);
667 rb_str_set_len(buf, nr);
671 hp->offset = 0; /* for trailer parsing */
675 #define SET_GLOBAL(var,str) do { \
676 var = find_common_field(str, sizeof(str) - 1); \
677 assert(!NIL_P(var) && "missed global field"); \
680 void Init_unicorn_http(void)
682 VALUE mUnicorn, cHttpParser;
684 mUnicorn = rb_define_module("Unicorn");
685 cHttpParser = rb_define_class_under(mUnicorn, "HttpParser", rb_cObject);
687 rb_define_class_under(mUnicorn, "HttpParserError", rb_eIOError);
690 rb_define_alloc_func(cHttpParser, HttpParser_alloc);
691 rb_define_method(cHttpParser, "initialize", HttpParser_init,0);
692 rb_define_method(cHttpParser, "reset", HttpParser_reset,0);
693 rb_define_method(cHttpParser, "headers", HttpParser_headers, 2);
694 rb_define_method(cHttpParser, "filter_body", HttpParser_filter_body, 2);
695 rb_define_method(cHttpParser, "trailers", HttpParser_headers, 2);
696 rb_define_method(cHttpParser, "content_length", HttpParser_content_length, 0);
697 rb_define_method(cHttpParser, "body_eof?", HttpParser_body_eof, 0);
698 rb_define_method(cHttpParser, "keepalive?", HttpParser_keepalive, 0);
699 rb_define_method(cHttpParser, "headers?", HttpParser_has_headers, 0);
702 * The maximum size of a single chunk when using chunked transfer encoding.
703 * This is only a theoretical maximum used to detect errors in clients,
704 * it is highly unlikely to encounter clients that send more than
705 * several kilobytes at once.
707 rb_define_const(cHttpParser, "CHUNK_MAX", OFFT2NUM(UH_OFF_T_MAX));
710 * The maximum size of the body as specified by Content-Length.
711 * This is only a theoretical maximum, the actual limit is subject
712 * to the limits of the file system used for +Dir.tmpdir+.
714 rb_define_const(cHttpParser, "LENGTH_MAX", OFFT2NUM(UH_OFF_T_MAX));
716 init_common_fields();
717 SET_GLOBAL(g_http_host, "HOST");
718 SET_GLOBAL(g_http_trailer, "TRAILER");
719 SET_GLOBAL(g_http_transfer_encoding, "TRANSFER_ENCODING");
720 SET_GLOBAL(g_content_length, "CONTENT_LENGTH");
721 SET_GLOBAL(g_http_connection, "CONNECTION");