2 * Copyright (c) 2009 Eric Wong (all bugs are Eric's fault)
3 * Copyright (c) 2005 Zed A. Shaw
4 * You can redistribute it and/or modify it under the same terms as Ruby.
10 #include <sys/types.h>
11 #include "common_field_optimization.h"
12 #include "global_variables.h"
/* Parser state bits kept in hp->flags. */
/* set by write_value when the Transfer-Encoding value is "chunked" */
15 #define UH_FL_CHUNKED 0x1
/* set by write_value for Content-Length or a chunked Transfer-Encoding */
16 #define UH_FL_HASBODY 0x2
/* set once the header is complete and UH_FL_HASBODY is present */
17 #define UH_FL_INBODY 0x4
/* set by write_value when a Trailer header is seen */
18 #define UH_FL_HASTRAILER 0x8
/* set at the end of a chunked body when UH_FL_HASTRAILER is present */
19 #define UH_FL_INTRAILER 0x10
/* set when the current chunk extends past the bytes left in the buffer */
20 #define UH_FL_INCHUNK 0x20
/* set by request_method for GET and HEAD */
21 #define UH_FL_KAMETHOD 0x40
/* set for "HTTP/1.1", or for a "keep-alive" Connection value on GET/HEAD */
22 #define UH_FL_KAVERSION 0x80
/* both keepalive bits; HttpParser_keepalive is true only when both are set */
24 #define UH_FL_KEEPALIVE (UH_FL_KAMETHOD | UH_FL_KAVERSION)
27 int cs; /* Ragel internal state */
30 union { /* these 3 fields don't nest */
36 size_t field_len; /* only used during header processing */
37 size_t dest_offset; /* only used during body processing */
/* forward declaration; finalize_header is defined further down in this file */
45 static void finalize_header(VALUE req);
/*
 * Helper macros for the parser code.  They are not self-contained: the
 * expansions use the names p, pe, buffer and hp from the scope in which
 * they are expanded.
 */
/* number of bytes between p and pe, as an unsigned long */
47 #define REMAINING (unsigned long)(pe - p)
/* distance from the offset stored in hp->AT to the position FPC */
48 #define LEN(AT, FPC) (FPC - buffer - hp->AT)
/* store the offset of FPC (relative to buffer) in hp->M */
49 #define MARK(M,FPC) (hp->M = (FPC) - buffer)
/* turn the offset stored in hp->F back into a pointer into buffer */
50 #define PTR_TO(F) (buffer + hp->F)
/* new Ruby string covering the bytes from the mark hp->M up to FPC */
51 #define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC))
/*
 * Stores the request method in the req hash under g_request_method.
 * ptr and len delimit the method text.  GET and HEAD also set
 * UH_FL_KAMETHOD in hp->flags, which is one of the two bits that
 * keepalive detection requires.
 *
 * NOTE(review): the statements that assign v in the GET and HEAD
 * branches are not visible in this listing.
 */
54 request_method(struct http_parser *hp, VALUE req, const char *ptr, size_t len)
58 if (CONST_MEM_EQ("GET", ptr, len)) {
59 hp->flags |= UH_FL_KAMETHOD;
61 } else if (CONST_MEM_EQ("HEAD", ptr, len)) {
62 hp->flags |= UH_FL_KAMETHOD;
/* a fresh Ruby string is built from the raw method text */
65 v = rb_str_new(ptr, len);
67 rb_hash_aset(req, g_request_method, v);
/*
 * Stores the protocol version in the req hash under g_http_version.
 * ptr and len delimit the version text.  "HTTP/1.1" also sets
 * UH_FL_KAVERSION in hp->flags.
 *
 * NOTE(review): the statement that assigns v in the HTTP/1.1 branch is
 * not visible in this listing.
 */
71 http_version(struct http_parser *hp, VALUE req, const char *ptr, size_t len)
75 if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
76 hp->flags |= UH_FL_KAVERSION;
/* a fresh Ruby string is built from the raw version text */
79 v = rb_str_new(ptr, len);
81 rb_hash_aset(req, g_http_version, v);
/*
 * Raises eHttpParserError ("invalid Trailer") when flags has
 * UH_FL_INTRAILER set.  write_value calls this after handling the
 * Content-Length, Transfer-Encoding and Trailer fields.
 */
84 static void invalid_if_trailer(int flags)
86 if (flags & UH_FL_INTRAILER)
87 rb_raise(eHttpParserError, "invalid Trailer");
/*
 * Handles one completed header field: the name starts at the
 * start.field mark with length hp->s.field_len, the value runs from the
 * mark offset up to p.  Updates hp->flags and hp->len.content for the
 * fields the parser cares about and stores the value in the req hash.
 *
 * Raises eHttpParserError for an invalid Content-Length and, through
 * invalid_if_trailer, when Content-Length, Transfer-Encoding or Trailer
 * appear while UH_FL_INTRAILER is set.
 *
 * NOTE(review): the declarations of v and e and several branch headers
 * are not visible in this listing.
 */
90 static void write_value(VALUE req, struct http_parser *hp,
91 const char *buffer, const char *p)
/* look the field name up among the pre-built common fields first */
93 VALUE f = find_common_field(PTR_TO(start.field), hp->s.field_len);
97 VALIDATE_MAX_LENGTH(LEN(mark, p), FIELD_VALUE);
100 VALIDATE_MAX_LENGTH(hp->s.field_len, FIELD_NAME);
101 f = uncommon_field(PTR_TO(start.field), hp->s.field_len);
102 } else if (f == g_http_connection) {
/* the Connection value is only examined for GET/HEAD requests */
103 if (hp->flags & UH_FL_KAMETHOD) {
104 if (STR_CSTR_CASE_EQ(v, "keep-alive"))
105 hp->flags |= UH_FL_KAVERSION;
106 else if (STR_CSTR_CASE_EQ(v, "close"))
/* clears both keepalive bits */
107 hp->flags &= ~UH_FL_KEEPALIVE;
109 } else if (f == g_content_length) {
110 hp->len.content = parse_length(RSTRING_PTR(v), RSTRING_LEN(v));
/* a negative result from parse_length is treated as a parse failure */
111 if (hp->len.content < 0)
112 rb_raise(eHttpParserError, "invalid Content-Length");
113 hp->flags |= UH_FL_HASBODY;
114 invalid_if_trailer(hp->flags);
115 } else if (f == g_http_transfer_encoding) {
116 if (STR_CSTR_CASE_EQ(v, "chunked"))
117 hp->flags |= UH_FL_CHUNKED | UH_FL_HASBODY;
118 invalid_if_trailer(hp->flags);
119 } else if (f == g_http_trailer) {
120 hp->flags |= UH_FL_HASTRAILER;
121 invalid_if_trailer(hp->flags);
/* e is whatever req already holds for this field */
124 e = rb_hash_aref(req, f);
126 rb_hash_aset(req, f, v);
127 } else if (f != g_http_host) {
128 /* full URLs in REQUEST_URI take precedence for the Host: header */
/* repeated fields other than Host are joined onto e with a comma */
129 rb_str_buf_cat(e, ",", 1);
130 rb_str_buf_append(e, v);
139 action mark {MARK(mark, fpc); }
141 action start_field { MARK(start.field, fpc); }
142 action snake_upcase_field { snake_upcase_char((char *)fpc); }
143 action downcase_char { downcase_char((char *)fpc); }
144 action write_field { hp->s.field_len = LEN(start.field, fpc); }
145 action start_value { MARK(mark, fpc); }
146 action write_value { write_value(req, hp, buffer, fpc); }
147 action request_method {
/*
 * The actions in this machine share one convention: a start action
 * records an offset with MARK, and the matching end action reads the
 * bytes from that offset up to fpc through LEN, PTR_TO or STR_NEW.
 * NOTE(review): several action headers are not visible in this listing.
 */
148 request_method(hp, req, PTR_TO(mark), LEN(mark, fpc));
151 rb_hash_aset(req, g_rack_url_scheme, STR_NEW(mark, fpc));
154 rb_hash_aset(req, g_http_host, STR_NEW(mark, fpc));
157 size_t len = LEN(mark, fpc);
160 VALIDATE_MAX_LENGTH(len, REQUEST_URI);
/* the result of rb_hash_aset is kept in str for the comparison below */
161 str = rb_hash_aset(req, g_request_uri, STR_NEW(mark, fpc));
163 * "OPTIONS * HTTP/1.1\r\n" is a valid request, but we can't have '*'
164 * in REQUEST_PATH or PATH_INFO or else Rack::Lint will complain
166 if (STR_CSTR_EQ(str, "*")) {
167 str = rb_str_new(NULL, 0);
168 rb_hash_aset(req, g_path_info, str);
169 rb_hash_aset(req, g_request_path, str);
173 VALIDATE_MAX_LENGTH(LEN(mark, fpc), FRAGMENT);
174 rb_hash_aset(req, g_fragment, STR_NEW(mark, fpc));
176 action start_query {MARK(start.query, fpc); }
177 action query_string {
178 VALIDATE_MAX_LENGTH(LEN(start.query, fpc), QUERY_STRING);
179 rb_hash_aset(req, g_query_string, STR_NEW(start.query, fpc));
181 action http_version { http_version(hp, req, PTR_TO(mark), LEN(mark, fpc)); }
182 action request_path {
184 size_t len = LEN(mark, fpc);
186 VALIDATE_MAX_LENGTH(len, REQUEST_PATH);
187 val = rb_hash_aset(req, g_request_path, STR_NEW(mark, fpc));
189 /* rack says PATH_INFO must start with "/" or be empty */
190 if (!STR_CSTR_EQ(val, "*"))
191 rb_hash_aset(req, g_path_info, val);
193 action add_to_chunk_size {
194 hp->len.chunk = step_incr(hp->len.chunk, fc, 16);
195 if (hp->len.chunk < 0)
196 rb_raise(eHttpParserError, "invalid chunk size");
199 finalize_header(req);
201 cs = http_parser_first_final;
202 if (hp->flags & UH_FL_HASBODY) {
203 hp->flags |= UH_FL_INBODY;
204 if (hp->flags & UH_FL_CHUNKED)
205 cs = http_parser_en_ChunkedBody;
207 assert(!(hp->flags & UH_FL_CHUNKED));
210 * go back to Ruby so we can call the Rack application, we'll reenter
211 * the parser iff the body needs to be processed.
216 action end_trailers {
/* trailer parsing is finished: put the machine in its first final state */
217 cs = http_parser_first_final;
221 action end_chunked_body {
/*
 * With a Trailer header announced, continue in the Trailers machine and
 * record that in UH_FL_INTRAILER; line 226 sets the first final state.
 */
222 if (hp->flags & UH_FL_HASTRAILER) {
223 hp->flags |= UH_FL_INTRAILER;
224 cs = http_parser_en_Trailers;
226 cs = http_parser_first_final;
232 action skip_chunk_data {
/*
 * http_parser_execute jumps straight to this label when it is re-entered
 * with UH_FL_INCHUNK set, so chunk data that spans buffers is resumed
 * without running the state machine over it.
 */
233 skip_chunk_data_hack: {
/* copy no more than what is left of the chunk or of the input buffer */
234 size_t nr = MIN(hp->len.chunk, REMAINING);
/* during body filtering req is the destination string, not the hash */
235 memcpy(RSTRING_PTR(req) + hp->s.dest_offset, fpc, nr);
236 hp->s.dest_offset += nr;
239 assert(hp->len.chunk >= 0);
/* the chunk continues beyond this buffer: flag it for the next call */
240 if (hp->len.chunk > REMAINING) {
241 hp->flags |= UH_FL_INCHUNK;
249 include unicorn_http_common "unicorn_http_common.rl";
/*
 * Resets a parser to its starting state by zeroing the whole struct.
 * Called by HttpParser_init and HttpParser_reset.
 *
 * NOTE(review): any statements after the memset are not visible in this
 * listing.
 */
255 static void http_parser_init(struct http_parser *hp)
258 memset(hp, 0, sizeof(struct http_parser));
/*
 * Runs the parser over buffer (len bytes), resuming at the offset saved
 * in hp->start.offset, and saves the new offset there afterwards.
 * req is the header hash when parsing headers; HttpParser_filter_body
 * passes the destination string in the same parameter.
 *
 * NOTE(review): the declarations of cs, p and pe and the generated
 * machine invocation are not visible in this listing.
 */
264 static void http_parser_execute(struct http_parser *hp,
265 VALUE req, const char *buffer, size_t len)
269 size_t off = hp->start.offset;
271 if (cs == http_parser_first_final)
274 assert(off <= len && "offset past end of buffer");
279 assert(pe - p == len - off && "pointers aren't same distance");
/*
 * A previous call stopped in the middle of a chunk: clear the flag and
 * jump to the label inside the skip_chunk_data action.
 */
281 if (hp->flags & UH_FL_INCHUNK) {
282 hp->flags &= ~(UH_FL_INCHUNK);
283 goto skip_chunk_data_hack;
286 post_exec: /* "_out:" also goes here */
287 if (hp->cs != http_parser_error)
/* remember how far into buffer the parser got */
289 hp->start.offset = p - buffer;
291 assert(p <= pe && "buffer overflow after parsing execute");
292 assert(hp->start.offset <= len && "start.offset longer than length");
/*
 * Unwraps the struct http_parser stored in the Ruby object self.
 * Every HttpParser_* method in this file obtains its parser this way.
 *
 * NOTE(review): the statements after Data_Get_Struct, including the
 * return, are not visible in this listing.
 */
295 static struct http_parser *data_get(VALUE self)
297 struct http_parser *hp;
299 Data_Get_Struct(self, struct http_parser, hp);
/*
 * Fills in the entries of the req hash that are derived from the parsed
 * header: the URL scheme, SERVER_NAME, SERVER_PORT, SERVER_PROTOCOL and a
 * default empty QUERY_STRING.  Invoked from the parser when the header
 * is complete.
 *
 * NOTE(review): several branch headers are not visible in this listing.
 */
304 static void finalize_header(VALUE req)
306 VALUE temp = rb_hash_aref(req, g_rack_url_scheme);
/* defaults used when the Host header does not override them */
307 VALUE server_name = g_localhost;
308 VALUE server_port = g_port_80;
310 /* set rack.url_scheme to "https" or "http", no others are allowed by Rack */
312 temp = rb_hash_aref(req, g_http_x_forwarded_proto);
/* an "https" X-Forwarded-Proto value switches the default port to 443 */
313 if (temp != Qnil && STR_CSTR_EQ(temp, "https"))
314 server_port = g_port_443;
317 rb_hash_aset(req, g_rack_url_scheme, temp);
318 } else if (STR_CSTR_EQ(temp, "https")) {
319 server_port = g_port_443;
322 /* parse and set the SERVER_NAME and SERVER_PORT variables */
323 temp = rb_hash_aref(req, g_http_host);
/*
 * NOTE(review): memchr finds the first ':' in the Host value, so a value
 * with more than one colon (for example a bracketed IPv6 literal) would
 * be split at the first one -- confirm whether such input can reach here.
 */
325 char *colon = memchr(RSTRING_PTR(temp), ':', RSTRING_LEN(temp));
327 long port_start = colon - RSTRING_PTR(temp) + 1;
329 server_name = rb_str_substr(temp, 0, colon - RSTRING_PTR(temp));
/* only replace the default port when something follows the colon */
330 if ((RSTRING_LEN(temp) - port_start) > 0)
331 server_port = rb_str_substr(temp, port_start, RSTRING_LEN(temp));
336 rb_hash_aset(req, g_server_name, server_name);
337 rb_hash_aset(req, g_server_port, server_port);
338 rb_hash_aset(req, g_server_protocol, g_http_11);
340 /* rack requires QUERY_STRING */
341 if (rb_hash_aref(req, g_query_string) == Qnil)
342 rb_hash_aset(req, g_query_string, rb_str_new(NULL, 0));
345 static VALUE HttpParser_alloc(VALUE klass)
347 struct http_parser *hp;
348 return Data_Make_Struct(klass, struct http_parser, NULL, NULL, hp);
354 * parser.new => parser
356 * Creates a new parser.
/*
 * Ruby-level "initialize" (registered in Init_unicorn_http): puts the
 * wrapped parser into its starting state via http_parser_init.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
358 static VALUE HttpParser_init(VALUE self)
360 http_parser_init(data_get(self));
367 * parser.reset => nil
369 * Resets the parser to its initial state so that you can reuse it
370 * rather than making new ones.
/*
 * Ruby-level "reset" (registered in Init_unicorn_http): re-runs
 * http_parser_init on the wrapped parser so the object can be reused.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
372 static VALUE HttpParser_reset(VALUE self)
374 http_parser_init(data_get(self));
/*
 * Drops data from the front of the Ruby string str in place: the bytes
 * starting at offset nr are moved to the start of the buffer and the
 * string length is then set to len.
 *
 * NOTE(review): the statements between the declaration of len and the
 * memmove are not visible in this listing; presumably len is reduced by
 * nr there, otherwise the memmove would read past the string -- confirm.
 */
379 static void advance_str(VALUE str, off_t nr)
381 long len = RSTRING_LEN(str);
388 if (len > 0) /* unlikely, len is usually 0 */
/* memmove rather than memcpy: source and destination overlap */
389 memmove(RSTRING_PTR(str), RSTRING_PTR(str) + nr, len);
390 rb_str_set_len(str, len);
395 * parser.content_length => nil or Integer
397 * Returns the number of bytes left to run through HttpParser#filter_body.
398 * This will initially be the value of the "Content-Length" HTTP header
399 * after header parsing is complete and will decrease in value as
400 * HttpParser#filter_body is called for each chunk. This should return
401 * zero for requests with no body.
403 * This will return nil on "Transfer-Encoding: chunked" requests.
/*
 * Ruby-level "content_length": returns nil when UH_FL_CHUNKED is set,
 * otherwise hp->len.content converted to a Ruby Integer.
 */
405 static VALUE HttpParser_content_length(VALUE self)
407 struct http_parser *hp = data_get(self);
409 return (hp->flags & UH_FL_CHUNKED) ? Qnil : OFFT2NUM(hp->len.content);
413 * Document-method: trailers
415 * parser.trailers(req, data) => req or nil
417 * This is an alias for HttpParser#headers
421 * Document-method: headers
423 * parser.headers(req, data) => req or nil
425 * Takes a Hash and a String of data, parses the String of data filling
426 * in the Hash returning the Hash if parsing is finished, nil otherwise.
427 * When returning the req Hash, it may modify data to point to where
428 * body processing should begin.
430 * Raises HttpParserError if there are parsing errors.
/*
 * Ruby-level "headers" and "trailers" (both names are bound to this
 * function in Init_unicorn_http).  Runs the parser over data, filling
 * req, and raises eHttpParserError when the machine ends in its error
 * state.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
432 static VALUE HttpParser_headers(VALUE self, VALUE req, VALUE data)
434 struct http_parser *hp = data_get(self);
436 http_parser_execute(hp, req, RSTRING_PTR(data), RSTRING_LEN(data));
437 VALIDATE_MAX_LENGTH(hp->start.offset, HEADER);
/*
 * The machine stopped in a final state or at the start of a chunked body:
 * drop the consumed bytes from data and reset the saved offset.
 */
439 if (hp->cs == http_parser_first_final ||
440 hp->cs == http_parser_en_ChunkedBody) {
441 advance_str(data, hp->start.offset + 1);
442 hp->start.offset = 0;
447 if (hp->cs == http_parser_error)
448 rb_raise(eHttpParserError, "Invalid HTTP format, parsing fails.");
/*
 * Non-zero when a chunked body has been fully consumed: either the
 * machine is in its first final state or UH_FL_INTRAILER is set.
 */
453 static int chunked_eof(struct http_parser *hp)
455 return ((hp->cs == http_parser_first_final) ||
456 (hp->flags & UH_FL_INTRAILER));
461 * parser.body_eof? => true or false
463 * Detects if we're done filtering the body or not. This can be used
464 * to detect when to stop calling HttpParser#filter_body.
/*
 * Ruby-level "body_eof?": for chunked requests the answer comes from
 * chunked_eof; otherwise the body is finished once hp->len.content has
 * reached zero.
 */
466 static VALUE HttpParser_body_eof(VALUE self)
468 struct http_parser *hp = data_get(self);
470 if (hp->flags & UH_FL_CHUNKED)
471 return chunked_eof(hp) ? Qtrue : Qfalse;
473 return hp->len.content == 0 ? Qtrue : Qfalse;
478 * parser.keepalive? => true or false
480 * This should be used to detect if a request can really handle
481 * keepalives and pipelining. Currently, the rules are:
483 * 1. MUST be a GET or HEAD request
484 * 2. MUST be HTTP/1.1 +or+ HTTP/1.0 with "Connection: keep-alive"
485 * 3. MUST NOT have "Connection: close" set
/*
 * Ruby-level "keepalive?": true only when both UH_FL_KAMETHOD and
 * UH_FL_KAVERSION are set in hp->flags.
 */
487 static VALUE HttpParser_keepalive(VALUE self)
489 struct http_parser *hp = data_get(self);
491 return (hp->flags & UH_FL_KEEPALIVE) == UH_FL_KEEPALIVE ? Qtrue : Qfalse;
496 * parser.filter_body(buf, data) => nil/data
498 * Takes a String of +data+, will modify data if dechunking is done.
499 * Returns +nil+ if there is more data left to process. Returns
500 * +data+ if body processing is complete. When returning +data+,
501 * it may modify +data+ so the start of the string points to where
502 * the body ended so that trailer processing can begin.
504 * Raises HttpParserError if there are dechunking errors.
505 * Basically this is a glorified memcpy(3) that copies +data+
506 * into +buf+ while filtering it through the dechunker.
/*
 * Ruby-level "filter_body": copies body bytes from data into buf.
 * Chunked bodies go through the parser, which writes the de-chunked
 * bytes into buf; bodies with a Content-Length are copied directly and
 * counted down in hp->len.content.  In both cases the consumed bytes are
 * removed from the front of data.
 *
 * Raises eHttpParserError when the machine ends in its error state.
 *
 * NOTE(review): the else headers and return statements are not visible
 * in this listing.
 */
508 static VALUE HttpParser_filter_body(VALUE self, VALUE buf, VALUE data)
510 struct http_parser *hp = data_get(self);
511 char *dptr = RSTRING_PTR(data);
512 long dlen = RSTRING_LEN(data);
515 rb_str_resize(buf, dlen); /* we can never copy more than dlen bytes */
516 OBJ_TAINT(buf); /* keep weirdo $SAFE users happy */
518 if (hp->flags & UH_FL_CHUNKED) {
/* dest_offset counts the bytes written to buf during this call */
522 hp->s.dest_offset = 0;
/* buf is passed where the header hash normally goes */
523 http_parser_execute(hp, buf, dptr, dlen);
524 if (hp->cs == http_parser_error)
525 rb_raise(eHttpParserError, "Invalid HTTP format, parsing fails.");
527 assert(hp->s.dest_offset <= hp->start.offset);
528 advance_str(data, hp->start.offset);
/* trim buf down to the bytes actually written */
529 rb_str_set_len(buf, hp->s.dest_offset);
531 if (RSTRING_LEN(buf) == 0 && chunked_eof(hp)) {
532 assert(hp->len.chunk == 0);
537 /* no need to enter the Ragel machine for unchunked transfers */
538 assert(hp->len.content >= 0);
539 if (hp->len.content > 0) {
/* never copy more than the input holds or the body still expects */
540 long nr = MIN(dlen, hp->len.content);
542 memcpy(RSTRING_PTR(buf), dptr, nr);
543 hp->len.content -= nr;
/* the whole body has been seen: put the machine in its first final state */
544 if (hp->len.content == 0)
545 hp->cs = http_parser_first_final;
546 advance_str(data, nr);
547 rb_str_set_len(buf, nr);
552 hp->start.offset = 0; /* for trailer parsing */
/*
 * Points var at the pre-built common-field object for the literal field
 * name str (sizeof(str) - 1 is its length without the terminating NUL)
 * and asserts that the lookup did not return nil.
 *
 * NOTE(review): the closing line of the do-block is not visible in this
 * listing.
 */
556 #define SET_GLOBAL(var,str) do { \
557 var = find_common_field(str, sizeof(str) - 1); \
558 assert(var != Qnil); \
/*
 * Extension entry point: defines the Unicorn module, the
 * Unicorn::HttpParserError and Unicorn::HttpParser classes, binds the
 * parser methods and constants, and resolves the header-field globals
 * that write_value compares against.
 */
561 void Init_unicorn_http(void)
563 mUnicorn = rb_define_module("Unicorn");
/*
 * NOTE(review): the start of this statement (the assignment target) is
 * not visible in this listing.
 */
565 rb_define_class_under(mUnicorn, "HttpParserError", rb_eIOError);
566 cHttpParser = rb_define_class_under(mUnicorn, "HttpParser", rb_cObject);
568 rb_define_alloc_func(cHttpParser, HttpParser_alloc);
569 rb_define_method(cHttpParser, "initialize", HttpParser_init,0);
570 rb_define_method(cHttpParser, "reset", HttpParser_reset,0);
571 rb_define_method(cHttpParser, "headers", HttpParser_headers, 2);
572 rb_define_method(cHttpParser, "filter_body", HttpParser_filter_body, 2);
/* "trailers" is bound to the same C function as "headers" */
573 rb_define_method(cHttpParser, "trailers", HttpParser_headers, 2);
574 rb_define_method(cHttpParser, "content_length", HttpParser_content_length, 0);
575 rb_define_method(cHttpParser, "body_eof?", HttpParser_body_eof, 0);
576 rb_define_method(cHttpParser, "keepalive?", HttpParser_keepalive, 0);
579 * The maximum size a single chunk when using chunked transfer encoding.
580 * This is only a theoretical maximum used to detect errors in clients,
581 * it is highly unlikely to encounter clients that send more than
582 * several kilobytes at once.
584 rb_define_const(cHttpParser, "CHUNK_MAX", OFFT2NUM(UH_OFF_T_MAX));
587 * The maximum size of the body as specified by Content-Length.
588 * This is only a theoretical maximum, the actual limit is subject
589 * to the limits of the file system used for +Dir.tmpdir+.
591 rb_define_const(cHttpParser, "LENGTH_MAX", OFFT2NUM(UH_OFF_T_MAX));
/* the common-field table has to exist before SET_GLOBAL can look names up */
593 init_common_fields();
594 SET_GLOBAL(g_http_host, "HOST");
595 SET_GLOBAL(g_http_trailer, "TRAILER");
596 SET_GLOBAL(g_http_transfer_encoding, "TRANSFER_ENCODING");
597 SET_GLOBAL(g_content_length, "CONTENT_LENGTH");
598 SET_GLOBAL(g_http_connection, "CONNECTION");