2 * Copyright (c) 2009 Eric Wong (all bugs are Eric's fault)
3 * Copyright (c) 2005 Zed A. Shaw
4 * You can redistribute it and/or modify it under the same terms as Ruby.
10 #include <sys/types.h>
11 #include "common_field_optimization.h"
12 #include "global_variables.h"
/* Bit flags stored in hp->flags; each note covers only the uses visible in this file. */
/* set together with HASBODY when a Transfer-Encoding value equals chunked, ignoring case */
15 #define UH_FL_CHUNKED 0x1
/* set when a Content-Length header or a chunked Transfer-Encoding header is seen */
16 #define UH_FL_HASBODY 0x2
/* set by the parser action that calls finalize_header(), when HASBODY is on */
17 #define UH_FL_INBODY 0x4
/* set when a Trailer header is seen */
18 #define UH_FL_HASTRAILER 0x8
/* set at the end of a chunked body when HASTRAILER is on */
19 #define UH_FL_INTRAILER 0x10
/* set when the current chunk is larger than the bytes left in the buffer */
20 #define UH_FL_INCHUNK 0x20
/* set when the request method is GET or HEAD */
21 #define UH_FL_KAMETHOD 0x40
/* set for HTTP/1.1, or for a Connection: keep-alive header when KAMETHOD is on */
22 #define UH_FL_KAVERSION 0x80
/* both keepalive bits; HttpParser_keepalive() reports true only when both are set */
24 #define UH_FL_KEEPALIVE (UH_FL_KAMETHOD | UH_FL_KAVERSION)
/* NOTE(review): these look like members of struct http_parser; the struct header and several members are outside this excerpt. */
27 int cs; /* Ragel internal state */
30 union { /* these 3 fields don't nest */
/* the two sizes below are reached as hp->s.field_len and hp->s.dest_offset elsewhere in this file */
36 size_t field_len; /* only used during header processing */
37 size_t dest_offset; /* only used during body processing */
/* Forward declaration; the definition appears later in this file. */
45 static void finalize_header(VALUE req);
/*
 * Helper macros for the parser code.  They expand to expressions that use
 * hp, buffer, p and pe from the enclosing scope, so they only work where
 * those names are visible.  Positions are stored as offsets from buffer.
 */
/* number of bytes between p and pe */
47 #define REMAINING (unsigned long)(pe - p)
/* distance from the offset saved in hp->AT up to FPC */
48 #define LEN(AT, FPC) (FPC - buffer - hp->AT)
/* save FPC in hp->M as an offset from buffer */
49 #define MARK(M,FPC) (hp->M = (FPC) - buffer)
/* turn the offset saved in hp->F back into a pointer into buffer */
50 #define PTR_TO(F) (buffer + hp->F)
/* new Ruby String holding the bytes from the saved offset M up to FPC */
51 #define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC))
/*
 * Record the request method in req under g_request_method.  GET and HEAD
 * additionally set UH_FL_KAMETHOD in hp->flags.
 * NOTE(review): some lines of this function are missing from the excerpt,
 * so how v is chosen in the GET and HEAD branches cannot be confirmed here.
 */
54 request_method(struct http_parser *hp, VALUE req, const char *ptr, size_t len)
58 if (CONST_MEM_EQ("GET", ptr, len)) {
59 hp->flags |= UH_FL_KAMETHOD;
61 } else if (CONST_MEM_EQ("HEAD", ptr, len)) {
62 hp->flags |= UH_FL_KAMETHOD;
/* presumably the fallback branch: build a fresh String from the raw method bytes */
65 v = rb_str_new(ptr, len);
67 rb_hash_aset(req, g_request_method, v);
/*
 * Record the protocol version in req under g_http_version.  An exact
 * HTTP/1.1 match also sets UH_FL_KAVERSION in hp->flags.
 * NOTE(review): some lines of this function are missing from the excerpt,
 * so how v is chosen in the HTTP/1.1 branch cannot be confirmed here.
 */
71 http_version(struct http_parser *hp, VALUE req, const char *ptr, size_t len)
75 if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
76 hp->flags |= UH_FL_KAVERSION;
/* presumably the fallback branch: build a fresh String from the raw version bytes */
79 v = rb_str_new(ptr, len);
81 rb_hash_aset(req, g_http_version, v);
/*
 * Raise eHttpParserError when UH_FL_INTRAILER is set in flags.
 * write_value() calls this for Content-Length, Transfer-Encoding and
 * Trailer headers.
 */
84 static void invalid_if_trailer(int flags)
86 if (flags & UH_FL_INTRAILER)
87 rb_raise(eHttpParserError, "invalid Trailer");
/*
 * Handle one completed header line: look up the field name, react to the
 * headers the parser itself cares about, then store the value in req.
 * The field name starts at hp->start.field and is hp->s.field_len bytes
 * long; the value runs from hp->mark up to p.
 * NOTE(review): the lines that declare and create v and e are missing
 * from this excerpt.
 */
90 static void write_value(VALUE req, struct http_parser *hp,
91 const char *buffer, const char *p)
93 VALUE f = find_common_field(PTR_TO(start.field), hp->s.field_len);
/* reject oversized values before doing anything else with them */
97 VALIDATE_MAX_LENGTH(LEN(mark, p), FIELD_VALUE);
/* presumably reached when the lookup above found nothing: check the name length and build the key */
100 VALIDATE_MAX_LENGTH(hp->s.field_len, FIELD_NAME);
101 f = uncommon_field(PTR_TO(start.field), hp->s.field_len);
102 } else if (f == g_http_connection) {
/* the Connection header is only examined when the method was GET or HEAD */
103 if (hp->flags & UH_FL_KAMETHOD) {
104 if (STR_CSTR_CASE_EQ(v, "keep-alive"))
105 hp->flags |= UH_FL_KAVERSION;
106 else if (STR_CSTR_CASE_EQ(v, "close"))
/* close clears both keepalive bits */
107 hp->flags &= ~UH_FL_KEEPALIVE;
109 } else if (f == g_content_length) {
/* a negative result from parse_length() is treated as an invalid header */
110 hp->len.content = parse_length(RSTRING_PTR(v), RSTRING_LEN(v));
111 if (hp->len.content < 0)
112 rb_raise(eHttpParserError, "invalid Content-Length");
113 hp->flags |= UH_FL_HASBODY;
/* body-framing headers are rejected while trailers are being parsed */
114 invalid_if_trailer(hp->flags);
115 } else if (f == g_http_transfer_encoding) {
116 if (STR_CSTR_CASE_EQ(v, "chunked"))
117 hp->flags |= UH_FL_CHUNKED | UH_FL_HASBODY;
118 invalid_if_trailer(hp->flags);
119 } else if (f == g_http_trailer) {
120 hp->flags |= UH_FL_HASTRAILER;
121 invalid_if_trailer(hp->flags);
/*
 * Store the value.  An existing entry for the same key gets the new value
 * appended after a comma, except for the Host key, which is left alone.
 * NOTE(review): the condition guarding the plain store is not visible here.
 */
124 e = rb_hash_aref(req, f);
126 rb_hash_aset(req, f, v);
127 } else if (f != g_http_host) {
128 /* full URLs in REQUEST_URI take precedence for the Host: header */
129 rb_str_buf_cat(e, ",", 1);
130 rb_str_buf_append(e, v);
139 action mark {MARK(mark, fpc); }
141 action start_field { MARK(start.field, fpc); }
142 action snake_upcase_field { snake_upcase_char((char *)fpc); }
143 action downcase_char { downcase_char((char *)fpc); }
144 action write_field { hp->s.field_len = LEN(start.field, fpc); }
145 action start_value { MARK(mark, fpc); }
146 action write_value { write_value(req, hp, buffer, fpc); }
147 action request_method {
/* hand the bytes between the saved mark and fpc to request_method() */
148 request_method(hp, req, PTR_TO(mark), LEN(mark, fpc));
151 rb_hash_aset(req, g_rack_url_scheme, STR_NEW(mark, fpc));
154 rb_hash_aset(req, g_http_host, STR_NEW(mark, fpc));
157 size_t len = LEN(mark, fpc);
/* check the length first, then store the marked bytes in req under g_request_uri */
160 VALIDATE_MAX_LENGTH(len, REQUEST_URI);
161 str = rb_hash_aset(req, g_request_uri, STR_NEW(mark, fpc));
163 * "OPTIONS * HTTP/1.1\r\n" is a valid request, but we can't have '*'
164 * in REQUEST_PATH or PATH_INFO or else Rack::Lint will complain
166 if (STR_CSTR_EQ(str, "*")) {
167 str = rb_str_new(NULL, 0);
168 rb_hash_aset(req, g_path_info, str);
169 rb_hash_aset(req, g_request_path, str);
173 VALIDATE_MAX_LENGTH(LEN(mark, fpc), FRAGMENT);
174 rb_hash_aset(req, g_fragment, STR_NEW(mark, fpc));
176 action start_query {MARK(start.query, fpc); }
177 action query_string {
/* length-check the bytes since start.query, then store them in req under g_query_string */
178 VALIDATE_MAX_LENGTH(LEN(start.query, fpc), QUERY_STRING);
179 rb_hash_aset(req, g_query_string, STR_NEW(start.query, fpc));
181 action http_version { http_version(hp, req, PTR_TO(mark), LEN(mark, fpc)); }
182 action request_path {
/* store the marked bytes under g_request_path; the same String becomes g_path_info unless it is a lone asterisk */
184 size_t len = LEN(mark, fpc);
186 VALIDATE_MAX_LENGTH(len, REQUEST_PATH);
187 val = rb_hash_aset(req, g_request_path, STR_NEW(mark, fpc));
189 /* rack says PATH_INFO must start with "/" or be empty */
190 if (!STR_CSTR_EQ(val, "*"))
191 rb_hash_aset(req, g_path_info, val);
193 action add_to_chunk_size {
/* presumably folds the current character into the chunk size as a base 16 digit; a negative result is rejected */
194 hp->len.chunk = step_incr(hp->len.chunk, fc, 16);
195 if (hp->len.chunk < 0)
196 rb_raise(eHttpParserError, "invalid chunk size");
199 finalize_header(req);
/* default to the final state; a request with a chunked body switches to the ChunkedBody machine instead */
201 cs = http_parser_first_final;
202 if (hp->flags & UH_FL_HASBODY) {
203 hp->flags |= UH_FL_INBODY;
204 if (hp->flags & UH_FL_CHUNKED)
205 cs = http_parser_en_ChunkedBody;
207 assert(!(hp->flags & UH_FL_CHUNKED));
210 * go back to Ruby so we can call the Rack application, we'll reenter
211 * the parser iff the body needs to be processed.
216 action end_trailers {
/* trailers are finished: move to the final state */
217 cs = http_parser_first_final;
221 action end_chunked_body {
/* a request that announced trailers continues in the Trailers machine; the other assignment below selects the final state */
222 if (hp->flags & UH_FL_HASTRAILER) {
223 hp->flags |= UH_FL_INTRAILER;
224 cs = http_parser_en_Trailers;
226 cs = http_parser_first_final;
232 action skip_chunk_data {
233 skip_chunk_data_hack: {
/*
 * Copy as much of the current chunk as the buffer still holds into the
 * String req at dest_offset (HttpParser_filter_body passes its buf as req).
 * http_parser_execute() jumps to the label above to resume a chunk that
 * was cut short by the end of the previous buffer.
 */
234 size_t nr = MIN(hp->len.chunk, REMAINING);
235 memcpy(RSTRING_PTR(req) + hp->s.dest_offset, fpc, nr);
236 hp->s.dest_offset += nr;
239 assert(hp->len.chunk >= 0);
/* more chunk data than buffer: remember that the parser stopped inside a chunk */
240 if (hp->len.chunk > REMAINING) {
241 hp->flags |= UH_FL_INCHUNK;
249 include unicorn_http_common "unicorn_http_common.rl";
/*
 * Put hp into its initial state by zeroing the whole structure.
 * NOTE(review): other initialisation lines may be missing from this excerpt.
 */
255 static void http_parser_init(struct http_parser *hp)
258 memset(hp, 0, sizeof(struct http_parser));
/*
 * Run the parser over buffer (len bytes), resuming at the offset saved in
 * hp->start.offset, and save the new offset when done.
 * NOTE(review): the lines that set up cs, p and pe and that invoke the
 * generated machine are missing from this excerpt.
 */
264 static void http_parser_execute(struct http_parser *hp,
265 VALUE req, const char *buffer, size_t len)
269 size_t off = hp->start.offset;
271 if (cs == http_parser_first_final)
274 assert(off <= len && "offset past end of buffer");
279 assert(pe - p == len - off && "pointers aren't same distance");
/* an earlier call stopped inside a chunk: clear the flag and resume copying chunk data */
281 if (hp->flags & UH_FL_INCHUNK) {
282 hp->flags &= ~(UH_FL_INCHUNK);
283 goto skip_chunk_data_hack;
286 post_exec: /* "_out:" also goes here */
287 if (hp->cs != http_parser_error)
289 hp->start.offset = p - buffer;
/* the line above records how far parsing got, as an offset from buffer */
291 assert(p <= pe && "buffer overflow after parsing execute");
292 assert(hp->start.offset <= len && "start.offset longer than length");
/* Fetch the struct http_parser wrapped inside the Ruby object self. */
295 static struct http_parser *data_get(VALUE self)
297 struct http_parser *hp;
299 Data_Get_Struct(self, struct http_parser, hp);
/*
 * Fill in the request entries that are derived from other entries:
 * g_rack_url_scheme, g_server_name, g_server_port, g_server_protocol and
 * an empty g_query_string when none was parsed.
 * NOTE(review): several conditions are missing from this excerpt.
 */
304 static void finalize_header(VALUE req)
306 VALUE temp = rb_hash_aref(req, g_rack_url_scheme);
/* defaults, replaced below when the request supplies better values */
307 VALUE server_name = g_localhost;
308 VALUE server_port = g_port_80;
310 /* set rack.url_scheme to "https" or "http", no others are allowed by Rack */
/* presumably reached when no scheme was parsed: consult the forwarded-proto entry instead */
312 temp = rb_hash_aref(req, g_http_x_forwarded_proto);
313 if (temp != Qnil && STR_CSTR_EQ(temp, "https"))
314 server_port = g_port_443;
317 rb_hash_aset(req, g_rack_url_scheme, temp);
318 } else if (STR_CSTR_EQ(temp, "https")) {
319 server_port = g_port_443;
322 /* parse and set the SERVER_NAME and SERVER_PORT variables */
323 temp = rb_hash_aref(req, g_http_host);
/* split the host value at its first colon */
325 char *colon = memchr(RSTRING_PTR(temp), ':', RSTRING_LEN(temp));
327 long port_start = colon - RSTRING_PTR(temp) + 1;
329 server_name = rb_str_substr(temp, 0, colon - RSTRING_PTR(temp));
/* keep the port chosen above when nothing follows the colon */
330 if ((RSTRING_LEN(temp) - port_start) > 0)
331 server_port = rb_str_substr(temp, port_start, RSTRING_LEN(temp));
336 rb_hash_aset(req, g_server_name, server_name);
337 rb_hash_aset(req, g_server_port, server_port);
338 rb_hash_aset(req, g_server_protocol, g_http_11);
340 /* rack requires QUERY_STRING */
341 if (rb_hash_aref(req, g_query_string) == Qnil)
342 rb_hash_aset(req, g_query_string, rb_str_new(NULL, 0));
/*
 * Allocator for the parser class: wraps a new struct http_parser in a Ruby
 * object of klass.  NULL is passed for both callback arguments.
 */
345 static VALUE HttpParser_alloc(VALUE klass)
347 struct http_parser *hp;
348 return Data_Make_Struct(klass, struct http_parser, NULL, NULL, hp);
354 * parser.new -> parser
356 * Creates a new parser.
358 static VALUE HttpParser_init(VALUE self)
/* initialise the parser state wrapped by self; the return statement is not visible in this excerpt */
360 http_parser_init(data_get(self));
368 * parser.reset -> nil
370 * Resets the parser to its initial state so that you can reuse it
371 * rather than making new ones.
373 static VALUE HttpParser_reset(VALUE self)
/* same call as HttpParser_init: re-initialise the wrapped parser state; the return statement is not visible in this excerpt */
375 http_parser_init(data_get(self));
/*
 * Drop leading bytes from str: the bytes starting at offset nr are moved
 * to the front and the String length is set to len.
 * NOTE(review): the lines that adjust len between its initialisation and
 * the copy are missing from this excerpt.
 */
380 static void advance_str(VALUE str, off_t nr)
382 long len = RSTRING_LEN(str);
/* memmove is used because source and destination lie in the same buffer */
389 if (len > 0) /* unlikely, len is usually 0 */
390 memmove(RSTRING_PTR(str), RSTRING_PTR(str) + nr, len);
391 rb_str_set_len(str, len);
/*
 * Returns nil when the request is chunked, otherwise the value of
 * hp->len.content converted to a Ruby number.
 */
394 static VALUE HttpParser_content_length(VALUE self)
396 struct http_parser *hp = data_get(self);
398 return (hp->flags & UH_FL_CHUNKED) ? Qnil : OFFT2NUM(hp->len.content);
403 * parser.headers(req, data) -> req or nil
404 * parser.trailers(req, data) -> req or nil
406 * Takes a Hash and a String of data, parses the String of data filling
407 * in the Hash, returning the Hash if parsing is finished, nil otherwise.
408 * When returning the req Hash, it may modify data to point to where
409 * body processing should begin.
411 * Raises HttpParserError if there are parsing errors
413 static VALUE HttpParser_headers(VALUE self, VALUE req, VALUE data)
415 struct http_parser *hp = data_get(self);
/* parse whatever data holds, then check the parsed offset against the HEADER limit */
417 http_parser_execute(hp, req, RSTRING_PTR(data), RSTRING_LEN(data));
418 VALIDATE_MAX_LENGTH(hp->start.offset, HEADER);
/* parser reached the final state or the start of a chunked body: drop the consumed bytes from data */
420 if (hp->cs == http_parser_first_final ||
421 hp->cs == http_parser_en_ChunkedBody) {
/* NOTE(review): this advances one byte past start.offset; confirm against the position where the parser stops */
422 advance_str(data, hp->start.offset + 1);
423 hp->start.offset = 0;
/* NOTE(review): the return statements of this function are not visible in this excerpt */
428 if (hp->cs == http_parser_error)
429 rb_raise(eHttpParserError, "Invalid HTTP format, parsing fails.");
/* Nonzero when the parser is in its final state or UH_FL_INTRAILER is set. */
434 static int chunked_eof(struct http_parser *hp)
436 return ((hp->cs == http_parser_first_final) ||
437 (hp->flags & UH_FL_INTRAILER));
/*
 * Ruby predicate body_eof?.  Chunked requests defer to chunked_eof();
 * other requests are at EOF once hp->len.content has reached zero.
 */
440 static VALUE HttpParser_body_eof(VALUE self)
442 struct http_parser *hp = data_get(self);
444 if (hp->flags & UH_FL_CHUNKED)
445 return chunked_eof(hp) ? Qtrue : Qfalse;
447 return hp->len.content == 0 ? Qtrue : Qfalse;
/*
 * Ruby predicate keepalive?.  True only when both UH_FL_KAMETHOD and
 * UH_FL_KAVERSION are set in hp->flags.
 */
450 static VALUE HttpParser_keepalive(VALUE self)
452 struct http_parser *hp = data_get(self);
454 return (hp->flags & UH_FL_KEEPALIVE) == UH_FL_KEEPALIVE ? Qtrue : Qfalse;
459 * parser.filter_body(buf, data) -> nil/data
461 * Takes a String of +data+, will modify data if dechunking is done.
462 * Returns +nil+ if there is more data left to process. Returns
463 * +data+ if body processing is complete. When returning +data+,
464 * it may modify +data+ so the start of the string points to where
465 * the body ended so that trailer processing can begin.
467 * Raises HttpParserError if there are dechunking errors.
468 * Basically this is a glorified memcpy(3) that copies +data+
469 * into +buf+ while filtering it through the dechunker.
471 static VALUE HttpParser_filter_body(VALUE self, VALUE buf, VALUE data)
473 struct http_parser *hp = data_get(self);
474 char *dptr = RSTRING_PTR(data);
475 long dlen = RSTRING_LEN(data);
/* size buf for the worst case up front; it is trimmed to the copied length further down */
478 rb_str_resize(buf, dlen); /* we can never copy more than dlen bytes */
479 OBJ_TAINT(buf); /* keep weirdo $SAFE users happy */
481 if (hp->flags & UH_FL_CHUNKED) {
/* chunked body: run the parser with buf as the copy destination, writing from offset 0 */
485 hp->s.dest_offset = 0;
486 http_parser_execute(hp, buf, dptr, dlen);
487 if (hp->cs == http_parser_error)
488 rb_raise(eHttpParserError, "Invalid HTTP format, parsing fails.");
/* drop the parsed input from data and shrink buf to the bytes actually copied */
490 assert(hp->s.dest_offset <= hp->start.offset);
491 advance_str(data, hp->start.offset);
492 rb_str_set_len(buf, hp->s.dest_offset);
/* nothing was copied and the chunked body has ended; the statements in this branch are not visible here */
494 if (RSTRING_LEN(buf) == 0 && chunked_eof(hp)) {
495 assert(hp->len.chunk == 0);
500 /* no need to enter the Ragel machine for unchunked transfers */
501 assert(hp->len.content >= 0);
502 if (hp->len.content > 0) {
/* copy at most the number of body bytes still expected */
503 long nr = MIN(dlen, hp->len.content);
505 memcpy(RSTRING_PTR(buf), dptr, nr);
506 hp->len.content -= nr;
/* the whole body has arrived: put the parser into its final state */
507 if (hp->len.content == 0)
508 hp->cs = http_parser_first_final;
509 advance_str(data, nr);
510 rb_str_set_len(buf, nr);
515 hp->start.offset = 0; /* for trailer parsing */
/*
 * Look up the string literal str with find_common_field(), store the result
 * in var, and assert that the lookup did not return Qnil.
 * The end of the macro is not visible in this excerpt.
 */
519 #define SET_GLOBAL(var,str) do { \
520 var = find_common_field(str, sizeof(str) - 1); \
521 assert(var != Qnil); \
/*
 * Extension entry point: registers the allocator, methods and constants on
 * cHttpParser and caches the header keys used by write_value().
 * NOTE(review): the lines that define cHttpParser are missing from this excerpt.
 */
524 void Init_unicorn_http(void)
527 rb_define_alloc_func(cHttpParser, HttpParser_alloc);
528 rb_define_method(cHttpParser, "initialize", HttpParser_init,0);
529 rb_define_method(cHttpParser, "reset", HttpParser_reset,0);
530 rb_define_method(cHttpParser, "headers", HttpParser_headers, 2);
531 rb_define_method(cHttpParser, "filter_body", HttpParser_filter_body, 2);
/* trailers is bound to the same C function as headers */
532 rb_define_method(cHttpParser, "trailers", HttpParser_headers, 2);
533 rb_define_method(cHttpParser, "content_length", HttpParser_content_length, 0);
534 rb_define_method(cHttpParser, "body_eof?", HttpParser_body_eof, 0);
535 rb_define_method(cHttpParser, "keepalive?", HttpParser_keepalive, 0);
538 * The maximum size a single chunk when using chunked transfer encoding.
539 * This is only a theoretical maximum used to detect errors in clients,
540 * it is highly unlikely to encounter clients that send more than
541 * several kilobytes at once.
543 rb_define_const(cHttpParser, "CHUNK_MAX", OFFT2NUM(UH_OFF_T_MAX));
546 * The maximum size of the body as specified by Content-Length.
547 * This is only a theoretical maximum, the actual limit is subject
548 * to the limits of the file system used for +Dir::tmpdir+
550 rb_define_const(cHttpParser, "LENGTH_MAX", OFFT2NUM(UH_OFF_T_MAX));
/* presumably builds the table searched by find_common_field(); the lookups below assert that each key is found */
552 init_common_fields();
553 SET_GLOBAL(g_http_host, "HOST");
554 SET_GLOBAL(g_http_trailer, "TRAILER");
555 SET_GLOBAL(g_http_transfer_encoding, "TRANSFER_ENCODING");
556 SET_GLOBAL(g_content_length, "CONTENT_LENGTH");
557 SET_GLOBAL(g_http_connection, "CONNECTION");