2 * Copyright (c) 2009 Eric Wong (all bugs are Eric's fault)
3 * Copyright (c) 2005 Zed A. Shaw
4 * You can redistribute it and/or modify it under the same terms as Ruby.
10 #include <sys/types.h>
11 #include "common_field_optimization.h"
12 #include "global_variables.h"
/* Bit flags kept in hp->flags. */

/* set by write_value() when the Transfer-Encoding value matches "chunked" */
15 #define UH_FL_CHUNKED 0x1
/* set by write_value() for a Content-Length header or a chunked transfer */
16 #define UH_FL_HASBODY 0x2
/* set after finalize_header() has run if UH_FL_HASBODY is present */
17 #define UH_FL_INBODY 0x4
/* set by write_value() when a Trailer header field is seen */
18 #define UH_FL_HASTRAILER 0x8
/*
 * set at the end of a chunked body when UH_FL_HASTRAILER is present;
 * invalid_if_trailer() raises while this bit is set
 */
19 #define UH_FL_INTRAILER 0x10
/*
 * set by skip_chunk_data when the chunk is longer than the bytes left in
 * the buffer; http_parser_execute() clears it and jumps back to
 * skip_chunk_data_hack on the next call
 */
20 #define UH_FL_INCHUNK 0x20
23 int cs; /* Ragel internal state */
26 union { /* these 3 fields don't nest */
32 size_t field_len; /* only used during header processing */
33 size_t dest_offset; /* only used during body processing */
41 static void finalize_header(VALUE req);
/*
 * Helpers for the parser actions.  They expand in a scope that provides
 * `buffer` (start of the data being parsed), `hp` (the struct http_parser)
 * and, for REMAINING, the `p` / `pe` cursor and end pointers.
 *
 * Every macro argument that is used as an expression, and every expansion,
 * is parenthesized so the macros compose safely with surrounding operators.
 */

/* number of bytes between the cursor p and the end of input pe */
#define REMAINING ((unsigned long)(pe - p))

/* length of the span that starts at offset hp->AT and ends at FPC */
#define LEN(AT, FPC) ((FPC) - buffer - hp->AT)

/* remember FPC in hp->M as an offset from buffer */
#define MARK(M,FPC) (hp->M = (FPC) - buffer)

/* turn the offset stored in hp->F back into a pointer into buffer */
#define PTR_TO(F) (buffer + hp->F)

/* rb_str_new() over the bytes from the offset in hp->M up to FPC */
#define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC))
/*
 * Raises eHttpParserError ("invalid Trailer") when UH_FL_INTRAILER is set
 * in flags; otherwise does nothing.
 */
49 static void invalid_if_trailer(int flags)
51 if (flags & UH_FL_INTRAILER)
52 rb_raise(eHttpParserError, "invalid Trailer");
/*
 * Stores one parsed header value into the req hash.
 *
 * req    - hash receiving the header
 * hp     - parser state; start.field / s.field_len locate the field name,
 *          mark locates the start of the value
 * buffer - start of the data being parsed
 * p      - end of the value
 *
 * The field name is looked up with find_common_field(); uncommon_field()
 * supplies the key otherwise (presumably when the lookup returns Qnil).
 * Content-Length, Transfer-Encoding and Trailer additionally update
 * hp->len.content / hp->flags, and each goes through invalid_if_trailer().
 * Raises eHttpParserError for a negative parse_length() result.
 *
 * NOTE(review): several lines of this function (including where `v` and
 * `e` are declared and where `v` is assigned) are not visible in this
 * excerpt.
 */
55 static void write_value(VALUE req, struct http_parser *hp,
56 const char *buffer, const char *p)
58 VALUE f = find_common_field(PTR_TO(start.field), hp->s.field_len);
62 VALIDATE_MAX_LENGTH(LEN(mark, p), FIELD_VALUE);
65 VALIDATE_MAX_LENGTH(hp->s.field_len, FIELD_NAME);
66 f = uncommon_field(PTR_TO(start.field), hp->s.field_len);
67 } else if (f == g_content_length) {
68 hp->len.content = parse_length(RSTRING_PTR(v), RSTRING_LEN(v));
69 if (hp->len.content < 0)
70 rb_raise(eHttpParserError, "invalid Content-Length");
71 hp->flags |= UH_FL_HASBODY;
72 invalid_if_trailer(hp->flags);
73 } else if (f == g_http_transfer_encoding) {
74 if (STR_CSTR_CASE_EQ(v, "chunked"))
75 hp->flags |= UH_FL_CHUNKED | UH_FL_HASBODY;
76 invalid_if_trailer(hp->flags);
77 } else if (f == g_http_trailer) {
78 hp->flags |= UH_FL_HASTRAILER;
79 invalid_if_trailer(hp->flags);
/* look for a value already stored under this key */
82 e = rb_hash_aref(req, f);
84 rb_hash_aset(req, f, v);
85 } else if (f != g_http_host) {
86 /* full URLs in REQUEST_URI take precedence for the Host: header */
/* any other repeated field is joined onto the existing value with "," */
87 rb_str_buf_cat(e, ",", 1);
88 rb_str_buf_append(e, v);
97 action mark {MARK(mark, fpc); }
99 action start_field { MARK(start.field, fpc); }
100 action snake_upcase_field { snake_upcase_char((char *)fpc); }
101 action downcase_char { downcase_char((char *)fpc); }
102 action write_field { hp->s.field_len = LEN(start.field, fpc); }
103 action start_value { MARK(mark, fpc); }
104 action write_value { write_value(req, hp, buffer, fpc); }
105 action request_method {
106 rb_hash_aset(req, g_request_method, STR_NEW(mark, fpc));
109 rb_hash_aset(req, g_rack_url_scheme, STR_NEW(mark, fpc));
112 rb_hash_aset(req, g_http_host, STR_NEW(mark, fpc));
115 size_t len = LEN(mark, fpc);
118 VALIDATE_MAX_LENGTH(len, REQUEST_URI);
119 str = rb_hash_aset(req, g_request_uri, STR_NEW(mark, fpc));
121 * "OPTIONS * HTTP/1.1\r\n" is a valid request, but we can't have '*'
122 * in REQUEST_PATH or PATH_INFO or else Rack::Lint will complain
124 if (STR_CSTR_EQ(str, "*")) {
125 str = rb_str_new(NULL, 0);
126 rb_hash_aset(req, g_path_info, str);
127 rb_hash_aset(req, g_request_path, str);
131 VALIDATE_MAX_LENGTH(LEN(mark, fpc), FRAGMENT);
132 rb_hash_aset(req, g_fragment, STR_NEW(mark, fpc));
134 action start_query {MARK(start.query, fpc); }
135 action query_string {
136 VALIDATE_MAX_LENGTH(LEN(start.query, fpc), QUERY_STRING);
137 rb_hash_aset(req, g_query_string, STR_NEW(start.query, fpc));
139 action http_version {
140 rb_hash_aset(req, g_http_version, STR_NEW(mark, fpc));
142 action request_path {
144 size_t len = LEN(mark, fpc);
146 VALIDATE_MAX_LENGTH(len, REQUEST_PATH);
147 val = rb_hash_aset(req, g_request_path, STR_NEW(mark, fpc));
149 /* rack says PATH_INFO must start with "/" or be empty */
150 if (!STR_CSTR_EQ(val, "*"))
151 rb_hash_aset(req, g_path_info, val);
153 action add_to_chunk_size {
154 hp->len.chunk = step_incr(hp->len.chunk, fc, 16);
155 if (hp->len.chunk < 0)
156 rb_raise(eHttpParserError, "invalid chunk size");
159 finalize_header(req);
161 cs = http_parser_first_final;
162 if (hp->flags & UH_FL_HASBODY) {
163 hp->flags |= UH_FL_INBODY;
164 if (hp->flags & UH_FL_CHUNKED)
165 cs = http_parser_en_ChunkedBody;
167 assert(!(hp->flags & UH_FL_CHUNKED));
170 * go back to Ruby so we can call the Rack application, we'll reenter
171 * the parser iff the body needs to be processed.
176 action end_trailers {
177 cs = http_parser_first_final;
181 action end_chunked_body {
182 if (hp->flags & UH_FL_HASTRAILER) {
183 hp->flags |= UH_FL_INTRAILER;
184 cs = http_parser_en_Trailers;
186 cs = http_parser_first_final;
192 action skip_chunk_data {
193 skip_chunk_data_hack: {
194 size_t nr = MIN(hp->len.chunk, REMAINING);
195 memcpy(RSTRING_PTR(req) + hp->s.dest_offset, fpc, nr);
196 hp->s.dest_offset += nr;
199 assert(hp->len.chunk >= 0);
200 if (hp->len.chunk > REMAINING) {
201 hp->flags |= UH_FL_INCHUNK;
209 include unicorn_http_common "unicorn_http_common.rl";
/*
 * Resets *hp by zero-filling the whole struct http_parser.
 * NOTE(review): the rest of this function is not visible in this excerpt.
 */
215 static void http_parser_init(struct http_parser *hp)
218 memset(hp, 0, sizeof(struct http_parser));
/*
 * Runs the parser over buffer[0, len), starting from the offset saved in
 * hp->start.offset, and saves the new offset (p - buffer) back on exit.
 *
 * hp     - parser state carried between calls
 * req    - Ruby object handed to the parser actions
 * buffer - start of the data to parse
 * len    - number of bytes available at buffer
 *
 * NOTE(review): the lines that set up cs / p / pe and invoke the generated
 * machine are not visible in this excerpt.
 */
224 static void http_parser_execute(struct http_parser *hp,
225 VALUE req, const char *buffer, size_t len)
229 size_t off = hp->start.offset;
231 if (cs == http_parser_first_final)
234 assert(off <= len && "offset past end of buffer");
239 assert(pe - p == len - off && "pointers aren't same distance");
/*
 * a previous call stopped in the middle of a chunk: clear the marker and
 * resume directly at the label inside the skip_chunk_data action
 */
241 if (hp->flags & UH_FL_INCHUNK) {
242 hp->flags &= ~(UH_FL_INCHUNK);
243 goto skip_chunk_data_hack;
246 post_exec: /* "_out:" also goes here */
247 if (hp->cs != http_parser_error)
/* remember how far parsing got so the next call can resume there */
249 hp->start.offset = p - buffer;
251 assert(p <= pe && "buffer overflow after parsing execute");
252 assert(hp->start.offset <= len && "start.offset longer than length");
/*
 * Fetches the struct http_parser wrapped by the Ruby object self, using
 * Data_Get_Struct.
 * NOTE(review): the lines after the lookup (including the return) are not
 * visible in this excerpt.
 */
255 static struct http_parser *data_get(VALUE self)
257 struct http_parser *hp;
259 Data_Get_Struct(self, struct http_parser, hp);
/*
 * Fills in the request-hash entries that are derived from the parsed
 * headers: g_rack_url_scheme, g_server_name, g_server_port,
 * g_server_protocol and (when absent) an empty g_query_string.
 *
 * server_name starts as g_localhost and server_port as g_port_80; the port
 * becomes g_port_443 for "https", and both may be replaced by the parts of
 * the Host value found around its ':'.
 *
 * NOTE(review): several conditionals of this function are not visible in
 * this excerpt.
 */
264 static void finalize_header(VALUE req)
266 VALUE temp = rb_hash_aref(req, g_rack_url_scheme);
267 VALUE server_name = g_localhost;
268 VALUE server_port = g_port_80;
270 /* set rack.url_scheme to "https" or "http", no others are allowed by Rack */
272 temp = rb_hash_aref(req, g_http_x_forwarded_proto);
273 if (temp != Qnil && STR_CSTR_EQ(temp, "https"))
274 server_port = g_port_443;
277 rb_hash_aset(req, g_rack_url_scheme, temp);
278 } else if (STR_CSTR_EQ(temp, "https")) {
279 server_port = g_port_443;
282 /* parse and set the SERVER_NAME and SERVER_PORT variables */
283 temp = rb_hash_aref(req, g_http_host);
285 char *colon = memchr(RSTRING_PTR(temp), ':', RSTRING_LEN(temp));
/* index of the first byte after the ':' */
287 long port_start = colon - RSTRING_PTR(temp) + 1;
289 server_name = rb_str_substr(temp, 0, colon - RSTRING_PTR(temp));
/* only override the port when something follows the ':' */
290 if ((RSTRING_LEN(temp) - port_start) > 0)
/*
 * NOTE(review): the length passed is the full string length, not the
 * remaining length; presumably rb_str_substr clamps it -- confirm
 */
291 server_port = rb_str_substr(temp, port_start, RSTRING_LEN(temp));
296 rb_hash_aset(req, g_server_name, server_name);
297 rb_hash_aset(req, g_server_port, server_port);
298 rb_hash_aset(req, g_server_protocol, g_server_protocol_value);
300 /* rack requires QUERY_STRING */
301 if (rb_hash_aref(req, g_query_string) == Qnil)
302 rb_hash_aset(req, g_query_string, rb_str_new(NULL, 0));
/*
 * Allocation function registered for cHttpParser: wraps a new
 * struct http_parser in an object of klass via Data_Make_Struct, passing
 * NULL for both callback arguments.
 */
305 static VALUE HttpParser_alloc(VALUE klass)
307 struct http_parser *hp;
308 return Data_Make_Struct(klass, struct http_parser, NULL, NULL, hp);
314 * parser.new -> parser
316 * Creates a new parser.
318 static VALUE HttpParser_init(VALUE self)
320 http_parser_init(data_get(self));
328 * parser.reset -> nil
330 * Resets the parser to its initial state so that you can reuse it
331 * rather than making new ones.
333 static VALUE HttpParser_reset(VALUE self)
335 http_parser_init(data_get(self));
/*
 * Moves the bytes of str that start at offset nr to the front of the
 * string with memmove (the regions overlap) and then sets the string
 * length to len.
 *
 * NOTE(review): the lines between reading RSTRING_LEN and the memmove are
 * not visible in this excerpt; presumably len is reduced by nr there --
 * confirm.
 */
340 static void advance_str(VALUE str, off_t nr)
342 long len = RSTRING_LEN(str);
349 if (len > 0) /* unlikely, len is usually 0 */
350 memmove(RSTRING_PTR(str), RSTRING_PTR(str) + nr, len);
351 rb_str_set_len(str, len);
/*
 * Bound to the Ruby method "content_length".
 * Returns nil when UH_FL_CHUNKED is set, otherwise hp->len.content
 * converted with OFFT2NUM.
 */
354 static VALUE HttpParser_content_length(VALUE self)
356 struct http_parser *hp = data_get(self);
358 return (hp->flags & UH_FL_CHUNKED) ? Qnil : OFFT2NUM(hp->len.content);
363 * parser.headers(req, data) -> req or nil
364 * parser.trailers(req, data) -> req or nil
366 * Takes a Hash and a String of data, parses the String of data, filling
367 * in the Hash. Returns the Hash if parsing is finished, nil otherwise.
368 * When returning the req Hash, it may modify data to point to where
369 * body processing should begin.
371 * Raises HttpParserError if there are parsing errors
373 static VALUE HttpParser_headers(VALUE self, VALUE req, VALUE data)
375 struct http_parser *hp = data_get(self);
377 http_parser_execute(hp, req, RSTRING_PTR(data), RSTRING_LEN(data));
/* start.offset now holds how far the parser got into data */
378 VALIDATE_MAX_LENGTH(hp->start.offset, HEADER);
/* these are the two states selected once finalize_header() has run */
380 if (hp->cs == http_parser_first_final ||
381 hp->cs == http_parser_en_ChunkedBody) {
/*
 * NOTE(review): the "+ 1" presumably steps over the final header byte
 * that the parser stopped on -- confirm
 */
382 advance_str(data, hp->start.offset + 1);
383 hp->start.offset = 0;
388 if (hp->cs == http_parser_error)
389 rb_raise(eHttpParserError, "Invalid HTTP format, parsing fails.");
/*
 * Returns non-zero when the parser is in http_parser_first_final or
 * UH_FL_INTRAILER is set in hp->flags, zero otherwise.
 */
394 static int chunked_eof(struct http_parser *hp)
396 return ((hp->cs == http_parser_first_final) ||
397 (hp->flags & UH_FL_INTRAILER));
/*
 * Bound to the Ruby method "body_eof?".
 * For chunked requests the answer comes from chunked_eof(); otherwise it
 * is true exactly when hp->len.content is 0.
 */
400 static VALUE HttpParser_body_eof(VALUE self)
402 struct http_parser *hp = data_get(self);
404 if (hp->flags & UH_FL_CHUNKED)
405 return chunked_eof(hp) ? Qtrue : Qfalse;
407 return hp->len.content == 0 ? Qtrue : Qfalse;
412 * parser.filter_body(buf, data) -> nil/data
414 * Takes a String of +data+, will modify data if dechunking is done.
415 * Returns +nil+ if there is more data left to process. Returns
416 * +data+ if body processing is complete. When returning +data+,
417 * it may modify +data+ so the start of the string points to where
418 * the body ended so that trailer processing can begin.
420 * Raises HttpParserError if there are dechunking errors
421 * Basically this is a glorified memcpy(3) that copies +data+
422 * into +buf+ while filtering it through the dechunker.
424 static VALUE HttpParser_filter_body(VALUE self, VALUE buf, VALUE data)
426 struct http_parser *hp = data_get(self);
427 char *dptr = RSTRING_PTR(data);
428 long dlen = RSTRING_LEN(data);
431 rb_str_resize(buf, dlen); /* we can never copy more than dlen bytes */
432 OBJ_TAINT(buf); /* keep weirdo $SAFE users happy */
434 if (hp->flags & UH_FL_CHUNKED) {
/*
 * buf is handed to the parser as its `req` argument, so the
 * skip_chunk_data action copies chunk bytes into buf at dest_offset
 */
438 hp->s.dest_offset = 0;
439 http_parser_execute(hp, buf, dptr, dlen);
440 if (hp->cs == http_parser_error)
441 rb_raise(eHttpParserError, "Invalid HTTP format, parsing fails.");
443 assert(hp->s.dest_offset <= hp->start.offset);
/* advance data by the offset the parser reached */
444 advance_str(data, hp->start.offset);
/* shrink buf to the number of bytes copied into it */
445 rb_str_set_len(buf, hp->s.dest_offset);
447 if (RSTRING_LEN(buf) == 0 && chunked_eof(hp)) {
448 assert(hp->len.chunk == 0);
453 /* no need to enter the Ragel machine for unchunked transfers */
454 assert(hp->len.content >= 0);
455 if (hp->len.content > 0) {
/* copy no more than what is still outstanding from Content-Length */
456 long nr = MIN(dlen, hp->len.content);
458 memcpy(RSTRING_PTR(buf), dptr, nr);
459 hp->len.content -= nr;
460 if (hp->len.content == 0)
461 hp->cs = http_parser_first_final;
462 advance_str(data, nr);
463 rb_str_set_len(buf, nr);
468 hp->start.offset = 0; /* for trailer parsing */
472 #define SET_GLOBAL(var,str) do { \
473 var = find_common_field(str, sizeof(str) - 1); \
474 assert(var != Qnil); \
477 void Init_unicorn_http(void)
480 rb_define_alloc_func(cHttpParser, HttpParser_alloc);
481 rb_define_method(cHttpParser, "initialize", HttpParser_init,0);
482 rb_define_method(cHttpParser, "reset", HttpParser_reset,0);
483 rb_define_method(cHttpParser, "headers", HttpParser_headers, 2);
484 rb_define_method(cHttpParser, "filter_body", HttpParser_filter_body, 2);
485 rb_define_method(cHttpParser, "trailers", HttpParser_headers, 2);
486 rb_define_method(cHttpParser, "content_length", HttpParser_content_length, 0);
487 rb_define_method(cHttpParser, "body_eof?", HttpParser_body_eof, 0);
490 * The maximum size of a single chunk when using chunked transfer encoding.
491 * This is only a theoretical maximum used to detect errors in clients,
492 * it is highly unlikely to encounter clients that send more than
493 * several kilobytes at once.
495 rb_define_const(cHttpParser, "CHUNK_MAX", OFFT2NUM(UH_OFF_T_MAX));
498 * The maximum size of the body as specified by Content-Length.
499 * This is only a theoretical maximum, the actual limit is subject
500 * to the limits of the file system used for +Dir::tmpdir+
502 rb_define_const(cHttpParser, "LENGTH_MAX", OFFT2NUM(UH_OFF_T_MAX));
504 init_common_fields();
505 SET_GLOBAL(g_http_host, "HOST");
506 SET_GLOBAL(g_http_trailer, "TRAILER");
507 SET_GLOBAL(g_http_transfer_encoding, "TRANSFER_ENCODING");
508 SET_GLOBAL(g_content_length, "CONTENT_LENGTH");