/**
 * Copyright (c) 2009 Eric Wong (all bugs are Eric's fault)
 * Copyright (c) 2005 Zed A. Shaw
 * You can redistribute it and/or modify it under the same terms as Ruby.
 */
10 #include "http11_parser.h"
/*
 * Fallback accessors for the byte pointer and byte length of a Ruby String.
 * Each one is only defined when the Ruby headers have not already supplied
 * it, so the fallback never redefines (or conflicts with) the official macro.
 */
#ifndef RSTRING_PTR
#define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif
#ifndef RSTRING_LEN
#define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
19 static VALUE mUnicorn
;
20 static VALUE cHttpParser
;
21 static VALUE eHttpParserError
;
22 static VALUE sym_http_body
;
24 #define HTTP_PREFIX "HTTP_"
25 #define HTTP_PREFIX_LEN (sizeof(HTTP_PREFIX) - 1)
27 static VALUE global_rack_url_scheme
;
28 static VALUE global_request_method
;
29 static VALUE global_request_uri
;
30 static VALUE global_fragment
;
31 static VALUE global_query_string
;
32 static VALUE global_http_version
;
33 static VALUE global_content_length
;
34 static VALUE global_request_path
;
35 static VALUE global_content_type
;
36 static VALUE global_server_name
;
37 static VALUE global_server_port
;
38 static VALUE global_server_protocol
;
39 static VALUE global_server_protocol_value
;
40 static VALUE global_http_host
;
41 static VALUE global_http_x_forwarded_proto
;
42 static VALUE global_port_80
;
43 static VALUE global_localhost
;
/**
 * Defines common length and error messages for input length validation.
 *
 * Expands to two declarations: MAX_<N>_LENGTH, holding the limit, and
 * MAX_<N>_LENGTH_ERR, holding the message used when the limit is exceeded.
 * The `length' argument is also stringified verbatim into that message.
 */
#define DEF_MAX_LENGTH(N,length) const size_t MAX_##N##_LENGTH = length; const char *MAX_##N##_LENGTH_ERR = "HTTP element " # N " is longer than the " # length " allowed length."

/**
 * Validates the max length of given input and throws an HttpParserError
 * exception if over.
 *
 * The message is passed as a "%s" argument rather than as the format string
 * itself, so a '%' in the stringified limit can never be interpreted as a
 * conversion.  The do/while(0) wrapper makes the macro a single statement
 * that is safe inside an unbraced if/else.
 */
#define VALIDATE_MAX_LENGTH(len, N) \
  do { \
    if ((len) > MAX_##N##_LENGTH) \
      rb_raise(eHttpParserError, "%s", MAX_##N##_LENGTH_ERR); \
  } while (0)

/**
 * Defines global strings in the init method.
 *
 * Stores a frozen copy of `val' in global_<N> and registers the variable
 * with rb_global_variable().  Wrapped in do/while(0) so that both steps
 * always execute together, even under an unbraced conditional.
 */
#define DEF_GLOBAL(N, val) \
  do { \
    global_##N = rb_obj_freeze(rb_str_new2(val)); \
    rb_global_variable(&global_##N); \
  } while (0)
55 /* Defines the maximum allowed lengths for various input elements.*/
56 DEF_MAX_LENGTH(FIELD_NAME
, 256);
57 DEF_MAX_LENGTH(FIELD_VALUE
, 80 * 1024);
58 DEF_MAX_LENGTH(REQUEST_URI
, 1024 * 12);
59 DEF_MAX_LENGTH(FRAGMENT
, 1024); /* Don't know if this length is specified somewhere or not */
60 DEF_MAX_LENGTH(REQUEST_PATH
, 1024);
61 DEF_MAX_LENGTH(QUERY_STRING
, (1024 * 10));
62 DEF_MAX_LENGTH(HEADER
, (1024 * (80 + 32)));
65 const signed long len
;
/*
 * A list of common HTTP headers we expect to receive.
 * This allows us to avoid repeatedly creating identical string
 * objects to be used with rb_hash_aset().
 */
75 static struct common_field common_http_fields
[] = {
76 # define f(N) { (sizeof(N) - 1), N, Qnil }
85 f("CONTENT_ENCODING"),
94 f("IF_MODIFIED_SINCE"),
97 f("IF_UNMODIFIED_SINCE"),
98 f("KEEP_ALIVE"), /* Firefox sends this */
101 f("PROXY_AUTHORIZATION"),
106 f("TRANSFER_ENCODING"),
110 f("X_FORWARDED_FOR"), /* common for proxies */
111 f("X_FORWARDED_PROTO"), /* common for proxies */
112 f("X_REAL_IP"), /* common for proxies */
117 /* this function is not performance-critical */
118 static void init_common_fields(void)
121 struct common_field
*cf
= common_http_fields
;
122 char tmp
[256]; /* MAX_FIELD_NAME_LENGTH */
123 memcpy(tmp
, HTTP_PREFIX
, HTTP_PREFIX_LEN
);
125 for(i
= 0; i
< ARRAY_SIZE(common_http_fields
); cf
++, i
++) {
126 /* Rack doesn't like certain headers prefixed with "HTTP_" */
127 if (!strcmp("CONTENT_LENGTH", cf
->name
) ||
128 !strcmp("CONTENT_TYPE", cf
->name
)) {
129 cf
->value
= rb_str_new(cf
->name
, cf
->len
);
131 memcpy(tmp
+ HTTP_PREFIX_LEN
, cf
->name
, cf
->len
+ 1);
132 cf
->value
= rb_str_new(tmp
, HTTP_PREFIX_LEN
+ cf
->len
);
134 cf
->value
= rb_obj_freeze(cf
->value
);
135 rb_global_variable(&cf
->value
);
139 static VALUE
find_common_field_value(const char *field
, size_t flen
)
142 struct common_field
*cf
= common_http_fields
;
143 for(i
= 0; i
< ARRAY_SIZE(common_http_fields
); i
++, cf
++) {
144 if (cf
->len
== flen
&& !memcmp(cf
->name
, field
, flen
))
150 static void http_field(void *data
, const char *field
,
151 size_t flen
, const char *value
, size_t vlen
)
153 VALUE req
= (VALUE
)data
;
157 VALIDATE_MAX_LENGTH(flen
, FIELD_NAME
);
158 VALIDATE_MAX_LENGTH(vlen
, FIELD_VALUE
);
160 v
= rb_str_new(value
, vlen
);
162 f
= find_common_field_value(field
, flen
);
166 * We got a strange header that we don't have a memoized value for.
167 * Fallback to creating a new string to use as a hash key.
169 * using rb_str_new(NULL, len) here is faster than rb_str_buf_new(len)
170 * in my testing, because: there's no minimum allocation length (and
171 * no check for it, either), RSTRING_LEN(f) does not need to be
172 * written twice, and and RSTRING_PTR(f) will already be
173 * null-terminated for us.
175 f
= rb_str_new(NULL
, HTTP_PREFIX_LEN
+ flen
);
176 memcpy(RSTRING_PTR(f
), HTTP_PREFIX
, HTTP_PREFIX_LEN
);
177 memcpy(RSTRING_PTR(f
) + HTTP_PREFIX_LEN
, field
, flen
);
178 assert(*(RSTRING_PTR(f
) + RSTRING_LEN(f
)) == '\0'); /* paranoia */
179 /* fprintf(stderr, "UNKNOWN HEADER <%s>\n", RSTRING_PTR(f)); */
182 rb_hash_aset(req
, f
, v
);
185 static void request_method(void *data
, const char *at
, size_t length
)
187 VALUE req
= (VALUE
)data
;
190 val
= rb_str_new(at
, length
);
191 rb_hash_aset(req
, global_request_method
, val
);
194 static void request_uri(void *data
, const char *at
, size_t length
)
196 VALUE req
= (VALUE
)data
;
199 VALIDATE_MAX_LENGTH(length
, REQUEST_URI
);
201 val
= rb_str_new(at
, length
);
202 rb_hash_aset(req
, global_request_uri
, val
);
205 static void fragment(void *data
, const char *at
, size_t length
)
207 VALUE req
= (VALUE
)data
;
210 VALIDATE_MAX_LENGTH(length
, FRAGMENT
);
212 val
= rb_str_new(at
, length
);
213 rb_hash_aset(req
, global_fragment
, val
);
216 static void request_path(void *data
, const char *at
, size_t length
)
218 VALUE req
= (VALUE
)data
;
221 VALIDATE_MAX_LENGTH(length
, REQUEST_PATH
);
223 val
= rb_str_new(at
, length
);
224 rb_hash_aset(req
, global_request_path
, val
);
227 static void query_string(void *data
, const char *at
, size_t length
)
229 VALUE req
= (VALUE
)data
;
232 VALIDATE_MAX_LENGTH(length
, QUERY_STRING
);
234 val
= rb_str_new(at
, length
);
235 rb_hash_aset(req
, global_query_string
, val
);
238 static void http_version(void *data
, const char *at
, size_t length
)
240 VALUE req
= (VALUE
)data
;
241 VALUE val
= rb_str_new(at
, length
);
242 rb_hash_aset(req
, global_http_version
, val
);
245 /** Finalizes the request header to have a bunch of stuff that's
248 static void header_done(void *data
, const char *at
, size_t length
)
250 VALUE req
= (VALUE
)data
;
254 /* set rack.url_scheme to "https" or "http" */
255 if ((temp
= rb_hash_aref(req
, global_http_x_forwarded_proto
)) != Qnil
) {
256 if (strcmp("https", RSTRING_PTR(temp
)))
257 temp
= rb_str_new("http", 4);
258 /* we leave temp alone if it's "https" */
260 temp
= rb_str_new("http", 4);
262 rb_hash_aset(req
, global_rack_url_scheme
, temp
);
264 /* set the SERVER_NAME and SERVER_PORT variables */
265 if((temp
= rb_hash_aref(req
, global_http_host
)) != Qnil
) {
266 colon
= memchr(RSTRING_PTR(temp
), ':', RSTRING_LEN(temp
));
268 rb_hash_aset(req
, global_server_name
, rb_str_substr(temp
, 0, colon
- RSTRING_PTR(temp
)));
269 rb_hash_aset(req
, global_server_port
,
270 rb_str_substr(temp
, colon
- RSTRING_PTR(temp
)+1,
273 rb_hash_aset(req
, global_server_name
, temp
);
274 rb_hash_aset(req
, global_server_port
, global_port_80
);
277 rb_hash_aset(req
, global_server_name
, global_localhost
);
278 rb_hash_aset(req
, global_server_port
, global_port_80
);
281 /* grab the initial body and stuff it into the hash */
282 rb_hash_aset(req
, sym_http_body
, rb_str_new(at
, length
));
283 rb_hash_aset(req
, global_server_protocol
, global_server_protocol_value
);
286 static void HttpParser_free(void *data
) {
295 static VALUE
HttpParser_alloc(VALUE klass
)
298 http_parser
*hp
= ALLOC_N(http_parser
, 1);
300 hp
->http_field
= http_field
;
301 hp
->request_method
= request_method
;
302 hp
->request_uri
= request_uri
;
303 hp
->fragment
= fragment
;
304 hp
->request_path
= request_path
;
305 hp
->query_string
= query_string
;
306 hp
->http_version
= http_version
;
307 hp
->header_done
= header_done
;
308 http_parser_init(hp
);
310 obj
= Data_Wrap_Struct(klass
, NULL
, HttpParser_free
, hp
);
318 * parser.new -> parser
320 * Creates a new parser.
322 static VALUE
HttpParser_init(VALUE self
)
324 http_parser
*http
= NULL
;
325 DATA_GET(self
, http_parser
, http
);
326 http_parser_init(http
);
334 * parser.reset -> nil
336 * Resets the parser to it's initial state so that you can reuse it
337 * rather than making new ones.
339 static VALUE
HttpParser_reset(VALUE self
)
341 http_parser
*http
= NULL
;
342 DATA_GET(self
, http_parser
, http
);
343 http_parser_init(http
);
351 * parser.execute(req_hash, data) -> true/false
353 * Takes a Hash and a String of data, parses the String of data filling
354 * in the Hash returning a boolean to indicate whether or not parsing
357 * This function now throws an exception when there is a parsing error.
358 * This makes the logic for working with the parser much easier. You
359 * will need to wrap the parser with an exception handling block.
362 static VALUE
HttpParser_execute(VALUE self
, VALUE req_hash
, VALUE data
)
365 char *dptr
= RSTRING_PTR(data
);
366 long dlen
= RSTRING_LEN(data
);
368 DATA_GET(self
, http_parser
, http
);
370 if (http
->nread
< dlen
) {
371 http
->data
= (void *)req_hash
;
372 http_parser_execute(http
, dptr
, dlen
);
374 VALIDATE_MAX_LENGTH(http
->nread
, HEADER
);
376 if (!http_parser_has_error(http
))
377 return http_parser_is_finished(http
) ? Qtrue
: Qfalse
;
379 rb_raise(eHttpParserError
, "Invalid HTTP format, parsing fails.");
381 rb_raise(eHttpParserError
, "Requested start is after data buffer end.");
387 mUnicorn
= rb_define_module("Unicorn");
389 DEF_GLOBAL(rack_url_scheme
, "rack.url_scheme");
390 DEF_GLOBAL(request_method
, "REQUEST_METHOD");
391 DEF_GLOBAL(request_uri
, "REQUEST_URI");
392 DEF_GLOBAL(fragment
, "FRAGMENT");
393 DEF_GLOBAL(query_string
, "QUERY_STRING");
394 DEF_GLOBAL(http_version
, "HTTP_VERSION");
395 DEF_GLOBAL(request_path
, "REQUEST_PATH");
396 DEF_GLOBAL(content_length
, "CONTENT_LENGTH");
397 DEF_GLOBAL(content_type
, "CONTENT_TYPE");
398 DEF_GLOBAL(server_name
, "SERVER_NAME");
399 DEF_GLOBAL(server_port
, "SERVER_PORT");
400 DEF_GLOBAL(server_protocol
, "SERVER_PROTOCOL");
401 DEF_GLOBAL(server_protocol_value
, "HTTP/1.1");
402 DEF_GLOBAL(http_host
, "HTTP_HOST");
403 DEF_GLOBAL(http_x_forwarded_proto
, "HTTP_X_FORWARDED_PROTO");
404 DEF_GLOBAL(port_80
, "80");
405 DEF_GLOBAL(localhost
, "localhost");
407 eHttpParserError
= rb_define_class_under(mUnicorn
, "HttpParserError", rb_eIOError
);
409 cHttpParser
= rb_define_class_under(mUnicorn
, "HttpParser", rb_cObject
);
410 rb_define_alloc_func(cHttpParser
, HttpParser_alloc
);
411 rb_define_method(cHttpParser
, "initialize", HttpParser_init
,0);
412 rb_define_method(cHttpParser
, "reset", HttpParser_reset
,0);
413 rb_define_method(cHttpParser
, "execute", HttpParser_execute
,2);
414 sym_http_body
= ID2SYM(rb_intern("http_body"));
415 init_common_fields();