2 * Copyright (c) 2009 Eric Wong (all bugs are Eric's fault)
3 * Copyright (c) 2005 Zed A. Shaw
4 * You can redistribute it and/or modify it under the same terms as Ruby.
10 #include "http11_parser.h"
/*
 * Fallback accessors for the data pointer and length of a Ruby String.
 * Each definition is guarded so that a version already supplied by the
 * Ruby headers is used as-is instead of being redefined here.
 */
#ifndef RSTRING_PTR
#define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif
#ifndef RSTRING_LEN
#define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
19 static VALUE mUnicorn
;
20 static VALUE cHttpParser
;
21 static VALUE eHttpParserError
;
22 static VALUE sym_http_body
;
/* Prefix put in front of header names when they become request-hash keys. */
#define HTTP_PREFIX "HTTP_"
/* Length of HTTP_PREFIX, not counting its terminating NUL byte. */
#define HTTP_PREFIX_LEN (sizeof HTTP_PREFIX - 1)
27 static VALUE global_rack_url_scheme
;
28 static VALUE global_request_method
;
29 static VALUE global_request_uri
;
30 static VALUE global_fragment
;
31 static VALUE global_query_string
;
32 static VALUE global_http_version
;
33 static VALUE global_content_length
;
34 static VALUE global_request_path
;
35 static VALUE global_content_type
;
36 static VALUE global_server_name
;
37 static VALUE global_server_port
;
38 static VALUE global_server_protocol
;
39 static VALUE global_server_protocol_value
;
40 static VALUE global_http_host
;
41 static VALUE global_http_x_forwarded_proto
;
42 static VALUE global_port_80
;
43 static VALUE global_port_443
;
44 static VALUE global_localhost
;
45 static VALUE global_http
;
/**
 * Defines, for element N, the size limit MAX_<N>_LENGTH and the matching
 * error message MAX_<N>_LENGTH_ERR.  Both N and length are stringified into
 * the message exactly as they are written at the point of use.
 */
#define DEF_MAX_LENGTH(N,length) \
  const size_t MAX_##N##_LENGTH = length; \
  const char *MAX_##N##_LENGTH_ERR = \
    "HTTP element " # N " is longer than the " # length " allowed length."
/**
 * Raises HttpParserError with the MAX_<N>_LENGTH_ERR message when len is
 * greater than MAX_<N>_LENGTH; does nothing otherwise.
 *
 * The body is wrapped in do { } while (0) so the macro acts as one
 * statement and is safe inside an unbraced if/else.  The message is passed
 * through "%s" so it is never interpreted as a format string.
 */
#define VALIDATE_MAX_LENGTH(len, N) do { \
  if ((len) > MAX_##N##_LENGTH) \
    rb_raise(eHttpParserError, "%s", MAX_##N##_LENGTH_ERR); \
} while (0)
/**
 * Assigns a frozen Ruby string built from val to global_<N> and passes the
 * address of global_<N> to rb_global_variable().
 *
 * Both steps are wrapped in do { } while (0) so the macro expands to a
 * single statement; without it only the assignment would be governed by an
 * enclosing unbraced if.
 */
#define DEF_GLOBAL(N, val) do { \
  global_##N = rb_obj_freeze(rb_str_new2(val)); \
  rb_global_variable(&global_##N); \
} while (0)
57 /* Defines the maximum allowed lengths for various input elements.*/
58 DEF_MAX_LENGTH(FIELD_NAME
, 256);
59 DEF_MAX_LENGTH(FIELD_VALUE
, 80 * 1024);
60 DEF_MAX_LENGTH(REQUEST_URI
, 1024 * 12);
61 DEF_MAX_LENGTH(FRAGMENT
, 1024); /* Don't know if this length is specified somewhere or not */
62 DEF_MAX_LENGTH(REQUEST_PATH
, 1024);
63 DEF_MAX_LENGTH(QUERY_STRING
, (1024 * 10));
64 DEF_MAX_LENGTH(HEADER
, (1024 * (80 + 32)));
67 const signed long len
;
73 * A list of common HTTP headers we expect to receive.
74 * This allows us to avoid repeatedly creating identical string
75 * objects to be used with rb_hash_aset().
/*
 * Table of header names with a slot for a cached Ruby string per entry.
 * NOTE(review): only part of the entry list (and neither the closing brace
 * nor any cleanup of the f() helper) is visible here -- confirm against the
 * complete file before editing.
 */
77 static struct common_field common_http_fields
[] = {
/* f(N) expands to { length of N without its NUL, N itself, Qnil } */
78 # define f(N) { (sizeof(N) - 1), N, Qnil }
87 f("CONTENT_ENCODING"),
96 f("IF_MODIFIED_SINCE"),
99 f("IF_UNMODIFIED_SINCE"),
100 f("KEEP_ALIVE"), /* Firefox sends this */
103 f("PROXY_AUTHORIZATION"),
108 f("TRANSFER_ENCODING"),
112 f("X_FORWARDED_FOR"), /* common for proxies */
113 f("X_FORWARDED_PROTO"), /* common for proxies */
114 f("X_REAL_IP"), /* common for proxies */
119 /* this function is not performance-critical */
120 static void init_common_fields(void)
123 struct common_field
*cf
= common_http_fields
;
124 char tmp
[256]; /* MAX_FIELD_NAME_LENGTH */
125 memcpy(tmp
, HTTP_PREFIX
, HTTP_PREFIX_LEN
);
127 for(i
= 0; i
< ARRAY_SIZE(common_http_fields
); cf
++, i
++) {
128 /* Rack doesn't like certain headers prefixed with "HTTP_" */
129 if (!strcmp("CONTENT_LENGTH", cf
->name
) ||
130 !strcmp("CONTENT_TYPE", cf
->name
)) {
131 cf
->value
= rb_str_new(cf
->name
, cf
->len
);
133 memcpy(tmp
+ HTTP_PREFIX_LEN
, cf
->name
, cf
->len
+ 1);
134 cf
->value
= rb_str_new(tmp
, HTTP_PREFIX_LEN
+ cf
->len
);
136 cf
->value
= rb_obj_freeze(cf
->value
);
137 rb_global_variable(&cf
->value
);
141 static VALUE
find_common_field_value(const char *field
, size_t flen
)
144 struct common_field
*cf
= common_http_fields
;
145 for(i
= 0; i
< ARRAY_SIZE(common_http_fields
); i
++, cf
++) {
146 if (cf
->len
== flen
&& !memcmp(cf
->name
, field
, flen
))
152 static void http_field(void *data
, const char *field
,
153 size_t flen
, const char *value
, size_t vlen
)
155 VALUE req
= (VALUE
)data
;
159 VALIDATE_MAX_LENGTH(flen
, FIELD_NAME
);
160 VALIDATE_MAX_LENGTH(vlen
, FIELD_VALUE
);
162 v
= rb_str_new(value
, vlen
);
164 f
= find_common_field_value(field
, flen
);
168 * We got a strange header that we don't have a memoized value for.
169 * Fallback to creating a new string to use as a hash key.
171 * using rb_str_new(NULL, len) here is faster than rb_str_buf_new(len)
172 * in my testing, because: there's no minimum allocation length (and
173 * no check for it, either), RSTRING_LEN(f) does not need to be
174 * written twice, and and RSTRING_PTR(f) will already be
175 * null-terminated for us.
177 f
= rb_str_new(NULL
, HTTP_PREFIX_LEN
+ flen
);
178 memcpy(RSTRING_PTR(f
), HTTP_PREFIX
, HTTP_PREFIX_LEN
);
179 memcpy(RSTRING_PTR(f
) + HTTP_PREFIX_LEN
, field
, flen
);
180 assert(*(RSTRING_PTR(f
) + RSTRING_LEN(f
)) == '\0'); /* paranoia */
181 /* fprintf(stderr, "UNKNOWN HEADER <%s>\n", RSTRING_PTR(f)); */
184 rb_hash_aset(req
, f
, v
);
187 static void request_method(void *data
, const char *at
, size_t length
)
189 VALUE req
= (VALUE
)data
;
192 val
= rb_str_new(at
, length
);
193 rb_hash_aset(req
, global_request_method
, val
);
196 static void request_uri(void *data
, const char *at
, size_t length
)
198 VALUE req
= (VALUE
)data
;
201 VALIDATE_MAX_LENGTH(length
, REQUEST_URI
);
203 val
= rb_str_new(at
, length
);
204 rb_hash_aset(req
, global_request_uri
, val
);
207 static void fragment(void *data
, const char *at
, size_t length
)
209 VALUE req
= (VALUE
)data
;
212 VALIDATE_MAX_LENGTH(length
, FRAGMENT
);
214 val
= rb_str_new(at
, length
);
215 rb_hash_aset(req
, global_fragment
, val
);
218 static void request_path(void *data
, const char *at
, size_t length
)
220 VALUE req
= (VALUE
)data
;
223 VALIDATE_MAX_LENGTH(length
, REQUEST_PATH
);
225 val
= rb_str_new(at
, length
);
226 rb_hash_aset(req
, global_request_path
, val
);
229 static void query_string(void *data
, const char *at
, size_t length
)
231 VALUE req
= (VALUE
)data
;
234 VALIDATE_MAX_LENGTH(length
, QUERY_STRING
);
236 val
= rb_str_new(at
, length
);
237 rb_hash_aset(req
, global_query_string
, val
);
240 static void http_version(void *data
, const char *at
, size_t length
)
242 VALUE req
= (VALUE
)data
;
243 VALUE val
= rb_str_new(at
, length
);
244 rb_hash_aset(req
, global_http_version
, val
);
247 /** Finalizes the request header to have a bunch of stuff that's needed. */
248 static void header_done(void *data
, const char *at
, size_t length
)
250 VALUE req
= (VALUE
)data
;
251 VALUE server_name
= global_localhost
;
252 VALUE server_port
= global_port_80
;
255 /* set rack.url_scheme to "https" or "http", no others are allowed by Rack */
256 if ((temp
= rb_hash_aref(req
, global_http_x_forwarded_proto
)) != Qnil
&&
257 RSTRING_LEN(temp
) == 5 &&
258 !memcmp("https", RSTRING_PTR(temp
), 5))
259 server_port
= global_port_443
;
262 rb_hash_aset(req
, global_rack_url_scheme
, temp
);
264 /* parse and set the SERVER_NAME and SERVER_PORT variables */
265 if ((temp
= rb_hash_aref(req
, global_http_host
)) != Qnil
) {
266 char *colon
= memchr(RSTRING_PTR(temp
), ':', RSTRING_LEN(temp
));
268 server_name
= rb_str_substr(temp
, 0, colon
- RSTRING_PTR(temp
));
269 server_port
= rb_str_substr(temp
, colon
- RSTRING_PTR(temp
)+1,
275 rb_hash_aset(req
, global_server_name
, server_name
);
276 rb_hash_aset(req
, global_server_port
, server_port
);
278 /* grab the initial body and stuff it into the hash */
279 rb_hash_aset(req
, sym_http_body
, rb_str_new(at
, length
));
280 rb_hash_aset(req
, global_server_protocol
, global_server_protocol_value
);
/*
 * Passed to Data_Wrap_Struct() in HttpParser_alloc() as the free function
 * for the wrapped http_parser.
 * NOTE(review): the body of this function is not visible in this view;
 * presumably data is the pointer wrapped in HttpParser_alloc() -- confirm.
 */
283 static void HttpParser_free(void *data
) {
292 static VALUE
HttpParser_alloc(VALUE klass
)
295 http_parser
*hp
= ALLOC_N(http_parser
, 1);
297 hp
->http_field
= http_field
;
298 hp
->request_method
= request_method
;
299 hp
->request_uri
= request_uri
;
300 hp
->fragment
= fragment
;
301 hp
->request_path
= request_path
;
302 hp
->query_string
= query_string
;
303 hp
->http_version
= http_version
;
304 hp
->header_done
= header_done
;
305 http_parser_init(hp
);
307 obj
= Data_Wrap_Struct(klass
, NULL
, HttpParser_free
, hp
);
315 * parser.new -> parser
317 * Creates a new parser.
319 static VALUE
HttpParser_init(VALUE self
)
321 http_parser
*http
= NULL
;
322 DATA_GET(self
, http_parser
, http
);
323 http_parser_init(http
);
331 * parser.reset -> nil
333 * Resets the parser to it's initial state so that you can reuse it
334 * rather than making new ones.
336 static VALUE
HttpParser_reset(VALUE self
)
338 http_parser
*http
= NULL
;
339 DATA_GET(self
, http_parser
, http
);
340 http_parser_init(http
);
348 * parser.execute(req_hash, data) -> true/false
350 * Takes a Hash and a String of data, parses the String of data filling
351 * in the Hash returning a boolean to indicate whether or not parsing
354 * This function now throws an exception when there is a parsing error.
355 * This makes the logic for working with the parser much easier. You
356 * will need to wrap the parser with an exception handling block.
359 static VALUE
HttpParser_execute(VALUE self
, VALUE req_hash
, VALUE data
)
362 char *dptr
= RSTRING_PTR(data
);
363 long dlen
= RSTRING_LEN(data
);
365 DATA_GET(self
, http_parser
, http
);
367 if (http
->nread
< dlen
) {
368 http
->data
= (void *)req_hash
;
369 http_parser_execute(http
, dptr
, dlen
);
371 VALIDATE_MAX_LENGTH(http
->nread
, HEADER
);
373 if (!http_parser_has_error(http
))
374 return http_parser_is_finished(http
) ? Qtrue
: Qfalse
;
376 rb_raise(eHttpParserError
, "Invalid HTTP format, parsing fails.");
378 rb_raise(eHttpParserError
, "Requested start is after data buffer end.");
/*
 * Extension setup statements: define the Unicorn module, create the shared
 * strings, define the error and parser classes with their methods, and
 * build the common-header table values.
 * NOTE(review): the enclosing function header is not visible in this view.
 */
384 mUnicorn
= rb_define_module("Unicorn");
/* shared strings; each DEF_GLOBAL(N, val) assigns global_<N> */
386 DEF_GLOBAL(rack_url_scheme
, "rack.url_scheme");
387 DEF_GLOBAL(request_method
, "REQUEST_METHOD");
388 DEF_GLOBAL(request_uri
, "REQUEST_URI");
389 DEF_GLOBAL(fragment
, "FRAGMENT");
390 DEF_GLOBAL(query_string
, "QUERY_STRING");
391 DEF_GLOBAL(http_version
, "HTTP_VERSION");
392 DEF_GLOBAL(request_path
, "REQUEST_PATH");
393 DEF_GLOBAL(content_length
, "CONTENT_LENGTH");
394 DEF_GLOBAL(content_type
, "CONTENT_TYPE");
395 DEF_GLOBAL(server_name
, "SERVER_NAME");
396 DEF_GLOBAL(server_port
, "SERVER_PORT");
397 DEF_GLOBAL(server_protocol
, "SERVER_PROTOCOL");
398 DEF_GLOBAL(server_protocol_value
, "HTTP/1.1");
399 DEF_GLOBAL(http_host
, "HTTP_HOST");
400 DEF_GLOBAL(http_x_forwarded_proto
, "HTTP_X_FORWARDED_PROTO");
401 DEF_GLOBAL(port_80
, "80");
402 DEF_GLOBAL(port_443
, "443");
403 DEF_GLOBAL(localhost
, "localhost");
404 DEF_GLOBAL(http
, "http");
/* Unicorn::HttpParserError is defined with rb_eIOError as its superclass */
406 eHttpParserError
= rb_define_class_under(mUnicorn
, "HttpParserError", rb_eIOError
);
/* Unicorn::HttpParser: allocator plus initialize/reset/execute methods */
408 cHttpParser
= rb_define_class_under(mUnicorn
, "HttpParser", rb_cObject
);
409 rb_define_alloc_func(cHttpParser
, HttpParser_alloc
);
410 rb_define_method(cHttpParser
, "initialize", HttpParser_init
,0);
411 rb_define_method(cHttpParser
, "reset", HttpParser_reset
,0);
412 rb_define_method(cHttpParser
, "execute", HttpParser_execute
,2);
/* symbol used as the request-hash key for the initial body */
413 sym_http_body
= ID2SYM(rb_intern("http_body"));
414 init_common_fields();