2 * Copyright (c) 2009 Eric Wong (all bugs are Eric's fault)
3 * Copyright (c) 2005 Zed A. Shaw
4 * You can redistribute it and/or modify it under the same terms as Ruby.
8 #include "unicorn_http.h"
10 static http_parser
*data_get(VALUE self
)
14 Data_Get_Struct(self
, http_parser
, http
);
19 static VALUE mUnicorn
;
20 static VALUE cHttpParser
;
21 static VALUE eHttpParserError
;
22 static VALUE sym_http_body
;
24 #define HTTP_PREFIX "HTTP_"
25 #define HTTP_PREFIX_LEN (sizeof(HTTP_PREFIX) - 1)
27 static VALUE global_rack_url_scheme
;
28 static VALUE global_request_method
;
29 static VALUE global_request_uri
;
30 static VALUE global_fragment
;
31 static VALUE global_query_string
;
32 static VALUE global_http_version
;
33 static VALUE global_request_path
;
34 static VALUE global_path_info
;
35 static VALUE global_server_name
;
36 static VALUE global_server_port
;
37 static VALUE global_server_protocol
;
38 static VALUE global_server_protocol_value
;
39 static VALUE global_http_host
;
40 static VALUE global_http_x_forwarded_proto
;
41 static VALUE global_port_80
;
42 static VALUE global_port_443
;
43 static VALUE global_localhost
;
44 static VALUE global_http
;
/**
 * For input element N, defines the size limit MAX_<N>_LENGTH together with
 * MAX_<N>_LENGTH_ERR, the message reported when that limit is exceeded.
 * Both N and the limit expression are stringified into the message exactly
 * as they are spelled at the point of use.
 */
#define DEF_MAX_LENGTH(N, length) \
  static const size_t MAX_##N##_LENGTH = (length); \
  static const char * const MAX_##N##_LENGTH_ERR = \
    "HTTP element " # N " is longer than the " # length " allowed length."
/**
 * Validates the max length of given input and throws an HttpParserError
 * exception when it is longer than MAX_<N>_LENGTH.
 *
 * len: the length to check (evaluated once)
 * N:   the element name previously passed to DEF_MAX_LENGTH
 *
 * The message is handed to rb_raise() through an explicit "%s" so that it
 * is never interpreted as a format string; the raised text is unchanged.
 */
#define VALIDATE_MAX_LENGTH(len, N) do { \
  if ((len) > MAX_##N##_LENGTH) \
    rb_raise(eHttpParserError, "%s", MAX_##N##_LENGTH_ERR); \
} while (0)
/**
 * Used in the init method: builds a frozen Ruby string from the string
 * literal val, stores it in global_<N> and registers that variable with
 * the garbage collector.
 */
#define DEF_GLOBAL(N, val) do { \
  VALUE def_global_str_ = rb_str_new(val, sizeof(val) - 1); \
  global_##N = rb_obj_freeze(def_global_str_); \
  rb_global_variable(&global_##N); \
} while (0)
67 /* Defines the maximum allowed lengths for various input elements.*/
68 DEF_MAX_LENGTH(FIELD_NAME
, 256);
69 DEF_MAX_LENGTH(FIELD_VALUE
, 80 * 1024);
70 DEF_MAX_LENGTH(REQUEST_URI
, 1024 * 12);
71 DEF_MAX_LENGTH(FRAGMENT
, 1024); /* Don't know if this length is specified somewhere or not */
72 DEF_MAX_LENGTH(REQUEST_PATH
, 1024);
73 DEF_MAX_LENGTH(QUERY_STRING
, (1024 * 10));
74 DEF_MAX_LENGTH(HEADER
, (1024 * (80 + 32)));
77 const signed long len
;
83 * A list of common HTTP headers we expect to receive.
84 * This allows us to avoid repeatedly creating identical string
85 * objects to be used with rb_hash_aset().
87 static struct common_field common_http_fields
[] = {
88 # define f(N) { (sizeof(N) - 1), N, Qnil }
97 f("CONTENT_ENCODING"),
106 f("IF_MODIFIED_SINCE"),
109 f("IF_UNMODIFIED_SINCE"),
110 f("KEEP_ALIVE"), /* Firefox sends this */
113 f("PROXY_AUTHORIZATION"),
118 f("TRANSFER_ENCODING"),
122 f("X_FORWARDED_FOR"), /* common for proxies */
123 f("X_FORWARDED_PROTO"), /* common for proxies */
124 f("X_REAL_IP"), /* common for proxies */
129 /* this function is not performance-critical */
130 static void init_common_fields(void)
133 struct common_field
*cf
= common_http_fields
;
134 char tmp
[256]; /* MAX_FIELD_NAME_LENGTH */
135 memcpy(tmp
, HTTP_PREFIX
, HTTP_PREFIX_LEN
);
137 for(i
= 0; i
< ARRAY_SIZE(common_http_fields
); cf
++, i
++) {
138 /* Rack doesn't like certain headers prefixed with "HTTP_" */
139 if (!strcmp("CONTENT_LENGTH", cf
->name
) ||
140 !strcmp("CONTENT_TYPE", cf
->name
)) {
141 cf
->value
= rb_str_new(cf
->name
, cf
->len
);
143 memcpy(tmp
+ HTTP_PREFIX_LEN
, cf
->name
, cf
->len
+ 1);
144 cf
->value
= rb_str_new(tmp
, HTTP_PREFIX_LEN
+ cf
->len
);
146 cf
->value
= rb_obj_freeze(cf
->value
);
147 rb_global_variable(&cf
->value
);
151 static VALUE
find_common_field_value(const char *field
, size_t flen
)
154 struct common_field
*cf
= common_http_fields
;
155 for(i
= 0; i
< ARRAY_SIZE(common_http_fields
); i
++, cf
++) {
156 if (cf
->len
== flen
&& !memcmp(cf
->name
, field
, flen
))
162 static void http_field(VALUE req
, const char *field
,
163 size_t flen
, const char *value
, size_t vlen
)
167 VALIDATE_MAX_LENGTH(flen
, FIELD_NAME
);
168 VALIDATE_MAX_LENGTH(vlen
, FIELD_VALUE
);
170 f
= find_common_field_value(field
, flen
);
174 * We got a strange header that we don't have a memoized value for.
175 * Fallback to creating a new string to use as a hash key.
177 * using rb_str_new(NULL, len) here is faster than rb_str_buf_new(len)
178 * in my testing, because: there's no minimum allocation length (and
179 * no check for it, either), RSTRING_LEN(f) does not need to be
180 * written twice, and and RSTRING_PTR(f) will already be
181 * null-terminated for us.
183 f
= rb_str_new(NULL
, HTTP_PREFIX_LEN
+ flen
);
184 memcpy(RSTRING_PTR(f
), HTTP_PREFIX
, HTTP_PREFIX_LEN
);
185 memcpy(RSTRING_PTR(f
) + HTTP_PREFIX_LEN
, field
, flen
);
186 assert(*(RSTRING_PTR(f
) + RSTRING_LEN(f
)) == '\0'); /* paranoia */
187 /* fprintf(stderr, "UNKNOWN HEADER <%s>\n", RSTRING_PTR(f)); */
188 } else if (f
== global_http_host
&& rb_hash_aref(req
, f
) != Qnil
) {
192 rb_hash_aset(req
, f
, rb_str_new(value
, vlen
));
195 static void request_method(VALUE req
, const char *at
, size_t length
)
197 rb_hash_aset(req
, global_request_method
, rb_str_new(at
, length
));
200 static void scheme(VALUE req
, const char *at
, size_t length
)
202 rb_hash_aset(req
, global_rack_url_scheme
, rb_str_new(at
, length
));
205 static void host(VALUE req
, const char *at
, size_t length
)
207 rb_hash_aset(req
, global_http_host
, rb_str_new(at
, length
));
210 static void request_uri(VALUE req
, const char *at
, size_t length
)
212 VALIDATE_MAX_LENGTH(length
, REQUEST_URI
);
214 rb_hash_aset(req
, global_request_uri
, rb_str_new(at
, length
));
216 /* "OPTIONS * HTTP/1.1\r\n" is a valid request */
217 if (length
== 1 && *at
== '*') {
218 VALUE val
= rb_str_new(NULL
, 0);
219 rb_hash_aset(req
, global_request_path
, val
);
220 rb_hash_aset(req
, global_path_info
, val
);
224 static void fragment(VALUE req
, const char *at
, size_t length
)
226 VALIDATE_MAX_LENGTH(length
, FRAGMENT
);
228 rb_hash_aset(req
, global_fragment
, rb_str_new(at
, length
));
231 static void request_path(VALUE req
, const char *at
, size_t length
)
235 VALIDATE_MAX_LENGTH(length
, REQUEST_PATH
);
237 val
= rb_str_new(at
, length
);
238 rb_hash_aset(req
, global_request_path
, val
);
240 /* rack says PATH_INFO must start with "/" or be empty */
241 if (!(length
== 1 && *at
== '*'))
242 rb_hash_aset(req
, global_path_info
, val
);
245 static void query_string(VALUE req
, const char *at
, size_t length
)
247 VALIDATE_MAX_LENGTH(length
, QUERY_STRING
);
249 rb_hash_aset(req
, global_query_string
, rb_str_new(at
, length
));
252 static void http_version(VALUE req
, const char *at
, size_t length
)
254 rb_hash_aset(req
, global_http_version
, rb_str_new(at
, length
));
257 /** Finalizes the request header to have a bunch of stuff that's needed. */
258 static void header_done(VALUE req
, const char *at
, size_t length
)
260 VALUE server_name
= global_localhost
;
261 VALUE server_port
= global_port_80
;
264 /* rack requires QUERY_STRING */
265 if (rb_hash_aref(req
, global_query_string
) == Qnil
)
266 rb_hash_aset(req
, global_query_string
, rb_str_new(NULL
, 0));
268 /* set rack.url_scheme to "https" or "http", no others are allowed by Rack */
269 if ((temp
= rb_hash_aref(req
, global_rack_url_scheme
)) == Qnil
) {
270 if ((temp
= rb_hash_aref(req
, global_http_x_forwarded_proto
)) != Qnil
&&
271 RSTRING_LEN(temp
) == 5 &&
272 !memcmp("https", RSTRING_PTR(temp
), 5))
273 server_port
= global_port_443
;
276 rb_hash_aset(req
, global_rack_url_scheme
, temp
);
277 } else if (RSTRING_LEN(temp
) == 5 && !memcmp("https", RSTRING_PTR(temp
), 5)) {
278 server_port
= global_port_443
;
281 /* parse and set the SERVER_NAME and SERVER_PORT variables */
282 if ((temp
= rb_hash_aref(req
, global_http_host
)) != Qnil
) {
283 char *colon
= memchr(RSTRING_PTR(temp
), ':', RSTRING_LEN(temp
));
285 long port_start
= colon
- RSTRING_PTR(temp
) + 1;
287 server_name
= rb_str_substr(temp
, 0, colon
- RSTRING_PTR(temp
));
288 if ((RSTRING_LEN(temp
) - port_start
) > 0)
289 server_port
= rb_str_substr(temp
, port_start
, RSTRING_LEN(temp
));
294 rb_hash_aset(req
, global_server_name
, server_name
);
295 rb_hash_aset(req
, global_server_port
, server_port
);
296 rb_hash_aset(req
, global_server_protocol
, global_server_protocol_value
);
298 /* grab the initial body and stuff it into the hash */
299 temp
= rb_hash_aref(req
, global_request_method
);
301 long len
= RSTRING_LEN(temp
);
302 char *ptr
= RSTRING_PTR(temp
);
304 if (memcmp(ptr
, "HEAD", len
) && memcmp(ptr
, "GET", len
))
305 rb_hash_aset(req
, sym_http_body
, rb_str_new(at
, length
));
309 static VALUE
HttpParser_alloc(VALUE klass
)
312 return Data_Make_Struct(klass
, http_parser
, NULL
, NULL
, hp
);
318 * parser.new -> parser
320 * Creates a new parser.
322 static VALUE
HttpParser_init(VALUE self
)
324 http_parser_init(data_get(self
));
332 * parser.reset -> nil
334 * Resets the parser to it's initial state so that you can reuse it
335 * rather than making new ones.
337 static VALUE
HttpParser_reset(VALUE self
)
339 http_parser_init(data_get(self
));
347 * parser.execute(req, data) -> true/false
349 * Takes a Hash and a String of data, parses the String of data filling
350 * in the Hash returning a boolean to indicate whether or not parsing
353 * This function now throws an exception when there is a parsing error.
354 * This makes the logic for working with the parser much easier. You
355 * will need to wrap the parser with an exception handling block.
358 static VALUE
HttpParser_execute(VALUE self
, VALUE req
, VALUE data
)
360 http_parser
*http
= data_get(self
);
361 char *dptr
= RSTRING_PTR(data
);
362 long dlen
= RSTRING_LEN(data
);
364 if (http
->start
.offset
< dlen
) {
365 http_parser_execute(http
, req
, dptr
, dlen
);
367 VALIDATE_MAX_LENGTH(http
->start
.offset
, HEADER
);
369 if (!http_parser_has_error(http
))
370 return http_parser_is_finished(http
) ? Qtrue
: Qfalse
;
372 rb_raise(eHttpParserError
, "Invalid HTTP format, parsing fails.");
374 rb_raise(eHttpParserError
, "Requested start is after data buffer end.");
377 void Init_unicorn_http(void)
379 mUnicorn
= rb_define_module("Unicorn");
381 DEF_GLOBAL(rack_url_scheme
, "rack.url_scheme");
382 DEF_GLOBAL(request_method
, "REQUEST_METHOD");
383 DEF_GLOBAL(request_uri
, "REQUEST_URI");
384 DEF_GLOBAL(fragment
, "FRAGMENT");
385 DEF_GLOBAL(query_string
, "QUERY_STRING");
386 DEF_GLOBAL(http_version
, "HTTP_VERSION");
387 DEF_GLOBAL(request_path
, "REQUEST_PATH");
388 DEF_GLOBAL(path_info
, "PATH_INFO");
389 DEF_GLOBAL(server_name
, "SERVER_NAME");
390 DEF_GLOBAL(server_port
, "SERVER_PORT");
391 DEF_GLOBAL(server_protocol
, "SERVER_PROTOCOL");
392 DEF_GLOBAL(server_protocol_value
, "HTTP/1.1");
393 DEF_GLOBAL(http_x_forwarded_proto
, "HTTP_X_FORWARDED_PROTO");
394 DEF_GLOBAL(port_80
, "80");
395 DEF_GLOBAL(port_443
, "443");
396 DEF_GLOBAL(localhost
, "localhost");
397 DEF_GLOBAL(http
, "http");
399 eHttpParserError
= rb_define_class_under(mUnicorn
, "HttpParserError", rb_eIOError
);
401 cHttpParser
= rb_define_class_under(mUnicorn
, "HttpParser", rb_cObject
);
402 rb_define_alloc_func(cHttpParser
, HttpParser_alloc
);
403 rb_define_method(cHttpParser
, "initialize", HttpParser_init
,0);
404 rb_define_method(cHttpParser
, "reset", HttpParser_reset
,0);
405 rb_define_method(cHttpParser
, "execute", HttpParser_execute
,2);
406 sym_http_body
= ID2SYM(rb_intern("http_body"));
407 init_common_fields();
408 global_http_host
= find_common_field_value("HOST", 4);
409 assert(global_http_host
!= Qnil
);