[core] fix one-byte OOB read (underflow)
[lighttpd.git] / src / request.c
blob9be71b1e38c0fbc8c841fcef1fdee09033f024ce
#include "first.h"

#include "request.h"
#include "base.h"
#include "burl.h"
#include "http_header.h"
#include "http_kv.h"
#include "log.h"
#include "sock_addr.h"

#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
15 static int request_check_hostname(buffer *host) {
16 enum { DOMAINLABEL, TOPLABEL } stage = TOPLABEL;
17 size_t i;
18 int label_len = 0;
19 size_t host_len, hostport_len;
20 char *colon;
21 int is_ip = -1; /* -1 don't know yet, 0 no, 1 yes */
22 int level = 0;
25 * hostport = host [ ":" port ]
26 * host = hostname | IPv4address | IPv6address
27 * hostname = *( domainlabel "." ) toplabel [ "." ]
28 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
29 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
30 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
31 * IPv6address = "[" ... "]"
32 * port = *digit
35 /* IPv6 adress */
36 if (host->ptr[0] == '[') {
37 char *c = host->ptr + 1;
38 int colon_cnt = 0;
40 /* check the address inside [...] */
41 for (; *c && *c != ']'; c++) {
42 if (*c == ':') {
43 if (++colon_cnt > 7) {
44 return -1;
46 } else if (!light_isxdigit(*c) && '.' != *c) {
47 return -1;
51 /* missing ] */
52 if (!*c) {
53 return -1;
56 /* check port */
57 if (*(c+1) == ':') {
58 for (c += 2; *c; c++) {
59 if (!light_isdigit(*c)) {
60 return -1;
64 else if ('\0' != *(c+1)) {
65 /* only a port is allowed to follow [...] */
66 return -1;
68 return 0;
71 hostport_len = host_len = buffer_string_length(host);
73 if (NULL != (colon = memchr(host->ptr, ':', host_len))) {
74 char *c = colon + 1;
76 /* check portnumber */
77 for (; *c; c++) {
78 if (!light_isdigit(*c)) return -1;
81 /* remove the port from the host-len */
82 host_len = colon - host->ptr;
85 /* Host is empty */
86 if (host_len == 0) return -1;
88 /* if the hostname ends in a "." strip it */
89 if (host->ptr[host_len-1] == '.') {
90 /* shift port info one left */
91 if (NULL != colon) memmove(colon-1, colon, hostport_len - host_len);
92 buffer_string_set_length(host, --hostport_len);
93 if (--host_len == 0) return -1;
97 /* scan from the right and skip the \0 */
98 for (i = host_len; i-- > 0; ) {
99 const char c = host->ptr[i];
101 switch (stage) {
102 case TOPLABEL:
103 if (c == '.') {
104 /* only switch stage, if this is not the last character */
105 if (i != host_len - 1) {
106 if (label_len == 0) {
107 return -1;
110 /* check the first character at right of the dot */
111 if (is_ip == 0) {
112 if (!light_isalnum(host->ptr[i+1])) {
113 return -1;
115 } else if (!light_isdigit(host->ptr[i+1])) {
116 is_ip = 0;
117 } else if ('-' == host->ptr[i+1]) {
118 return -1;
119 } else {
120 /* just digits */
121 is_ip = 1;
124 stage = DOMAINLABEL;
126 label_len = 0;
127 level++;
128 } else if (i == 0) {
129 /* just a dot and nothing else is evil */
130 return -1;
132 } else if (i == 0) {
133 /* the first character of the hostname */
134 if (!light_isalnum(c)) {
135 return -1;
137 label_len++;
138 } else {
139 if (c != '-' && !light_isalnum(c)) {
140 return -1;
142 if (is_ip == -1) {
143 if (!light_isdigit(c)) is_ip = 0;
145 label_len++;
148 break;
149 case DOMAINLABEL:
150 if (is_ip == 1) {
151 if (c == '.') {
152 if (label_len == 0) {
153 return -1;
156 label_len = 0;
157 level++;
158 } else if (!light_isdigit(c)) {
159 return -1;
160 } else {
161 label_len++;
163 } else {
164 if (c == '.') {
165 if (label_len == 0) {
166 return -1;
169 /* c is either - or alphanum here */
170 if ('-' == host->ptr[i+1]) {
171 return -1;
174 label_len = 0;
175 level++;
176 } else if (i == 0) {
177 if (!light_isalnum(c)) {
178 return -1;
180 label_len++;
181 } else {
182 if (c != '-' && !light_isalnum(c)) {
183 return -1;
185 label_len++;
189 break;
193 /* a IP has to consist of 4 parts */
194 if (is_ip == 1 && level != 3) {
195 return -1;
198 if (label_len == 0) {
199 return -1;
202 return 0;
205 int http_request_host_normalize(buffer *b, int scheme_port) {
207 * check for and canonicalize numeric IP address and portnum (optional)
208 * (IP address may be followed by ":portnum" (optional))
209 * - IPv6: "[...]"
210 * - IPv4: "x.x.x.x"
211 * - IPv4: 12345678 (32-bit decimal number)
212 * - IPv4: 012345678 (32-bit octal number)
213 * - IPv4: 0x12345678 (32-bit hex number)
215 * allow any chars (except ':' and '\0' and stray '[' or ']')
216 * (other code may check chars more strictly or more pedantically)
217 * ':' delimits (optional) port at end of string
218 * "[]" wraps IPv6 address literal
219 * '\0' should have been rejected earlier were it present
221 * any chars includes, but is not limited to:
222 * - allow '-' any where, even at beginning of word
223 * (security caution: might be confused for cmd flag if passed to shell)
224 * - allow all-digit TLDs
225 * (might be mistaken for IPv4 addr by inet_aton()
226 * unless non-digits appear in subdomain)
229 /* Note: not using getaddrinfo() since it does not support "[]" around IPv6
230 * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings.
231 * Not using inet_pton() (when available) on IPv4 for similar reasons. */
233 const char * const p = b->ptr;
234 const size_t blen = buffer_string_length(b);
235 long port = 0;
237 if (*p != '[') {
238 char * const colon = (char *)memchr(p, ':', blen);
239 if (colon) {
240 if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/
241 if (colon[1] != '\0') {
242 char *e;
243 port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/
244 if (0 < port && port <= USHRT_MAX && *e == '\0') {
245 /* valid port */
246 } else {
247 return -1;
249 } /*(else ignore stray colon at string end)*/
250 buffer_string_set_length(b, (size_t)(colon - p)); /*(remove port str)*/
253 if (light_isdigit(*p)) do {
254 /* (IPv4 address literal or domain starting w/ digit (e.g. 3com))*/
255 /* (check one-element cache of normalized IPv4 address string) */
256 static struct { char s[INET_ADDRSTRLEN]; size_t n; } laddr;
257 size_t n = colon ? (size_t)(colon - p) : blen;
258 sock_addr addr;
259 if (n == laddr.n && 0 == memcmp(p, laddr.s, n)) break;
260 if (1 == sock_addr_inet_pton(&addr, p, AF_INET, 0)) {
261 sock_addr_inet_ntop_copy_buffer(b, &addr);
262 n = buffer_string_length(b);
263 if (n < sizeof(laddr.s)) memcpy(laddr.s, b->ptr, (laddr.n = n));
265 } while (0);
266 } else do { /* IPv6 addr */
267 #if defined(HAVE_IPV6) && defined(HAVE_INET_PTON)
269 /* (check one-element cache of normalized IPv4 address string) */
270 static struct { char s[INET6_ADDRSTRLEN]; size_t n; } laddr;
271 sock_addr addr;
272 char *bracket = b->ptr+blen-1;
273 char *percent = strchr(b->ptr+1, '%');
274 size_t len;
275 int rc;
276 char buf[INET6_ADDRSTRLEN+16]; /*(+16 for potential %interface name)*/
277 if (blen <= 2) return -1; /*(invalid "[]")*/
278 if (*bracket != ']') {
279 bracket = (char *)memchr(b->ptr+1, ']', blen-1);
280 if (NULL == bracket || bracket[1] != ':' || bracket - b->ptr == 1){
281 return -1;
283 if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/
284 char *e;
285 port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/
286 if (0 < port && port <= USHRT_MAX && *e == '\0') {
287 /* valid port */
288 } else {
289 return -1;
294 len = (size_t)((percent ? percent : bracket) - (b->ptr+1));
295 if (laddr.n == len && 0 == memcmp(laddr.s, b->ptr+1, len)) {
296 /* truncate after ']' and re-add normalized port, if needed */
297 buffer_string_set_length(b, (size_t)(bracket - b->ptr + 1));
298 break;
301 *bracket = '\0';/*(terminate IPv6 string)*/
302 if (percent) *percent = '\0'; /*(remove %interface from address)*/
303 rc = sock_addr_inet_pton(&addr, b->ptr+1, AF_INET6, 0);
304 if (percent) *percent = '%'; /*(restore %interface)*/
305 *bracket = ']'; /*(restore bracket)*/
306 if (1 != rc) return -1;
308 sock_addr_inet_ntop(&addr, buf, sizeof(buf));
309 len = strlen(buf);
310 if (percent) {
311 if (percent > bracket) return -1;
312 if (len + (size_t)(bracket - percent) >= sizeof(buf)) return -1;
313 if (len < sizeof(laddr.s)) memcpy(laddr.s, buf, (laddr.n = len));
314 memcpy(buf+len, percent, (size_t)(bracket - percent));
315 len += (size_t)(bracket - percent);
317 buffer_string_set_length(b, 1); /* truncate after '[' */
318 buffer_append_string_len(b, buf, len);
319 buffer_append_string_len(b, CONST_STR_LEN("]"));
321 #else
323 return -1;
325 #endif
326 } while (0);
328 if (0 != port && port != scheme_port) {
329 buffer_append_string_len(b, CONST_STR_LEN(":"));
330 buffer_append_int(b, (int)port);
333 return 0;
336 static int scheme_port (const buffer *scheme)
338 return buffer_is_equal_string(scheme, CONST_STR_LEN("https")) ? 443 : 80;
341 int http_request_host_policy (connection *con, buffer *b, const buffer *scheme) {
342 return (((con->conf.http_parseopts & HTTP_PARSEOPT_HOST_STRICT)
343 && 0 != request_check_hostname(b))
344 || ((con->conf.http_parseopts & HTTP_PARSEOPT_HOST_NORMALIZE)
345 && 0 != http_request_host_normalize(b, scheme_port(scheme))));
348 static int http_request_split_value(array *vals, const char *current, size_t len) {
349 int state = 0;
350 const char *token_start = NULL, *token_end = NULL;
352 * parse
354 * val1, val2, val3, val4
356 * into a array (more or less a explode() incl. stripping of whitespaces
359 for (size_t i = 0; i <= len; ++i, ++current) {
360 switch (state) {
361 case 0: /* find start of a token */
362 switch (*current) {
363 case ' ':
364 case '\t': /* skip white space */
365 case ',': /* skip empty token */
366 break;
367 case '\0': /* end of string */
368 return 0;
369 default:
370 /* found real data, switch to state 1 to find the end of the token */
371 token_start = token_end = current;
372 state = 1;
373 break;
375 break;
376 case 1: /* find end of token and last non white space character */
377 switch (*current) {
378 case ' ':
379 case '\t':
380 /* space - don't update token_end */
381 break;
382 case ',':
383 case '\0': /* end of string also marks the end of a token */
384 array_insert_value(vals, token_start, token_end-token_start+1);
385 state = 0;
386 break;
387 default:
388 /* no white space, update token_end to include current character */
389 token_end = current;
390 break;
392 break;
396 return 0;
/**
 * Test whether a byte is acceptable inside a request URI.
 *
 * @param c  byte to test
 * @return 1 if c is greater than 32 (space) and is neither 127 nor 255,
 *         otherwise 0
 */
static int request_uri_is_valid_char(unsigned char c) {
    return (c > 32 && c != 127 && c != 255);
}
403 __attribute_cold__
404 __attribute_noinline__
405 static int http_request_header_line_invalid(server *srv, int status, const char *msg) {
406 if (srv->srvconf.log_request_header_on_error) {
407 if (msg) log_error_write(srv, __FILE__, __LINE__, "s", msg);
409 return status;
412 __attribute_cold__
413 __attribute_noinline__
414 static int http_request_header_char_invalid(server *srv, char ch, const char *msg) {
415 if (srv->srvconf.log_request_header_on_error) {
416 if ((unsigned char)ch > 32 && ch != 127) {
417 char buf[2] = { ch, '\0' };
418 log_error_write(srv,__FILE__,__LINE__,"sSSS",msg,"('",buf,"')");
420 else {
421 log_error_write(srv,__FILE__,__LINE__,"sSXS",msg,"(",ch,")");
424 return 400;
/* what, if anything, the request's Connection header asked for */
enum keep_alive_set {
    HTTP_CONNECTION_UNSET,     /* neither "keep-alive" nor "close" seen */
    HTTP_CONNECTION_KEEPALIVE, /* "keep-alive" token seen */
    HTTP_CONNECTION_CLOSE,     /* "close" token seen */
};
433 typedef struct {
434 enum keep_alive_set keep_alive_set;
435 char con_length_set;
436 char *reqline_host;
437 int reqline_hostlen;
438 size_t reqline_len;
439 } parse_header_state;
441 static void init_parse_header_state(parse_header_state* state) {
442 state->keep_alive_set = HTTP_CONNECTION_UNSET;
443 state->con_length_set = 0;
444 state->reqline_host = NULL;
445 state->reqline_hostlen = 0;
446 state->reqline_len = 0;
449 /* add header to list of headers
450 * certain headers are also parsed
451 * might drop a header if deemed unnecessary/broken
453 * returns 0 on success, HTTP status on error
455 static int parse_single_header(server *srv, connection *con, parse_header_state *state, char *k, size_t klen, char *v, size_t vlen) {
456 const enum http_header_e id = http_header_hkey_get(k, klen);
457 buffer **saveb = NULL;
459 /* strip leading whitespace */
460 for (; vlen > 0 && (v[0] == ' ' || v[0] == '\t'); ++v, --vlen) ;
462 /* strip trailing whitespace */
463 while (vlen > 0 && (v[vlen - 1] == ' ' || v[vlen - 1] == '\t')) --vlen;
465 /* empty header-fields are not allowed by HTTP-RFC, we just ignore them */
466 if (0 == vlen) return 0; /* ignore header */
469 * Note: k might not be '\0'-terminated
472 switch (id) {
473 /*case HTTP_HEADER_OTHER:*/
474 default:
475 break;
476 case HTTP_HEADER_HOST:
477 if (!(con->request.htags & HTTP_HEADER_HOST)) {
478 saveb = &con->request.http_host;
479 if (vlen >= 1024) { /*(expecting < 256)*/
480 return http_request_header_line_invalid(srv, 400, "uri-authority too long -> 400");
483 else if (state->reqline_host) {
484 /* ignore all Host: headers as we got Host in request line */
485 return 0; /* ignore header */
487 else {
488 return http_request_header_line_invalid(srv, 400, "duplicate Host header -> 400");
490 break;
491 case HTTP_HEADER_CONNECTION:
493 array * const vals = srv->split_vals;
494 array_reset_data_strings(vals);
495 http_request_split_value(vals, v, vlen); /* split on , */
496 for (size_t vi = 0; vi < vals->used; ++vi) {
497 data_string *dsv = (data_string *)vals->data[vi];
498 if (buffer_eq_icase_slen(dsv->value,
499 CONST_STR_LEN("keep-alive"))) {
500 state->keep_alive_set = HTTP_CONNECTION_KEEPALIVE;
501 break;
503 else if (buffer_eq_icase_slen(dsv->value,
504 CONST_STR_LEN("close"))) {
505 state->keep_alive_set = HTTP_CONNECTION_CLOSE;
506 break;
510 break;
511 case HTTP_HEADER_CONTENT_TYPE:
512 if (con->request.htags & HTTP_HEADER_CONTENT_TYPE) {
513 return http_request_header_line_invalid(srv, 400, "duplicate Content-Type header -> 400");
515 break;
516 case HTTP_HEADER_IF_NONE_MATCH:
517 /* if dup, only the first one will survive */
518 if (con->request.htags & HTTP_HEADER_IF_NONE_MATCH) {
519 return 0; /* ignore header */
521 break;
522 case HTTP_HEADER_CONTENT_LENGTH:
523 if (!(con->request.htags & HTTP_HEADER_CONTENT_LENGTH)) {
524 char *err;
525 off_t r = strtoll(v, &err, 10);
527 if (*err == '\0' && r >= 0) {
528 con->request.content_length = r;
530 else {
531 return http_request_header_line_invalid(srv, 400, "invalid Content-Length header -> 400");
534 else {
535 return http_request_header_line_invalid(srv, 400, "duplicate Content-Length header -> 400");
537 break;
538 case HTTP_HEADER_IF_MODIFIED_SINCE:
539 if (con->request.htags & HTTP_HEADER_IF_MODIFIED_SINCE) {
540 /* Proxies sometimes send dup headers
541 * if they are the same we ignore the second
542 * if not, we raise an error */
543 buffer *vb =
544 http_header_request_get(con, HTTP_HEADER_IF_MODIFIED_SINCE,
545 CONST_STR_LEN("If-Modified-Since"));
546 if (vb && buffer_is_equal_caseless_string(vb, v, vlen)) {
547 /* ignore it if they are the same */
548 return 0; /* ignore header */
550 else {
551 return http_request_header_line_invalid(srv, 400, "duplicate If-Modified-Since header -> 400");
554 break;
557 con->request.htags |= id;
558 http_header_request_append(con, id, k, klen, v, vlen);
560 if (saveb) {
561 *saveb = http_header_request_get(con, id, k, klen);
564 return 0;
567 static size_t http_request_parse_reqline(server *srv, connection *con, buffer *hdrs, parse_header_state *state) {
568 char * const ptr = hdrs->ptr;
569 char *uri = NULL, *proto = NULL;
571 size_t i;
572 const unsigned int http_header_strict = (con->conf.http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
574 /* hdrs must end with '\n' (already checked before parsing headers) */
575 #ifdef __COVERITY__
576 if (NULL == strchr(ptr, '\n')) return 400;
577 #endif
580 * Request: "^(GET|POST|HEAD) ([^ ]+(\\?[^ ]+|)) (HTTP/1\\.[01])$"
581 * Option : "^([-a-zA-Z]+): (.+)$"
582 * End : "^$"
585 /* parse the first line of the request
587 * should be:
589 * <method> <uri> <protocol>\r\n
590 * */
591 for (i = 0; ptr[i] != '\n'; ++i) {
592 if (ptr[i] == ' ') {
593 if (NULL == uri) uri = ptr + i + 1;
594 else if (NULL == proto) proto = ptr + i + 1;
595 else return http_request_header_line_invalid(srv, 400, "overlong request line; extra space -> 400"); /* ERROR, one space to much */
598 ptr[i] = '\0';
599 state->reqline_len = i+1;
602 char *nuri = NULL;
603 size_t j, jlen;
605 /* \r\n -> \0\0 */
606 if (0 == i) return 400;
607 if (ptr[i-1] == '\r') {
608 ptr[i-1] = '\0';
609 } else if (http_header_strict) { /* '\n' */
610 return http_request_header_line_invalid(srv, 400, "missing CR before LF in header -> 400");
613 if (NULL == proto) {
614 return http_request_header_line_invalid(srv, 400, "incomplete request line -> 400");
617 con->request.http_method = get_http_method_key(ptr, uri - 1 - ptr);
618 if (HTTP_METHOD_UNSET == con->request.http_method) {
619 return http_request_header_line_invalid(srv, 501, "unknown http-method -> 501");
623 * RFC7230:
624 * HTTP-version = HTTP-name "/" DIGIT "." DIGIT
625 * HTTP-name = %x48.54.54.50 ; "HTTP", case-sensitive
627 if (proto[0]=='H' && proto[1]=='T' && proto[2]=='T' && proto[3]=='P' && proto[4] == '/') {
628 if (proto[5] == '1' && proto[6] == '.' && (proto[7] == '1' || proto[7] == '0')) {
629 con->request.http_version = (proto[7] == '1') ? HTTP_VERSION_1_1 : HTTP_VERSION_1_0;
630 } else {
631 return http_request_header_line_invalid(srv, 505, "unknown HTTP version -> 505");
633 } else {
634 return http_request_header_line_invalid(srv, 400, "unknown protocol -> 400");
637 jlen = (size_t)(proto - uri - 1);
639 if (*uri == '/') {
640 /* (common case) */
641 buffer_copy_string_len(con->request.uri, uri, jlen);
642 } else if (jlen > 7 && buffer_eq_icase_ssn(uri, "http://", 7) &&
643 NULL != (nuri = memchr(uri + 7, '/', jlen-7))) {
644 state->reqline_host = uri + 7;
645 state->reqline_hostlen = nuri - state->reqline_host;
647 buffer_copy_string_len(con->request.uri, nuri, proto - nuri - 1);
648 } else if (jlen > 8 && buffer_eq_icase_ssn(uri, "https://", 8) &&
649 NULL != (nuri = memchr(uri + 8, '/', jlen-8))) {
650 state->reqline_host = uri + 8;
651 state->reqline_hostlen = nuri - state->reqline_host;
653 buffer_copy_string_len(con->request.uri, nuri, proto - nuri - 1);
654 } else if (!http_header_strict
655 || (HTTP_METHOD_CONNECT == con->request.http_method && (uri[0] == ':' || light_isdigit(uri[0])))
656 || (HTTP_METHOD_OPTIONS == con->request.http_method && uri[0] == '*' && 1 == jlen)) {
657 buffer_copy_string_len(con->request.uri, uri, jlen);
658 } else {
659 return http_request_header_line_invalid(srv, 400, "request-URI parse error -> 400");
662 /* check uri for invalid characters */
663 jlen = buffer_string_length(con->request.uri);
664 if (0 == jlen) return http_request_header_line_invalid(srv, 400, "no uri specified -> 400");
665 if ((con->conf.http_parseopts & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT)) {
666 j = jlen; /* URI will be checked in http_response_prepare() */
667 } else if (http_header_strict) {
668 for (j = 0; j < jlen && request_uri_is_valid_char(con->request.uri->ptr[j]); j++) ;
669 } else {
670 char *z = memchr(con->request.uri->ptr, '\0', jlen);
671 j = (NULL == z) ? jlen : (size_t)(z - con->request.uri->ptr);
673 if (j < jlen) {
674 return http_request_header_char_invalid(srv, con->request.uri->ptr[j], "invalid character in URI -> 400");
677 buffer_copy_buffer(con->request.orig_uri, con->request.uri);
680 if (state->reqline_host) {
681 /* Insert as host header */
682 if (state->reqline_hostlen >= 1024) { /*(expecting < 256)*/
683 return http_request_header_line_invalid(srv, 400, "uri-authority too long -> 400");
685 http_header_request_set(con, HTTP_HEADER_HOST, CONST_STR_LEN("Host"), state->reqline_host, state->reqline_hostlen);
686 con->request.http_host = http_header_request_get(con, HTTP_HEADER_HOST, CONST_STR_LEN("Host"));
689 return 0;
692 int http_request_parse(server *srv, connection *con, buffer *hdrs) {
693 char * const ptr = hdrs->ptr;
694 char *value = NULL;
695 size_t i, first, ilen;
696 const unsigned int http_header_strict = (con->conf.http_parseopts & HTTP_PARSEOPT_HEADER_STRICT);
697 int status;
699 parse_header_state state;
700 init_parse_header_state(&state);
702 status = http_request_parse_reqline(srv, con, hdrs, &state);
703 if (0 != status) return status;
705 i = first = state.reqline_len;
707 if (ptr[i] == ' ' || ptr[i] == '\t') {
708 return http_request_header_line_invalid(srv, 400, "WS at the start of first line -> 400");
711 ilen = buffer_string_length(hdrs);
712 for (int is_key = 1, key_len = 0; i < ilen; ++i) {
713 char *cur = ptr + i;
715 if (is_key) {
717 * 1*<any CHAR except CTLs or separators>
718 * CTLs == 0-31 + 127, CHAR = 7-bit ascii (0..127)
721 switch(*cur) {
722 case ' ':
723 case '\t':
724 /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
725 * 3.2.4. Field Parsing
726 * [...]
727 * No whitespace is allowed between the header field-name and colon. In
728 * the past, differences in the handling of such whitespace have led to
729 * security vulnerabilities in request routing and response handling. A
730 * server MUST reject any received request message that contains
731 * whitespace between a header field-name and colon with a response code
732 * of 400 (Bad Request). A proxy MUST remove any such whitespace from a
733 * response message before forwarding the message downstream.
735 if (http_header_strict)
736 return http_request_header_line_invalid(srv, 400, "invalid whitespace between field-name and colon -> 400");
737 /* skip every thing up to the : */
738 do { ++cur; } while (*cur == ' ' || *cur == '\t');
739 if (*cur != ':') {
740 return http_request_header_line_invalid(srv, 400, "WS character in key -> 400");
742 /* fall through */
743 case ':':
744 is_key = 0;
745 key_len = i - first;
746 value = cur + 1;
747 i = cur - ptr;
748 break;
749 case '(':
750 case ')':
751 case '<':
752 case '>':
753 case '@':
754 case ',':
755 case ';':
756 case '\\':
757 case '\"':
758 case '/':
759 case '[':
760 case ']':
761 case '?':
762 case '=':
763 case '{':
764 case '}':
765 return http_request_header_char_invalid(srv, *cur, "invalid character in header key -> 400");
766 case '\r':
767 if (ptr[i+1] == '\n' && i == first) {
768 /* End of Header */
769 ++i;
770 } else {
771 return http_request_header_line_invalid(srv, 400, "CR without LF -> 400");
773 break;
774 case '\n':
775 if (http_header_strict) {
776 return http_request_header_line_invalid(srv, 400, "missing CR before LF in header -> 400");
777 } else if (i == first) {
778 /* End of Header */
779 break;
781 /* fall through */
782 default:
783 if (http_header_strict ? (*cur < 32 || ((unsigned char)*cur) >= 127) : *cur == '\0') {
784 return http_request_header_char_invalid(srv, *cur, "invalid character in header key -> 400");
786 /* ok */
787 break;
789 } else {
790 switch(*cur) {
791 case '\r':
792 if (cur[1] != '\n') {
793 return http_request_header_line_invalid(srv, 400, "CR without LF -> 400");
795 if (cur[2] == ' ' || cur[2] == '\t') { /* header line folding */
796 cur[0] = ' ';
797 cur[1] = ' ';
798 i += 2;
799 continue;
801 ++i;
802 /* fall through */
803 case '\n':
804 if (*cur == '\n') {
805 if (http_header_strict) {
806 return http_request_header_line_invalid(srv, 400, "missing CR before LF in header -> 400");
808 if (cur[1] == ' ' || cur[1] == '\t') { /* header line folding */
809 cur[0] = ' ';
810 i += 1;
811 continue;
815 /* End of Headerline */
816 *cur = '\0'; /*(for if value is further parsed and '\0' is expected at end of string)*/
818 status = parse_single_header(srv, con, &state, ptr + first, key_len, value, cur - value);
819 if (0 != status) return status;
821 first = i+1;
822 is_key = 1;
823 value = NULL;
824 break;
825 case ' ':
826 case '\t':
827 break;
828 default:
829 if (http_header_strict ? (*cur >= 0 && *cur < 32) : *cur == '\0') {
830 return http_request_header_char_invalid(srv, *cur, "invalid character in header -> 400");
832 break;
837 /* do some post-processing */
839 if (con->request.http_version == HTTP_VERSION_1_1) {
840 if (state.keep_alive_set != HTTP_CONNECTION_CLOSE) {
841 /* no Connection-Header sent */
843 /* HTTP/1.1 -> keep-alive default TRUE */
844 con->keep_alive = 1;
845 } else {
846 con->keep_alive = 0;
849 /* RFC 2616, 14.23 */
850 if (con->request.http_host == NULL ||
851 buffer_string_is_empty(con->request.http_host)) {
852 return http_request_header_line_invalid(srv, 400, "HTTP/1.1 but Host missing -> 400");
854 } else {
855 if (state.keep_alive_set == HTTP_CONNECTION_KEEPALIVE) {
856 /* no Connection-Header sent */
858 /* HTTP/1.0 -> keep-alive default FALSE */
859 con->keep_alive = 1;
860 } else {
861 con->keep_alive = 0;
865 /* check hostname field if it is set */
866 if (!buffer_is_empty(con->request.http_host) &&
867 0 != http_request_host_policy(con, con->request.http_host, con->proto)) {
868 return http_request_header_line_invalid(srv, 400, "Invalid Hostname -> 400");
871 if (con->request.htags & HTTP_HEADER_TRANSFER_ENCODING) {
872 buffer *vb = http_header_request_get(con, HTTP_HEADER_TRANSFER_ENCODING, CONST_STR_LEN("Transfer-Encoding"));
873 if (NULL != vb) {
874 if (con->request.http_version == HTTP_VERSION_1_0) {
875 return http_request_header_line_invalid(srv, 400, "HTTP/1.0 with Transfer-Encoding (bad HTTP/1.0 proxy?) -> 400");
878 if (!buffer_eq_icase_slen(vb, CONST_STR_LEN("chunked"))) {
879 /* Transfer-Encoding might contain additional encodings,
880 * which are not currently supported by lighttpd */
881 return http_request_header_line_invalid(srv, 501, NULL); /* Not Implemented */
884 /* reset value for Transfer-Encoding, a hop-by-hop header,
885 * which must not be blindly forwarded to backends */
886 http_header_request_unset(con, HTTP_HEADER_TRANSFER_ENCODING, CONST_STR_LEN("Transfer-Encoding"));
888 if (con->request.htags & HTTP_HEADER_CONTENT_LENGTH) {
889 /* RFC7230 Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing
890 * 3.3.3. Message Body Length
891 * [...]
892 * If a message is received with both a Transfer-Encoding and a
893 * Content-Length header field, the Transfer-Encoding overrides the
894 * Content-Length. Such a message might indicate an attempt to
895 * perform request smuggling (Section 9.5) or response splitting
896 * (Section 9.4) and ought to be handled as an error. A sender MUST
897 * remove the received Content-Length field prior to forwarding such
898 * a message downstream.
900 if (http_header_strict) {
901 return http_request_header_line_invalid(srv, 400, "invalid Transfer-Encoding + Content-Length -> 400");
903 else {
904 /* ignore Content-Length */
905 http_header_request_unset(con, HTTP_HEADER_CONTENT_LENGTH, CONST_STR_LEN("Content-Length"));
909 state.con_length_set = 1;
910 con->request.content_length = -1;
913 else if (con->request.htags & HTTP_HEADER_CONTENT_LENGTH) {
914 state.con_length_set = 1;
917 switch(con->request.http_method) {
918 case HTTP_METHOD_GET:
919 case HTTP_METHOD_HEAD:
920 /* content-length is forbidden for those */
921 if (state.con_length_set && 0 != con->request.content_length
922 && !(con->conf.http_parseopts & HTTP_PARSEOPT_METHOD_GET_BODY)) {
923 return http_request_header_line_invalid(srv, 400, "GET/HEAD with content-length -> 400");
925 break;
926 case HTTP_METHOD_POST:
927 /* content-length is required for them */
928 if (!state.con_length_set) {
929 return http_request_header_line_invalid(srv, 411, "POST-request, but content-length missing -> 411");
931 break;
932 default:
933 break;
936 return 0;