From b47494d4cda6dc1a152f9033e4617897842c2f50 Mon Sep 17 00:00:00 2001 From: Glenn Strauss Date: Wed, 18 May 2016 05:42:42 -0400 Subject: [PATCH] [config] opts for http header parsing strictness (fixes #551, fixes #1086, fixes #1184, fixes #2143, #2258, #2281, fixes #946, fixes #1330, fixes #602, #1016) server.http-parseopt-header-strict = "enable" server.http-parseopt-host-strict = "enable" (implies host-normalize) server.http-parseopt-host-normalize = "disable" defaults retain current behavior, which is strict header parsing and strict host parsing, with enhancement to normalize IPv4 address and port number strings. For lighttpd tests, these need to be enabled (and are by default) For marginally faster HTTP header parsing for benchmarks, disable these. To allow - underscores in hostname - hyphen ('-') at beginning of hostname - all-numeric TLDs server.http-parseopt-host-strict = "disable" x-ref: "lighttpd doesn't allow underscores in host names" https://redmine.lighttpd.net/issues/551 "hyphen in hostname" https://redmine.lighttpd.net/issues/1086 "a numeric tld" https://redmine.lighttpd.net/issues/1184 "Numeric tld's" https://redmine.lighttpd.net/issues/2143 "Bad Request" https://redmine.lighttpd.net/issues/2258 "400 Bad Request when using Numeric TLDs" https://redmine.lighttpd.net/issues/2281 To allow a variety of numerical formats to be converted to IP addresses server.http-parseopt-host-strict = "disable" server.http-parseopt-host-normalize = "enable" x-ref: "URL encoding leads to "400 - Bad Request"" https://redmine.lighttpd.net/issues/946 "400 Bad Request when using IP's numeric value ("ip2long()")" https://redmine.lighttpd.net/issues/1330 To allow most 8-bit and 7-bit chars in headers server.http-parseopt-header-strict = "disable" (not recommended) x-ref: "Russian letters not alowed?" https://redmine.lighttpd.net/issues/602 "header Content-Disposition with russian '?' 
(CP1251, ascii code 255) causes error" https://redmine.lighttpd.net/issues/1016 --- src/base.h | 5 ++ src/configfile.c | 18 +++++++ src/request.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++++------ src/request.h | 7 +++ src/server.c | 3 ++ 5 files changed, 169 insertions(+), 16 deletions(-) diff --git a/src/base.h b/src/base.h index f3e5c7d3..871e68a6 100644 --- a/src/base.h +++ b/src/base.h @@ -300,6 +300,7 @@ typedef struct { unsigned short etag_use_mtime; unsigned short etag_use_size; unsigned short force_lowercase_filenames; /* if the FS is case-insensitive, force all files to lower-case */ + unsigned int http_parseopts; unsigned int max_request_size; int listen_backlog; @@ -550,6 +551,10 @@ typedef struct { unsigned short enable_cores; unsigned short reject_expect_100_with_417; buffer *xattr_name; + + unsigned short http_header_strict; + unsigned short http_host_strict; + unsigned short http_host_normalize; } server_config; typedef struct server_socket { diff --git a/src/configfile.c b/src/configfile.c index 7198b964..11f1cc7f 100644 --- a/src/configfile.c +++ b/src/configfile.c @@ -8,6 +8,7 @@ #include "configparser.h" #include "configfile.h" #include "proc_open.h" +#include "request.h" #include @@ -114,6 +115,9 @@ static int config_insert(server *srv) { { "mimetype.xattr-name", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_SERVER }, /* 69 */ { "server.listen-backlog", NULL, T_CONFIG_INT, T_CONFIG_SCOPE_CONNECTION }, /* 70 */ { "server.error-handler-404", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION }, /* 71 */ + { "server.http-parseopt-header-strict",NULL, T_CONFIG_BOOLEAN, T_CONFIG_SCOPE_SERVER }, /* 72 */ + { "server.http-parseopt-host-strict", NULL, T_CONFIG_BOOLEAN, T_CONFIG_SCOPE_SERVER }, /* 73 */ + { "server.http-parseopt-host-normalize",NULL,T_CONFIG_BOOLEAN, T_CONFIG_SCOPE_SERVER }, /* 74 */ { "server.host", "use server.bind instead", @@ -178,6 +182,9 @@ static int config_insert(server *srv) { cv[68].destination = 
&(srv->srvconf.upload_temp_file_size); cv[69].destination = srv->srvconf.xattr_name; + cv[72].destination = &(srv->srvconf.http_header_strict); + cv[73].destination = &(srv->srvconf.http_host_strict); + cv[74].destination = &(srv->srvconf.http_host_normalize); srv->config_storage = calloc(1, srv->config_context->used * sizeof(specific_config *)); @@ -300,6 +307,15 @@ static int config_insert(server *srv) { } } + { + specific_config *s = srv->config_storage[0]; + s->http_parseopts= /*(global, but stored in con->conf.http_parseopts)*/ + (srv->srvconf.http_header_strict ?(HTTP_PARSEOPT_HEADER_STRICT) :0) + |(srv->srvconf.http_host_strict ?(HTTP_PARSEOPT_HOST_STRICT + |HTTP_PARSEOPT_HOST_NORMALIZE):0) + |(srv->srvconf.http_host_normalize ?(HTTP_PARSEOPT_HOST_NORMALIZE):0); + } + if (buffer_string_is_empty(stat_cache_string)) { srv->srvconf.stat_cache_engine = STAT_CACHE_ENGINE_SIMPLE; } else if (buffer_is_equal_string(stat_cache_string, CONST_STR_LEN("simple"))) { @@ -391,6 +407,8 @@ static int config_insert(server *srv) { int config_setup_connection(server *srv, connection *con) { specific_config *s = srv->config_storage[0]; + PATCH(http_parseopts); + PATCH(allow_http11); PATCH(mimetypes); PATCH(document_root); diff --git a/src/request.c b/src/request.c index 08047b55..77b90fea 100644 --- a/src/request.c +++ b/src/request.c @@ -12,7 +12,7 @@ #include #include -static int request_check_hostname(server *srv, connection *con, buffer *host) { +static int request_check_hostname(buffer *host) { enum { DOMAINLABEL, TOPLABEL } stage = TOPLABEL; size_t i; int label_len = 0; @@ -21,9 +21,6 @@ static int request_check_hostname(server *srv, connection *con, buffer *host) { int is_ip = -1; /* -1 don't know yet, 0 no, 1 yes */ int level = 0; - UNUSED(srv); - UNUSED(con); - /* * hostport = host [ ":" port ] * host = hostname | IPv4address | IPv6address @@ -35,9 +32,6 @@ static int request_check_hostname(server *srv, connection *con, buffer *host) { * port = *digit */ - /* no Host: 
*/ - if (buffer_is_empty(host)) return 0; - host_len = buffer_string_length(host); /* IPv6 adress */ @@ -209,6 +203,124 @@ static int request_check_hostname(server *srv, connection *con, buffer *host) { return 0; } +int http_request_host_normalize(buffer *b) { + /* + * check for and canonicalize numeric IP address and portnum (optional) + * (IP address may be followed by ":portnum" (optional)) + * - IPv6: "[...]" + * - IPv4: "x.x.x.x" + * - IPv4: 12345678 (32-bit decimal number) + * - IPv4: 012345678 (32-bit octal number) + * - IPv4: 0x12345678 (32-bit hex number) + * + * allow any chars (except ':' and '\0' and stray '[' or ']') + * (other code may check chars more strictly or more pedantically) + * ':' delimits (optional) port at end of string + * "[]" wraps IPv6 address literal + * '\0' should have been rejected earlier were it present + * + * any chars includes, but is not limited to: + * - allow '-' any where, even at beginning of word + * (security caution: might be confused for cmd flag if passed to shell) + * - allow all-digit TLDs + * (might be mistaken for IPv4 addr by inet_aton() + * unless non-digits appear in subdomain) + */ + + /* Note: not using getaddrinfo() since it does not support "[]" around IPv6 + * and is not as lenient as inet_aton() and inet_addr() for IPv4 strings. + * Not using inet_pton() (when available) on IPv4 for similar reasons. 
*/ + + const char * const p = b->ptr; + const size_t blen = buffer_string_length(b); + long port = 0; + + if (*p != '[') { + char * const colon = (char *)memchr(p, ':', blen); + if (colon) { + if (*p == ':') return -1; /*(empty host then port, or naked IPv6)*/ + if (colon[1] != '\0') { + char *e; + port = strtol(colon+1, &e, 0); /*(allow decimal, octal, hex)*/ + if (0 < port && port <= USHRT_MAX && *e == '\0') { + /* valid port */ + } else { + return -1; + } + } /*(else ignore stray colon at string end)*/ + buffer_commit(b, (size_t)(colon - p)); /*(remove port str)*/ + } + + if (light_isdigit(*p)) { + /* (IPv4 address literal or domain starting w/ digit (e.g. 3com))*/ + struct in_addr addr; + #if defined(HAVE_INET_ATON) /*(Windows does not provide inet_aton())*/ + if (0 != inet_aton(p, &addr)) + #else + if ((addr.s_addr = inet_addr(p)) != INADDR_NONE) + #endif + { + #if defined(HAVE_INET_PTON)/*(expect inet_ntop() if inet_pton())*/ + #ifndef INET_ADDRSTRLEN + #define INET_ADDRSTRLEN 16 + #endif + char buf[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, (const void *)&addr, buf, sizeof(buf)); + buffer_copy_string(b, buf); + #else + buffer_copy_string(b, inet_ntoa(addr)); /*(not thread-safe)*/ + #endif + } + } + } else { /* IPv6 addr */ + #if defined(HAVE_IPV6) && defined(HAVE_INET_PTON) + + struct in6_addr addr; + char *bracket = b->ptr+blen-1; + int rc; + char buf[INET6_ADDRSTRLEN]; + if (blen == 2) return -1; /*(invalid "[]")*/ + if (*bracket != ']') { + bracket = (char *)memchr(b->ptr+1, ']', blen-1); + if (NULL == bracket || bracket[1] != ':' || bracket - b->ptr == 1){ + return -1; + } + if (bracket[2] != '\0') { /*(ignore stray colon at string end)*/ + char *e; + port = strtol(bracket+2, &e, 0); /*(allow decimal, octal, hex)*/ + if (0 < port && port <= USHRT_MAX && *e == '\0') { + /* valid port */ + } else { + return -1; + } + } + } + + *bracket = '\0';/*(terminate IPv6 string)*/ + rc = inet_pton(AF_INET6, b->ptr+1, &addr); + *bracket = ']'; /*(restore bracket)*/ + if 
(1 != rc) return -1; + + inet_ntop(AF_INET6,(const void *)&addr, buf, sizeof(buf)); + buffer_commit(b, 1); /* truncate after '[' */ + buffer_append_string(b, buf); + buffer_append_string_len(b, CONST_STR_LEN("]")); + + #else + + return -1; + + #endif + } + + if (port) { + buffer_append_string_len(b, CONST_STR_LEN(":")); + buffer_append_int(b, (int)port); + } + + return 0; +} + #if 0 #define DUMP_HEADER #endif @@ -302,6 +414,7 @@ int http_request_parse(server *srv, connection *con) { size_t i, first, ilen; int done = 0; + const unsigned int http_header_strict = (con->conf.http_parseopts & HTTP_PARSEOPT_HEADER_STRICT); /* * Request: "^(GET|POST|HEAD) ([^ ]+(\\?[^ ]+|)) (HTTP/1\\.[01])$" @@ -478,13 +591,18 @@ int http_request_parse(server *srv, connection *con) { /* check uri for invalid characters */ jlen = buffer_string_length(con->request.uri); - for (j = 0; j < jlen; j++) { - if (!request_uri_is_valid_char(con->request.uri->ptr[j])) { - unsigned char buf[2]; + if (http_header_strict) { + for (j = 0; j < jlen && request_uri_is_valid_char(con->request.uri->ptr[j]); j++) ; + } else { + char *z = memchr(con->request.uri->ptr, '\0', jlen); + j = (NULL == z) ? jlen : (size_t)(z - con->request.uri->ptr); + } + if (j < jlen) { con->http_status = 400; con->keep_alive = 0; if (srv->srvconf.log_request_header_on_error) { + unsigned char buf[2]; buf[0] = con->request.uri->ptr[j]; buf[1] = '\0'; @@ -507,7 +625,6 @@ int http_request_parse(server *srv, connection *con) { } return 0; - } } buffer_copy_buffer(con->request.orig_uri, con->request.uri); @@ -705,7 +822,7 @@ int http_request_parse(server *srv, connection *con) { } break; default: - if (*cur < 32 || ((unsigned char)*cur) >= 127) { + if (http_header_strict ? 
(*cur < 32 || ((unsigned char)*cur) >= 127) : *cur == '\0') { con->http_status = 400; con->keep_alive = 0; con->response.keep_alive = 0; @@ -1028,9 +1145,9 @@ int http_request_parse(server *srv, connection *con) { case '\t': /* strip leading WS */ if (value == cur) value = cur+1; - /* fallthrough */ + break; default: - if (*cur >= 0 && *cur < 32 && *cur != '\t') { + if (http_header_strict ? (*cur >= 0 && *cur < 32) : *cur == '\0') { if (srv->srvconf.log_request_header_on_error) { log_error_write(srv, __FILE__, __LINE__, "sds", "invalid char in header", (int)*cur, "-> 400"); @@ -1087,8 +1204,11 @@ int http_request_parse(server *srv, connection *con) { } /* check hostname field if it is set */ - if (NULL != con->request.http_host && - 0 != request_check_hostname(srv, con, con->request.http_host)) { + if (!buffer_is_empty(con->request.http_host) && + (((con->conf.http_parseopts & HTTP_PARSEOPT_HOST_STRICT) && + 0 != request_check_hostname(con->request.http_host)) + || ((con->conf.http_parseopts & HTTP_PARSEOPT_HOST_NORMALIZE) && + 0 != http_request_host_normalize(con->request.http_host)))) { if (srv->srvconf.log_request_header_on_error) { log_error_write(srv, __FILE__, __LINE__, "s", diff --git a/src/request.h b/src/request.h index bb05bcd7..a3aeccdf 100644 --- a/src/request.h +++ b/src/request.h @@ -4,7 +4,14 @@ #include "server.h" +typedef enum { + HTTP_PARSEOPT_HEADER_STRICT = 1 + ,HTTP_PARSEOPT_HOST_STRICT = 2 + ,HTTP_PARSEOPT_HOST_NORMALIZE = 4 +} http_parseopts_e; + int http_request_parse(server *srv, connection *con); int http_request_header_finished(server *srv, connection *con); +int http_request_host_normalize(buffer *b); #endif diff --git a/src/server.c b/src/server.c index fe88799f..b3f36a75 100644 --- a/src/server.c +++ b/src/server.c @@ -277,6 +277,9 @@ static server *server_init(void) { srv->srvconf.upload_tempdirs = array_init(); srv->srvconf.reject_expect_100_with_417 = 1; srv->srvconf.xattr_name = buffer_init_string("Content-Type"); + 
srv->srvconf.http_header_strict = 1; + srv->srvconf.http_host_strict = 1; /*(implies http_host_normalize)*/ + srv->srvconf.http_host_normalize = 0; /* use syslog */ srv->errorlog_fd = STDERR_FILENO; -- 2.11.4.GIT