decompress_data: Do not leak memory in mem_realloc.
[elinks.git] / src / protocol / http / http.c
blob588b01b60ca1df1a01891f9905f5eb41bd503deb
1 /* Internal "http" protocol implementation */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <errno.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #ifdef HAVE_UNISTD_H
12 #include <unistd.h>
13 #endif
14 #ifdef HAVE_FCNTL_H
15 #include <fcntl.h> /* OS/2 needs this after sys/types.h */
16 #endif
17 #ifdef HAVE_LIMITS_H
18 #include <limits.h>
19 #endif
21 #include "elinks.h"
23 #include "cache/cache.h"
24 #include "config/options.h"
25 #include "cookies/cookies.h"
26 #include "encoding/encoding.h"
27 #include "intl/charsets.h"
28 #include "intl/gettext/libintl.h"
29 #include "main/module.h"
30 #include "network/connection.h"
31 #include "network/progress.h"
32 #include "network/socket.h"
33 #include "osdep/ascii.h"
34 #include "osdep/osdep.h"
35 #include "osdep/sysname.h"
36 #include "protocol/auth/auth.h"
37 #include "protocol/auth/digest.h"
38 #include "protocol/date.h"
39 #include "protocol/header.h"
40 #include "protocol/http/blacklist.h"
41 #include "protocol/http/codes.h"
42 #include "protocol/http/http.h"
43 #include "protocol/uri.h"
44 #include "session/session.h"
45 #include "terminal/terminal.h"
46 #include "util/base64.h"
47 #include "util/conv.h"
48 #include "util/memory.h"
49 #include "util/string.h"
51 #ifdef CONFIG_GSSAPI
52 #include "http_negotiate.h"
53 #endif
/* An HTTP protocol version, split into its two numeric components. */
55 struct http_version {
56 int major;
57 int minor;
/* Predicates on a struct http_version value @x (a struct, not a pointer).
 * The first three test for one exact version; PRE_* / POST_* test for
 * strictly older / strictly newer versions than the one named. */
60 #define HTTP_0_9(x) ((x).major == 0 && (x).minor == 9)
61 #define HTTP_1_0(x) ((x).major == 1 && (x).minor == 0)
62 #define HTTP_1_1(x) ((x).major == 1 && (x).minor == 1)
63 #define PRE_HTTP_1_0(x) ((x).major < 1)
64 #define PRE_HTTP_1_1(x) (PRE_HTTP_1_0(x) || HTTP_1_0(x))
65 #define POST_HTTP_1_0(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 0))
66 #define POST_HTTP_1_1(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 1))
/* Per-connection HTTP state; allocated by init_http_connection_info() and
 * stored in conn->info. */
69 struct http_connection_info {
/* Blacklist flags looked up for the proxied URI (see
 * init_http_connection_info()). */
70 enum blacklist_flags bl_flags;
/* NOTE(review): presumably the version parsed from the server's reply;
 * it is not written anywhere in the visible part of the file. */
71 struct http_version recv_version;
/* Version we put on the request line. */
72 struct http_version sent_version;
/* Non-zero means http_end_request() must not keep the connection alive. */
74 int close;
76 #define LEN_CHUNKED -2 /* == we get data in unknown number of chunks */
77 #define LEN_FINISHED 0
/* Remaining body length, or one of the LEN_* markers above. */
78 int length;
80 /* Either bytes coming in this chunk yet or "parser state". */
81 #define CHUNK_DATA_END -3
82 #define CHUNK_ZERO_SIZE -2
83 #define CHUNK_SIZE -1
84 int chunk_remaining;
/* HTTP status code; read_http_data_done() treats values >= 400 as errors. */
86 int code;
/* Credentials/state used for the Proxy-Authorization header; its realm,
 * nonce and opaque members are released in done_http(). */
90 static struct auth_entry proxy_auth;
/* Cached "Accept-Charset: ..." header line, built on first use by
 * init_accept_charset() and released in done_http(). */
92 static unsigned char *accept_charset = NULL;
/* Option descriptions for the "protocol.http" and "protocol.https" trees.
 * The table is handed to the module system through http_protocol_module
 * and is terminated by NULL_OPTION_INFO. The option paths defined here are
 * the ones read back with get_opt_bool()/get_opt_int()/get_opt_str() in the
 * rest of this file. */
95 static struct option_info http_options[] = {
96 INIT_OPT_TREE("protocol", N_("HTTP"),
97 "http", 0,
98 N_("HTTP-specific options.")),
101 INIT_OPT_TREE("protocol.http", N_("Server bug workarounds"),
102 "bugs", 0,
103 N_("Server-side HTTP bugs workarounds.")),
/* http_send_header() emits Accept-Charset only while this option is off. */
105 INIT_OPT_BOOL("protocol.http.bugs", N_("Do not send Accept-Charset"),
106 "accept_charset", 0, 1,
107 N_("The Accept-Charset header is quite long and sending it can trigger\n"
108 "bugs in some rarely found servers.")),
110 INIT_OPT_BOOL("protocol.http.bugs", N_("Allow blacklisting"),
111 "allow_blacklist", 0, 1,
112 N_("Allow blacklisting of buggy servers.")),
114 INIT_OPT_BOOL("protocol.http.bugs", N_("Broken 302 redirects"),
115 "broken_302_redirect", 0, 1,
116 N_("Broken 302 redirect (violates RFC but compatible with Netscape).\n"
117 "This is a problem for a lot of web discussion boards and the like.\n"
118 "If they will do strange things to you, try to play with this.")),
120 INIT_OPT_BOOL("protocol.http.bugs", N_("No keepalive after POST requests"),
121 "post_no_keepalive", 0, 0,
122 N_("Disable keepalive connection after POST request.")),
124 INIT_OPT_BOOL("protocol.http.bugs", N_("Use HTTP/1.0"),
125 "http10", 0, 0,
126 N_("Use HTTP/1.0 protocol instead of HTTP/1.1.")),
128 INIT_OPT_TREE("protocol.http", N_("Proxy configuration"),
129 "proxy", 0,
130 N_("HTTP proxy configuration.")),
132 INIT_OPT_STRING("protocol.http.proxy", N_("Host and port-number"),
133 "host", 0, "",
134 N_("Host and port-number (host:port) of the HTTP proxy, or blank.\n"
135 "If it's blank, HTTP_PROXY environment variable is checked as well.")),
137 INIT_OPT_STRING("protocol.http.proxy", N_("Username"),
138 "user", 0, "",
139 N_("Proxy authentication username.")),
141 INIT_OPT_STRING("protocol.http.proxy", N_("Password"),
142 "passwd", 0, "",
143 N_("Proxy authentication password.")),
146 INIT_OPT_TREE("protocol.http", N_("Referer sending"),
147 "referer", 0,
148 N_("HTTP referer sending options. HTTP referer is a special header\n"
149 "sent in the HTTP requests, which is supposed to contain the previous\n"
150 "page visited by the browser. This way, the server can know what link\n"
151 "did you follow when accessing that page. However, this behaviour\n"
152 "can unfortunately considerably affect privacy and can lead even to a\n"
153 "security problem on some badly designed web pages.")),
155 INIT_OPT_INT("protocol.http.referer", N_("Policy"),
156 "policy", 0,
157 REFERER_NONE, REFERER_TRUE, REFERER_TRUE,
158 N_("Mode of sending HTTP referer:\n"
159 "0 is send no referer\n"
160 "1 is send current URL as referer\n"
161 "2 is send fixed fake referer\n"
162 "3 is send previous URL as referer (correct, but insecure)")),
164 INIT_OPT_STRING("protocol.http.referer", N_("Fake referer URL"),
165 "fake", 0, "",
166 N_("Fake referer to be sent when policy is 2.")),
169 INIT_OPT_STRING("protocol.http", N_("Send Accept-Language header"),
170 "accept_language", 0, "",
171 N_("Send Accept-Language header.")),
173 INIT_OPT_BOOL("protocol.http", N_("Use UI language as Accept-Language"),
174 "accept_ui_language", 0, 1,
175 N_("Request localised versions of documents from web-servers (using the\n"
176 "Accept-Language header) using the language you have configured for\n"
177 "ELinks' user-interface (this also affects navigator.language ECMAScript\n"
178 "value available to scripts). Note that some see this as a potential\n"
179 "security risk because it tells web-masters and the FBI sniffers about\n"
180 "your language preference.")),
182 INIT_OPT_BOOL("protocol.http", N_("Activate HTTP TRACE debugging"),
183 "trace", 0, 0,
184 N_("If active, all HTTP requests are sent with TRACE as their method\n"
185 "rather than GET or POST. This is useful for debugging of both ELinks\n"
186 "and various server-side scripts --- the server only returns the client's\n"
187 "request back to the client verbatim. Note that this type of request may\n"
188 "not be enabled on all servers.")),
190 /* OSNews.com is supposed to be relying on the textmode token, at least. */
191 INIT_OPT_STRING("protocol.http", N_("User-agent identification"),
192 "user_agent", 0, "ELinks/%v (textmode; %s; %t-%b)",
193 N_("Change the User Agent ID. That means identification string, which\n"
194 "is sent to HTTP server when a document is requested. The 'textmode'\n"
195 "token in the first field is our silent attempt to establish this as\n"
196 "a standard for new textmode user agents, so that the webmasters can\n"
197 "have just a single uniform test for these if they are e.g. pushing\n"
198 "some lite version to them automagically.\n"
199 "%v in the string means ELinks version\n"
200 "%s in the string means system identification\n"
201 "%t in the string means size of the terminal\n"
202 "%b in the string means number of bars displayed by ELinks\n"
203 "Use \" \" if you don't want any User-Agent header to be sent at all.")),
206 INIT_OPT_TREE("protocol", N_("HTTPS"),
207 "https", 0,
208 N_("HTTPS-specific options.")),
210 INIT_OPT_TREE("protocol.https", N_("Proxy configuration"),
211 "proxy", 0,
212 N_("HTTPS proxy configuration.")),
214 INIT_OPT_STRING("protocol.https.proxy", N_("Host and port-number"),
215 "host", 0, "",
216 N_("Host and port-number (host:port) of the HTTPS CONNECT proxy, or blank.\n"
217 "If it's blank, HTTPS_PROXY environment variable is checked as well.")),
218 NULL_OPTION_INFO,
221 static void done_http();
/* Module descriptor for the HTTP protocol backend: it only contributes the
 * option table above and a shutdown hook (done_http); there is no init
 * hook, no event hooks and no submodules. */
223 struct module http_protocol_module = struct_module(
224 /* name: */ N_("HTTP"),
225 /* options: */ http_options,
226 /* hooks: */ NULL,
227 /* submodules: */ NULL,
228 /* data: */ NULL,
229 /* init: */ NULL,
230 /* done: */ done_http
234 static void
235 done_http(void)
237 mem_free_if(proxy_auth.realm);
238 mem_free_if(proxy_auth.nonce);
239 mem_free_if(proxy_auth.opaque);
241 free_blacklist();
243 if (accept_charset)
244 mem_free(accept_charset);
247 static void
248 init_accept_charset(void)
250 struct string ac;
251 unsigned char *cs;
252 int i;
254 if (!init_string(&ac)) return;
256 for (i = 0; (cs = get_cp_mime_name(i)); i++) {
257 if (ac.length) {
258 add_to_string(&ac, ", ");
259 } else {
260 add_to_string(&ac, "Accept-Charset: ");
262 add_to_string(&ac, cs);
265 if (ac.length) {
266 add_crlf_to_string(&ac);
269 accept_charset = squeezastring(&ac);
271 done_string(&ac);
275 unsigned char *
276 subst_user_agent(unsigned char *fmt, unsigned char *version,
277 unsigned char *sysname, unsigned char *termsize)
279 struct string agent;
281 if (!init_string(&agent)) return NULL;
283 while (*fmt) {
284 int p;
286 for (p = 0; fmt[p] && fmt[p] != '%'; p++);
288 add_bytes_to_string(&agent, fmt, p);
289 fmt += p;
291 if (*fmt != '%') continue;
293 fmt++;
294 switch (*fmt) {
295 case 'b':
296 if (!list_empty(sessions)) {
297 unsigned char bs[4] = "";
298 int blen = 0;
299 struct session *ses = sessions.prev;
300 int bars = ses->status.show_status_bar
301 + ses->status.show_tabs_bar
302 + ses->status.show_title_bar;
304 ulongcat(bs, &blen, bars, 2, 0);
305 add_to_string(&agent, bs);
307 break;
308 case 'v':
309 add_to_string(&agent, version);
310 break;
311 case 's':
312 add_to_string(&agent, sysname);
313 break;
314 case 't':
315 if (termsize)
316 add_to_string(&agent, termsize);
317 break;
318 default:
319 add_bytes_to_string(&agent, fmt - 1, 2);
320 break;
322 if (*fmt) fmt++;
325 return agent.source;
/* Append the @components of @uri to @header, sanitised for use on an HTTP
 * request line: every space, tab, CR or LF becomes "%20" and every
 * backslash becomes '/'.
 *
 * This is not really the right layer for such escaping; URLs should
 * probably be encoded as early as possible and decoded again in the
 * protocol backends. Until then this at least keeps the request line well
 * formed. --pasky */
static void
add_url_to_http_string(struct string *header, struct uri *uri, int components)
{
	unsigned char *url = get_uri_string(uri, components);
	unsigned char *cursor;

	if (url == NULL)
		return;

	for (cursor = url; *cursor != '\0'; cursor++) {
		/* Length of the run that needs no substitution. */
		int span = strcspn(cursor, " \t\r\n\\");

		add_bytes_to_string(header, cursor, span);
		cursor += span;

		if (*cursor == '\0')
			break;

		/* @cursor now sits on the offending character; the loop
		 * increment steps over it. */
		if (*cursor == '\\') {
			add_char_to_string(header, '/');
		} else {
			add_to_string(header, "%20");
		}
	}

	mem_free(url);
}
/* Parse the decimal digits in [@start, @end) from the last digit backwards
 * and store the result in *@value.
 *
 * Returns 0 on success. Returns -1, leaving *@value untouched, when:
 *  - any argument is NULL or the range is empty (@end <= @start),
 *  - a character in the range is not a decimal digit,
 *  - the number does not fit in an int. */
static int
revstr2num(unsigned char *start, unsigned char *end, int *value)
{
	/* Weight (power of ten) of the digit being read; 0 once that weight
	 * itself no longer fits in an int. */
	int place = 1;
	int val = 0;

	if (!start || !end || !value || end <= start)
		return -1;

	while (end > start) {
		int digit;

		--end;
		if (!isdigit(*end)) return -1; /* NaN */
		digit = *end - '0';

		/* Leading zeros are harmless however many there are; only a
		 * non-zero digit can push the value out of range. */
		if (digit) {
			if (!place || digit > (INT_MAX - val) / place)
				return -1; /* Would overflow int. */
			val += digit * place;
		}

		place = (place && place <= INT_MAX / 10) ? place * 10 : 0;
	}

	*value = val;
	return 0;
}
379 /* This function extracts code, major and minor version from string
380 * "\s*HTTP/\d+.\d+\s+\d\d\d..."
381 * It returns a negative value on error, 0 on success.
383 static int
384 get_http_code(struct read_buffer *rb, int *code, struct http_version *version)
386 unsigned char *head = rb->data;
387 unsigned char *start;
389 *code = 0;
390 version->major = 0;
391 version->minor = 0;
393 /* Ignore spaces. */
394 while (*head == ' ') head++;
396 /* HTTP/ */
397 if (toupper(*head) != 'H' || toupper(*++head) != 'T' ||
398 toupper(*++head) != 'T' || toupper(*++head) != 'P'
399 || *++head != '/')
400 return -1;
402 /* Version */
403 start = ++head;
404 /* Find next '.' */
405 while (*head && *head != '.') head++;
406 /* Sanity check. */
407 if (!*head || !(head - start)
408 || (head - start) > 4
409 || !isdigit(*(head + 1)))
410 return -2;
412 /* Extract major version number. */
413 if (revstr2num(start, head, &version->major)) return -3; /* NaN */
415 start = head + 1;
417 /* Find next ' '. */
418 while (*head && *head != ' ') head++;
419 /* Sanity check. */
420 if (!*head || !(head - start) || (head - start) > 4) return -4;
422 /* Extract minor version number. */
423 if (revstr2num(start, head, &version->minor)) return -5; /* NaN */
425 /* Ignore spaces. */
426 while (*head == ' ') head++;
428 /* Sanity check for code. */
429 if (head[0] < '1' || head[0] > '9' ||
430 !isdigit(head[1]) ||
431 !isdigit(head[2]))
432 return -6; /* Invalid code. */
434 /* Extract code. */
435 *code = (head[0] - '0') * 100 + (head[1] - '0') * 10 + head[2] - '0';
437 return 0;
440 static int
441 check_http_server_bugs(struct uri *uri, struct http_connection_info *http,
442 unsigned char *head)
444 unsigned char *server;
445 const unsigned char *const *s;
446 static const unsigned char *const buggy_servers[] = {
447 "mod_czech/3.1.0",
448 "Purveyor",
449 "Netscape-Enterprise",
450 NULL
453 if (!get_opt_bool("protocol.http.bugs.allow_blacklist")
454 || HTTP_1_0(http->sent_version))
455 return 0;
457 server = parse_header(head, "Server", NULL);
458 if (!server)
459 return 0;
461 for (s = buggy_servers; *s; s++) {
462 if (strstr(server, *s)) {
463 add_blacklist_entry(uri, SERVER_BLACKLIST_HTTP10);
464 break;
468 mem_free(server);
469 return (*s != NULL);
472 static void
473 http_end_request(struct connection *conn, enum connection_state state,
474 int notrunc)
476 shutdown_connection_stream(conn);
478 if (conn->info && !((struct http_connection_info *) conn->info)->close
479 && (!conn->socket->ssl) /* We won't keep alive ssl connections */
480 && (!get_opt_bool("protocol.http.bugs.post_no_keepalive")
481 || !conn->uri->post)) {
482 if (state == S_OK && conn->cached)
483 normalize_cache_entry(conn->cached, !notrunc ? conn->from : -1);
484 set_connection_state(conn, state);
485 add_keepalive_connection(conn, HTTP_KEEPALIVE_TIMEOUT, NULL);
486 } else {
487 abort_connection(conn, state);
491 static void http_send_header(struct socket *);
493 void
494 http_protocol_handler(struct connection *conn)
496 /* setcstate(conn, S_CONN); */
498 if (!has_keepalive_connection(conn)) {
499 make_connection(conn->socket, conn->uri, http_send_header,
500 conn->cache_mode >= CACHE_MODE_FORCE_RELOAD);
501 } else {
502 http_send_header(conn->socket);
/* Proxy connections are handled exactly like plain HTTP ones; the request
 * builder tells the two apart by looking at conn->uri. */
void
proxy_protocol_handler(struct connection *conn)
{
	http_protocol_handler(conn);
}
/* True if the URI pointer @x uses the proxy pseudo-protocol. */
512 #define IS_PROXY_URI(x) ((x)->protocol == PROTOCOL_PROXY)
/* True if @conn goes through a proxy (conn->uri) and the URI actually being
 * fetched (conn->proxied_uri) is an HTTPS one. */
514 #define connection_is_https_proxy(conn) \
515 (IS_PROXY_URI((conn)->uri) && (conn)->proxied_uri->protocol == PROTOCOL_HTTPS)
517 struct http_connection_info *
518 init_http_connection_info(struct connection *conn, int major, int minor, int close)
520 struct http_connection_info *http;
522 http = mem_calloc(1, sizeof(*http));
523 if (!http) {
524 http_end_request(conn, S_OUT_OF_MEM, 0);
525 return NULL;
528 http->sent_version.major = major;
529 http->sent_version.minor = minor;
530 http->close = close;
532 /* The CGI code uses this too and blacklisting expects a host name. */
533 if (conn->proxied_uri->protocol != PROTOCOL_FILE)
534 http->bl_flags = get_blacklist_flags(conn->proxied_uri);
536 if (http->bl_flags & SERVER_BLACKLIST_HTTP10
537 || get_opt_bool("protocol.http.bugs.http10")) {
538 http->sent_version.major = 1;
539 http->sent_version.minor = 0;
542 /* If called from HTTPS proxy connection the connection info might have
543 * already been allocated. */
544 mem_free_set(&conn->info, http);
546 return http;
/* Build the complete HTTP request for the connection owning @socket
 * (request line, headers and, for POST, the decoded body) in one string and
 * pass it to request_from_socket() with http_got_header as the callback.
 *
 * When the connection goes through a proxy to an HTTPS URI and the socket
 * is not using SSL yet, a CONNECT request is built instead; it carries only
 * the subset of headers marked with "CONNECT:" comments below.
 *
 * On a missing host or on allocation failure the request is ended through
 * http_end_request() and nothing is sent. */
549 static void
550 http_send_header(struct socket *socket)
552 struct connection *conn = socket->conn;
553 struct http_connection_info *http;
554 int trace = get_opt_bool("protocol.http.trace");
555 struct string header;
556 unsigned char *post_data = NULL;
557 struct auth_entry *entry = NULL;
558 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
559 unsigned char *optstr;
560 int use_connect, talking_to_proxy;
562 /* Sanity check for a host */
563 if (!uri || !uri->host || !*uri->host || !uri->hostlen) {
564 http_end_request(conn, S_BAD_URL, 0);
565 return;
/* Ask for HTTP/1.1 without forced close; init_http_connection_info() may
 * lower the version to 1.0 and has already ended the request on failure. */
568 http = init_http_connection_info(conn, 1, 1, 0);
569 if (!http) return;
571 if (!init_string(&header)) {
572 http_end_request(conn, S_OUT_OF_MEM, 0);
573 return;
576 if (!conn->cached) conn->cached = find_in_cache(uri);
/* Both flags can only be true while the socket is not using SSL. */
578 talking_to_proxy = IS_PROXY_URI(conn->uri) && !conn->socket->ssl;
579 use_connect = connection_is_https_proxy(conn) && !conn->socket->ssl;
/* Request method; TRACE debugging overrides everything else. */
581 if (trace) {
582 add_to_string(&header, "TRACE ");
583 } else if (use_connect) {
584 add_to_string(&header, "CONNECT ");
585 /* In CONNECT requests, we send only a subset of the
586 * headers to the proxy. See the "CONNECT:" comments
587 * below. After the CONNECT request succeeds, we
588 * negotiate TLS with the real server and make a new
589 * HTTP request that includes all the headers. */
590 } else if (uri->post) {
591 add_to_string(&header, "POST ");
592 conn->unrestartable = 1;
593 } else {
594 add_to_string(&header, "GET ");
/* Request target: a leading '/' is added only when not talking to a proxy. */
597 if (!talking_to_proxy) {
598 add_char_to_string(&header, '/');
601 if (use_connect) {
602 /* Add port if it was specified or the default port */
603 add_uri_to_string(&header, uri, URI_HTTP_CONNECT);
604 } else {
605 if (connection_is_https_proxy(conn) && conn->socket->ssl) {
606 add_url_to_http_string(&header, uri, URI_DATA);
608 } else if (talking_to_proxy) {
609 add_url_to_http_string(&header, uri, URI_PROXY);
611 } else {
612 add_url_to_http_string(&header, conn->uri, URI_DATA);
616 add_to_string(&header, " HTTP/");
617 add_long_to_string(&header, http->sent_version.major);
618 add_char_to_string(&header, '.');
619 add_long_to_string(&header, http->sent_version.minor);
620 add_crlf_to_string(&header);
622 /* CONNECT: Sending a Host header seems pointless as the same
623 * information is already in the CONNECT line. It's harmless
624 * though and Mozilla does it too. */
625 add_to_string(&header, "Host: ");
626 add_uri_to_string(&header, uri, URI_HTTP_HOST);
627 add_crlf_to_string(&header);
629 /* CONNECT: Proxy-Authorization is intended to be seen by the proxy. */
630 if (talking_to_proxy) {
631 unsigned char *user = get_opt_str("protocol.http.proxy.user");
632 unsigned char *passwd = get_opt_str("protocol.http.proxy.passwd");
634 if (proxy_auth.digest) {
635 unsigned char *response;
/* Copy the configured credentials into proxy_auth, truncated to the
 * sizes of its fixed buffers and always NUL-terminated. */
636 int userlen = int_min(strlen(user), AUTH_USER_MAXLEN - 1);
637 int passwordlen = int_min(strlen(passwd), AUTH_PASSWORD_MAXLEN - 1);
639 if (userlen)
640 memcpy(proxy_auth.user, user, userlen);
641 proxy_auth.user[userlen] = '\0';
642 if (passwordlen)
643 memcpy(proxy_auth.password, passwd, passwordlen);
644 proxy_auth.password[passwordlen] = '\0';
646 /* FIXME: @uri is the proxied URI. Maybe the passed URI
647 * should be the proxy URI aka conn->uri. --jonas */
648 response = get_http_auth_digest_response(&proxy_auth, uri);
649 if (response) {
650 add_to_string(&header, "Proxy-Authorization: Digest ");
651 add_to_string(&header, response);
652 add_crlf_to_string(&header);
654 mem_free(response);
657 } else {
/* Basic scheme: only sent when a proxy user name is configured. */
658 if (user[0]) {
659 unsigned char *proxy_data;
661 proxy_data = straconcat(user, ":", passwd, (unsigned char *) NULL);
662 if (proxy_data) {
663 unsigned char *proxy_64 = base64_encode(proxy_data);
665 if (proxy_64) {
666 add_to_string(&header, "Proxy-Authorization: Basic ");
667 add_to_string(&header, proxy_64);
668 add_crlf_to_string(&header);
669 mem_free(proxy_64);
671 mem_free(proxy_data);
677 /* CONNECT: User-Agent does not reveal anything about the
678 * resource we're fetching, and it may help the proxy return
679 * better error messages. */
680 optstr = get_opt_str("protocol.http.user_agent");
/* An empty option or a single space suppresses the header entirely. */
681 if (*optstr && strcmp(optstr, " ")) {
682 unsigned char *ustr, ts[64] = "";
684 add_to_string(&header, "User-Agent: ");
/* ts becomes "<width>x<height>" of the terminal at terminals.prev, or
 * stays empty when there is no terminal. */
686 if (!list_empty(terminals)) {
687 unsigned int tslen = 0;
688 struct terminal *term = terminals.prev;
690 ulongcat(ts, &tslen, term->width, 3, 0);
691 ts[tslen++] = 'x';
692 ulongcat(ts, &tslen, term->height, 3, 0);
694 ustr = subst_user_agent(optstr, VERSION_STRING, system_name,
695 ts);
697 if (ustr) {
698 add_to_string(&header, ustr);
699 mem_free(ustr);
702 add_crlf_to_string(&header);
705 /* CONNECT: Referer probably is a secret page in the HTTPS
706 * server, so don't reveal it to the proxy. */
707 if (!use_connect) {
708 switch (get_opt_int("protocol.http.referer.policy")) {
709 case REFERER_NONE:
710 /* oh well */
711 break;
713 case REFERER_FAKE:
714 optstr = get_opt_str("protocol.http.referer.fake");
715 if (!optstr[0]) break;
716 add_to_string(&header, "Referer: ");
717 add_to_string(&header, optstr);
718 add_crlf_to_string(&header);
719 break;
721 case REFERER_TRUE:
722 if (!conn->referrer) break;
723 add_to_string(&header, "Referer: ");
724 add_url_to_http_string(&header, conn->referrer, URI_HTTP_REFERRER);
725 add_crlf_to_string(&header);
726 break;
728 case REFERER_SAME_URL:
729 add_to_string(&header, "Referer: ");
730 add_url_to_http_string(&header, uri, URI_HTTP_REFERRER);
731 add_crlf_to_string(&header);
732 break;
736 /* CONNECT: Do send all Accept* headers to the CONNECT proxy,
737 * because they do not reveal anything about the resource
738 * we're going to request via TLS, and they may affect the
739 * error message if the CONNECT request fails.
741 * If ELinks is ever changed to vary its Accept headers based
742 * on what it intends to do with the returned resource, e.g.
743 * sending "Accept: text/css" when it wants an external
744 * stylesheet, then it should do that only in the inner GET
745 * and not in the outer CONNECT. */
746 add_to_string(&header, "Accept: */*");
747 add_crlf_to_string(&header);
749 accept_encoding_header(&header);
/* The Accept-Charset line is built once and then reused. */
751 if (!accept_charset) {
752 init_accept_charset();
755 if (!(http->bl_flags & SERVER_BLACKLIST_NO_CHARSET)
756 && !get_opt_bool("protocol.http.bugs.accept_charset")
757 && accept_charset) {
758 add_to_string(&header, accept_charset);
/* An explicitly configured Accept-Language wins over the UI language. */
761 optstr = get_opt_str("protocol.http.accept_language");
762 if (optstr[0]) {
763 add_to_string(&header, "Accept-Language: ");
764 add_to_string(&header, optstr);
765 add_crlf_to_string(&header);
767 #ifdef CONFIG_NLS
768 else if (get_opt_bool("protocol.http.accept_ui_language")) {
769 unsigned char *code = language_to_iso639(current_language);
771 if (code) {
772 add_to_string(&header, "Accept-Language: ");
773 add_to_string(&header, code);
774 add_crlf_to_string(&header);
777 #endif
779 /* CONNECT: Proxy-Connection is intended to be seen by the
780 * proxy. If the CONNECT request succeeds, then the proxy
781 * will forward the remainder of the TCP connection to the
782 * origin server, and Proxy-Connection does not matter; but
783 * if the request fails, then Proxy-Connection may matter. */
784 /* FIXME: What about post-HTTP/1.1?? --Zas */
785 if (HTTP_1_1(http->sent_version)) {
786 if (!IS_PROXY_URI(conn->uri)) {
787 add_to_string(&header, "Connection: ");
788 } else {
789 add_to_string(&header, "Proxy-Connection: ");
792 if (!uri->post || !get_opt_bool("protocol.http.bugs.post_no_keepalive")) {
793 add_to_string(&header, "Keep-Alive");
794 } else {
795 add_to_string(&header, "close");
797 add_crlf_to_string(&header);
800 /* CONNECT: Do not tell the proxy anything we have cached
801 * about the resource. */
802 if (!use_connect && conn->cached) {
803 if (!conn->cached->incomplete && conn->cached->head
804 && conn->cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED) {
805 if (conn->cached->last_modified) {
806 add_to_string(&header, "If-Modified-Since: ");
807 add_to_string(&header, conn->cached->last_modified);
808 add_crlf_to_string(&header);
810 if (conn->cached->etag) {
811 add_to_string(&header, "If-None-Match: ");
812 add_to_string(&header, conn->cached->etag);
813 add_crlf_to_string(&header);
818 /* CONNECT: Let's send cache control headers to the proxy too;
819 * they may affect DNS caching. */
820 if (conn->cache_mode >= CACHE_MODE_FORCE_RELOAD) {
821 add_to_string(&header, "Pragma: no-cache");
822 add_crlf_to_string(&header);
823 add_to_string(&header, "Cache-Control: no-cache");
824 add_crlf_to_string(&header);
827 /* CONNECT: Do not reveal byte ranges to the proxy. It can't
828 * do anything good with that information anyway. */
829 if (!use_connect && (conn->from || conn->progress->start > 0)) {
830 /* conn->from takes precedence. conn->progress.start is set only the first
831 * time, then conn->from gets updated and in case of any retries
832 * etc we have everything interesting in conn->from already. */
833 add_to_string(&header, "Range: bytes=");
834 add_long_to_string(&header, conn->from ? conn->from : conn->progress->start);
835 add_char_to_string(&header, '-');
836 add_crlf_to_string(&header);
839 /* CONNECT: The Authorization header is for the origin server only. */
840 if (!use_connect) {
/* With GSSAPI compiled in, stored credentials are looked up only when
 * http_negotiate_output() returns non-zero. */
841 #ifdef CONFIG_GSSAPI
842 if (http_negotiate_output(uri, &header) != 0)
843 #endif
844 entry = find_auth(uri);
847 if (entry) {
848 if (entry->digest) {
849 unsigned char *response;
851 response = get_http_auth_digest_response(entry, uri);
852 if (response) {
853 add_to_string(&header, "Authorization: Digest ");
854 add_to_string(&header, response);
855 add_crlf_to_string(&header);
857 mem_free(response);
860 } else {
861 /* RFC2617 section 2 [Basic Authentication Scheme]
863 * To receive authorization, the client sends the userid
864 * and password, separated by a single colon (":")
865 * character, within a base64 [7] encoded string in the
866 * credentials. */
867 unsigned char *id;
869 /* Create base64 encoded string. */
870 id = straconcat(entry->user, ":", entry->password,
871 (unsigned char *) NULL);
872 if (id) {
873 unsigned char *base64 = base64_encode(id);
/* Replaces the plain "user:password" string with its encoded form;
 * id ends up NULL if the encoding failed. */
875 mem_free_set(&id, base64);
878 if (id) {
879 add_to_string(&header, "Authorization: Basic ");
880 add_to_string(&header, id);
881 add_crlf_to_string(&header);
882 mem_free(id);
887 /* CONNECT: Any POST data is for the origin server only. */
888 if (!use_connect && uri->post) {
889 /* We search for first '\n' in uri->post to get content type
890 * as set by get_form_uri(). This '\n' is dropped if any
891 * and replaced by correct '\r\n' termination here. */
892 unsigned char *postend = strchr(uri->post, '\n');
894 if (postend) {
895 add_to_string(&header, "Content-Type: ");
896 add_bytes_to_string(&header, uri->post, postend - uri->post);
897 add_crlf_to_string(&header);
/* The body is stored as two hex characters per byte (it is decoded with
 * unhx() below), hence the division by two. */
900 post_data = postend ? postend + 1 : uri->post;
901 add_to_string(&header, "Content-Length: ");
902 add_long_to_string(&header, strlen(post_data) / 2);
903 add_crlf_to_string(&header);
906 #ifdef CONFIG_COOKIES
907 /* CONNECT: Cookies are for the origin server only. */
908 if (!use_connect) {
909 struct string *cookies = send_cookies(uri);
911 if (cookies) {
912 add_to_string(&header, "Cookie: ");
913 add_string_to_string(&header, cookies);
914 add_crlf_to_string(&header);
915 done_string(cookies);
918 #endif
/* Empty line terminating the header section. */
920 add_crlf_to_string(&header);
922 /* CONNECT: Any POST data is for the origin server only.
923 * This was already checked above and post_data is NULL
924 * in that case. Verified with an assertion below. */
925 if (post_data) {
926 #define POST_BUFFER_SIZE 4096
927 unsigned char *post = post_data;
928 unsigned char buffer[POST_BUFFER_SIZE];
929 int n = 0;
931 assert(!use_connect); /* see comment above */
/* Decode hex pairs into @buffer and flush it to @header each time it
 * fills up; a dangling odd character at the end is ignored. */
933 while (post[0] && post[1]) {
934 int h1, h2;
936 h1 = unhx(post[0]);
937 assertm(h1 >= 0 && h1 < 16, "h1 in the POST buffer is %d (%d/%c)", h1, post[0], post[0]);
938 if_assert_failed h1 = 0;
940 h2 = unhx(post[1]);
941 assertm(h2 >= 0 && h2 < 16, "h2 in the POST buffer is %d (%d/%c)", h2, post[1], post[1]);
942 if_assert_failed h2 = 0;
944 buffer[n++] = (h1<<4) + h2;
945 post += 2;
946 if (n == POST_BUFFER_SIZE) {
947 add_bytes_to_string(&header, buffer, n);
948 n = 0;
952 if (n)
953 add_bytes_to_string(&header, buffer, n);
954 #undef POST_BUFFER_SIZE
/* NOTE(review): @header is released right after this call, so
 * request_from_socket() presumably copies the data - confirm. */
957 request_from_socket(socket, header.source, header.length, S_SENT,
958 SOCKET_END_ONCLOSE, http_got_header);
959 done_string(&header);
963 /* This function decompresses the data block given in @data (if it was
964 * compressed), which is long @len bytes. The decompressed data block is given
965 * back to the world as the return value and its length is stored into
966 * @new_len.
968 * In this function, value of either http->chunk_remaining or http->length is
969 * being changed (it depends on if chunked mode is used or not).
971 * Note that the function is still a little esoteric for me. Don't take it
972 * lightly and don't mess with it without grave reason! If you dare to touch
973 * this without testing the changes on slashdot, freshmeat and cvsweb
974 * (including revision history), don't dare to send me any patches! ;) --pasky
976 * This function gotta die. */
/* Feed @len bytes at @data through the connection's content decoder and
 * return whatever decoded output is available, storing its size in
 * *@new_len.
 *
 * With ENCODING_NONE the input pointer itself is returned (no copy is
 * made). Otherwise the input is written into conn->stream_pipes[1], the
 * decoder stream reads from conn->stream_pipes[0], and the result is a
 * buffer grown with mem_realloc() - or NULL, with *@new_len == 0, when
 * nothing was produced or setup failed.
 *
 * Either http->chunk_remaining (chunked mode) or http->length is decreased
 * by the number of bytes consumed, while it is positive. */
977 static unsigned char *
978 decompress_data(struct connection *conn, unsigned char *data, int len,
979 int *new_len)
981 struct http_connection_info *http = conn->info;
/* FINISHING means the decoding stream is shut down before returning from
 * the bottom of the function. */
982 enum { NORMAL, FINISHING } state = NORMAL;
983 int did_read = 0;
/* Points at whichever counter tracks the bytes still expected. */
984 int *length_of_block;
985 unsigned char *output = NULL;
/* Number of bytes requested from the decoder per read_encoded() call. */
987 #define BIG_READ 65536
989 if (http->length == LEN_CHUNKED) {
990 if (http->chunk_remaining == CHUNK_ZERO_SIZE)
991 state = FINISHING;
992 length_of_block = &http->chunk_remaining;
993 } else {
994 length_of_block = &http->length;
995 if (!*length_of_block) {
996 /* Going to finish this decoding business. */
997 state = FINISHING;
/* Unencoded body: hand the caller's buffer straight back. */
1001 if (conn->content_encoding == ENCODING_NONE) {
1002 *new_len = len;
1003 if (*length_of_block > 0) *length_of_block -= len;
1004 return data;
1007 *new_len = 0; /* new_len must be zero if we would ever return NULL */
/* Create the pipe pair on first use and make both ends non-blocking. */
1009 if (conn->stream_pipes[0] == -1
1010 && (c_pipe(conn->stream_pipes) < 0
1011 || set_nonblocking_fd(conn->stream_pipes[0]) < 0
1012 || set_nonblocking_fd(conn->stream_pipes[1]) < 0)) {
1013 return NULL;
1016 do {
1017 unsigned char *tmp;
1019 if (state == NORMAL) {
1020 /* ... we aren't finishing yet. */
1021 int written = safe_write(conn->stream_pipes[1], data, len);
/* A failed write (negative result) consumes nothing; the loop goes on
 * to read from the decoder. */
1023 if (written >= 0) {
1024 data += written;
1025 len -= written;
1027 /* In non-keep-alive connections http->length == -1, so the test below */
1028 if (*length_of_block > 0)
1029 *length_of_block -= written;
1030 /* http->length is 0 at the end of block for all modes: keep-alive,
1031 * non-keep-alive and chunked */
1032 if (!http->length) {
1033 /* That's all, folks - let's finish this. */
1034 state = FINISHING;
1035 } else if (!len) {
1036 /* We've done for this round (but not done
1037 * completely). Thus we will get out with
1038 * what we have and leave what we wrote to
1039 * the next round - we have to do that since
1040 * we MUST NOT ever empty the pipe completely
1041 * - this would cause a disaster for
1042 * read_encoded(), which would simply not
1043 * work right then. */
1044 return output;
/* The decoder stream is opened lazily on the read end of the pipe. */
1049 if (!conn->stream) {
1050 conn->stream = open_encoded(conn->stream_pipes[0],
1051 conn->content_encoding);
1052 if (!conn->stream) return NULL;
/* Grow through a temporary so that a failed mem_realloc() does not lose
 * (and leak) the data gathered so far; it is returned as it is. */
1055 tmp = mem_realloc(output, *new_len + BIG_READ);
1056 if (!tmp) break;
1057 output = tmp;
1059 did_read = read_encoded(conn->stream, output + *new_len, BIG_READ);
1061 if (did_read > 0) *new_len += did_read;
1062 else {
/* Zero bytes: nothing more for now. Negative: treat as end of stream. */
1063 if (did_read < 0) state = FINISHING;
1064 break;
/* Go on while input remains or the decoder filled the whole read window. */
1066 } while (len || (did_read == BIG_READ));
1068 if (state == FINISHING) shutdown_connection_stream(conn);
1069 return output;
1072 static int
1073 is_line_in_buffer(struct read_buffer *rb)
1075 int l;
1077 for (l = 0; l < rb->length; l++) {
1078 unsigned char a0 = rb->data[l];
1080 if (a0 == ASCII_LF)
1081 return l + 1;
1082 if (a0 == ASCII_CR) {
1083 if (rb->data[l + 1] == ASCII_LF
1084 && l < rb->length - 1)
1085 return l + 2;
1086 if (l == rb->length - 1)
1087 return 0;
1089 if (a0 < ' ')
1090 return -1;
1092 return 0;
1095 static void read_http_data(struct socket *socket, struct read_buffer *rb);
1097 static void
1098 read_more_http_data(struct connection *conn, struct read_buffer *rb,
1099 int already_got_anything)
1101 enum connection_state state = already_got_anything ? S_TRANS : conn->state;
1103 read_from_socket(conn->socket, rb, state, read_http_data);
1106 static void
1107 read_http_data_done(struct connection *conn)
1109 struct http_connection_info *http = conn->info;
1111 /* There's no content but an error so just print
1112 * that instead of nothing. */
1113 if (!conn->from) {
1114 if (http->code >= 400) {
1115 http_error_document(conn, http->code);
1117 } else {
1118 /* This is not an error, thus fine. No need generate any
1119 * document, as this may be empty and it's not a problem.
1120 * In case of 3xx, we're probably just getting kicked to
1121 * another page anyway. And in case of 2xx, the document
1122 * may indeed be empty and thus the user should see it so. */
1126 http_end_request(conn, S_OK, 0);
1129 /* Returns:
1130 * -1 on error
1131 * 0 if more to read
1132 * 1 if done
1134 static int
1135 read_chunked_http_data(struct connection *conn, struct read_buffer *rb)
1137 struct http_connection_info *http = conn->info;
1138 int total_data_len = 0;
1140 while (1) {
1141 /* Chunked. Good luck! */
1142 /* See RFC2616, section 3.6.1. Basically, it looks like:
1143 * 1234 ; a = b ; c = d\r\n
1144 * aklkjadslkfjalkfjlkajkljfdkljdsfkljdf*1234\r\n
1145 * 0\r\n
1146 * \r\n */
1147 if (http->chunk_remaining == CHUNK_DATA_END) {
1148 int l = is_line_in_buffer(rb);
1150 if (l) {
1151 if (l == -1) {
1152 /* Invalid character in buffer. */
1153 return -1;
1156 /* Remove everything to the EOLN. */
1157 kill_buffer_data(rb, l);
1158 if (l <= 2) {
1159 /* Empty line. */
1160 return 2;
1162 continue;
1165 } else if (http->chunk_remaining == CHUNK_SIZE) {
1166 int l = is_line_in_buffer(rb);
1168 if (l) {
1169 unsigned char *de;
1170 int n = 0;
1172 if (l != -1) {
1173 errno = 0;
1174 n = strtol(rb->data, (char **) &de, 16);
1175 if (errno || !*de) {
1176 return -1;
1180 if (l == -1 || de == rb->data) {
1181 return -1;
1184 /* Remove everything to the EOLN. */
1185 kill_buffer_data(rb, l);
1186 http->chunk_remaining = n;
1187 if (!http->chunk_remaining)
1188 http->chunk_remaining = CHUNK_ZERO_SIZE;
1189 continue;
1192 } else {
1193 unsigned char *data;
1194 int data_len;
1195 int zero = (http->chunk_remaining == CHUNK_ZERO_SIZE);
1196 int len = zero ? 0 : http->chunk_remaining;
1198 /* Maybe everything necessary didn't come yet.. */
1199 int_upper_bound(&len, rb->length);
1200 conn->received += len;
1202 data = decompress_data(conn, rb->data, len, &data_len);
1204 if (add_fragment(conn->cached, conn->from,
1205 data, data_len) == 1)
1206 conn->tries = 0;
1208 if (data && data != rb->data) mem_free(data);
1210 conn->from += data_len;
1211 total_data_len += data_len;
1213 kill_buffer_data(rb, len);
1215 if (zero) {
1216 /* Last chunk has zero length, so this is last
1217 * chunk, we finished decompression just now
1218 * and now we can happily finish reading this
1219 * stuff. */
1220 http->chunk_remaining = CHUNK_DATA_END;
1221 continue;
1224 if (!http->chunk_remaining && rb->length > 0) {
1225 /* Eat newline succeeding each chunk. */
1226 if (rb->data[0] == ASCII_LF) {
1227 kill_buffer_data(rb, 1);
1228 } else {
1229 if (rb->data[0] != ASCII_CR
1230 || (rb->length >= 2
1231 && rb->data[1] != ASCII_LF)) {
1232 return -1;
1234 if (rb->length < 2) break;
1235 kill_buffer_data(rb, 2);
1237 http->chunk_remaining = CHUNK_SIZE;
1238 continue;
1241 break;
1244 /* More to read. */
1245 return !!total_data_len;
1248 /* Returns 0 if more data, 1 if done. */
1249 static int
1250 read_normal_http_data(struct connection *conn, struct read_buffer *rb)
1252 struct http_connection_info *http = conn->info;
1253 unsigned char *data;
1254 int data_len;
1255 int len = rb->length;
1257 if (http->length >= 0 && http->length < len) {
1258 /* We won't read more than we have to go. */
1259 len = http->length;
1262 conn->received += len;
1264 data = decompress_data(conn, rb->data, len, &data_len);
1266 if (add_fragment(conn->cached, conn->from, data, data_len) == 1)
1267 conn->tries = 0;
1269 if (data && data != rb->data) mem_free(data);
1271 conn->from += data_len;
1273 kill_buffer_data(rb, len);
1275 if (!http->length && conn->socket->state == SOCKET_RETRY_ONCLOSE) {
1276 return 2;
1279 return !!data_len;
1282 static void
1283 read_http_data(struct socket *socket, struct read_buffer *rb)
1285 struct connection *conn = socket->conn;
1286 struct http_connection_info *http = conn->info;
1287 int ret;
1289 if (socket->state == SOCKET_CLOSED) {
1290 if (conn->content_encoding && http->length == -1) {
1291 /* Flush decompression first. */
1292 http->length = 0;
1293 } else {
1294 read_http_data_done(conn);
1295 return;
1299 if (http->length != LEN_CHUNKED) {
1300 ret = read_normal_http_data(conn, rb);
1302 } else {
1303 ret = read_chunked_http_data(conn, rb);
1306 switch (ret) {
1307 case 0:
1308 read_more_http_data(conn, rb, 0);
1309 break;
1310 case 1:
1311 read_more_http_data(conn, rb, 1);
1312 break;
1313 case 2:
1314 read_http_data_done(conn);
1315 break;
1316 default:
1317 assertm(ret == -1, "Unexpected return value: %d", ret);
1318 abort_connection(conn, S_HTTP_ERROR);
1322 /* Returns offset of the header end, zero if more data is needed, -1 when
1323 * incorrect data was received, -2 if this is HTTP/0.9 and no header is to
1324 * come. */
1325 static int
1326 get_header(struct read_buffer *rb)
1328 int i;
1330 /* XXX: We will have to do some guess about whether an HTTP header is
1331 * coming or not, in order to support HTTP/0.9 reply correctly. This
1332 * means a little code duplication with get_http_code(). --pasky */
1333 if (rb->length > 4 && strncasecmp(rb->data, "HTTP/", 5))
1334 return -2;
1336 for (i = 0; i < rb->length; i++) {
1337 unsigned char a0 = rb->data[i];
1338 unsigned char a1 = rb->data[i + 1];
1340 if (a0 == 0) {
1341 rb->data[i] = ' ';
1342 continue;
1344 if (a0 == ASCII_LF && a1 == ASCII_LF
1345 && i < rb->length - 1)
1346 return i + 2;
1347 if (a0 == ASCII_CR && i < rb->length - 3) {
1348 if (a1 == ASCII_CR) continue;
1349 if (a1 != ASCII_LF) return -1;
1350 if (rb->data[i + 2] == ASCII_CR) {
1351 if (rb->data[i + 3] != ASCII_LF) return -1;
1352 return i + 4;
1357 return 0;
/* Walks over all @header_field values found in @header and acts on the
 * first authentication challenge it can handle:
 *  - "Basic" with a realm parameter registers a basic auth entry for @uri,
 *  - "Digest" registers a digest auth entry with realm, nonce and opaque,
 *  - with CONFIG_GSSAPI, the negotiate schemes are handed over to
 *    http_negotiate_input().
 * Challenges that cannot be handled (including "Basic" without a realm)
 * are skipped.
 *
 * Returns 1 if the connection needs to be retried (for negotiate-auth
 * only), 0 otherwise. */
static int
check_http_authentication(struct connection *conn, struct uri *uri,
		unsigned char *header, unsigned char *header_field)
{
	unsigned char *next, *challenge;
	int need_retry = 0;

	for (challenge = parse_header(header, header_field, &next);
	     challenge;
	     challenge = parse_header(next, header_field, &next)) {
		int handled = 0;

		if (!strncasecmp(challenge, "Basic", 5)) {
			unsigned char *realm = get_header_param(challenge, "realm");

			if (realm) {
				add_auth_entry(uri, realm, NULL, NULL, 0);
				mem_free(realm);
				handled = 1;
			}

		} else if (!strncasecmp(challenge, "Digest", 6)) {
			unsigned char *realm = get_header_param(challenge, "realm");
			unsigned char *nonce = get_header_param(challenge, "nonce");
			unsigned char *opaque = get_header_param(challenge, "opaque");

			add_auth_entry(uri, realm, nonce, opaque, 1);

			mem_free_if(realm);
			mem_free_if(nonce);
			mem_free_if(opaque);
			handled = 1;
		}
#ifdef CONFIG_GSSAPI
		else if (!strncasecmp(challenge, HTTPNEG_GSS_STR, HTTPNEG_GSS_STRLEN)) {
			if (http_negotiate_input(conn, uri, HTTPNEG_GSS, next) == 0)
				need_retry = 1;
			handled = 1;

		} else if (!strncasecmp(challenge, HTTPNEG_NEG_STR, HTTPNEG_NEG_STRLEN)) {
			if (http_negotiate_input(conn, uri, HTTPNEG_NEG, next) == 0)
				need_retry = 1;
			handled = 1;
		}
#endif

		/* The header value is released in every case; the first
		 * challenge that was handled ends the search. */
		mem_free(challenge);
		if (handled) break;
	}

	return need_retry;
}
1413 void
1414 http_got_header(struct socket *socket, struct read_buffer *rb)
1416 struct connection *conn = socket->conn;
1417 struct http_connection_info *http = conn->info;
1418 unsigned char *head;
1419 #ifdef CONFIG_COOKIES
1420 unsigned char *cookie, *ch;
1421 #endif
1422 unsigned char *d;
1423 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
1424 struct http_version version;
1425 enum connection_state state = (conn->state != S_PROC ? S_GETH : S_PROC);
1426 int a, h = 200;
1427 int cf;
1429 if (socket->state == SOCKET_CLOSED) {
1430 if (!conn->tries && uri->host) {
1431 if (http->bl_flags & SERVER_BLACKLIST_NO_CHARSET) {
1432 del_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1433 } else {
1434 add_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1435 conn->tries = -1;
1438 retry_connection(conn, S_CANT_READ);
1439 return;
1441 socket->state = SOCKET_RETRY_ONCLOSE;
1443 again:
1444 a = get_header(rb);
1445 if (a == -1) {
1446 abort_connection(conn, S_HTTP_ERROR);
1447 return;
1449 if (!a) {
1450 read_from_socket(conn->socket, rb, state, http_got_header);
1451 return;
1453 if (a == -2) a = 0;
1454 if ((a && get_http_code(rb, &h, &version))
1455 || h == 101) {
1456 abort_connection(conn, S_HTTP_ERROR);
1457 return;
1460 /* When no header, HTTP/0.9 document. That's always text/html,
1461 * according to
1462 * http://www.w3.org/Protocols/HTTP/AsImplemented.html. */
1463 /* FIXME: This usage of fake protocol headers for setting up the
1464 * content type has been obsoleted by the @content_type member of
1465 * {struct cache_entry}. */
1466 head = (a ? memacpy(rb->data, a)
1467 : stracpy("\r\nContent-Type: text/html\r\n"));
1468 if (!head) {
1469 abort_connection(conn, S_OUT_OF_MEM);
1470 return;
1473 if (check_http_server_bugs(uri, http, head)) {
1474 mem_free(head);
1475 retry_connection(conn, S_RESTART);
1476 return;
1479 #ifdef CONFIG_CGI
1480 if (uri->protocol == PROTOCOL_FILE) {
1481 /* ``Status'' is not a standard HTTP header field although some
1482 * HTTP servers like www.php.net uses it for some reason. It should
1483 * only be used for CGI scripts so that it does not interfere
1484 * with status code depended handling for ``normal'' HTTP like
1485 * redirects. */
1486 d = parse_header(head, "Status", NULL);
1487 if (d) {
1488 int h2 = atoi(d);
1490 mem_free(d);
1491 if (h2 >= 100 && h2 < 600) h = h2;
1492 if (h == 101) {
1493 mem_free(head);
1494 abort_connection(conn, S_HTTP_ERROR);
1495 return;
1499 #endif
1501 #ifdef CONFIG_COOKIES
1502 ch = head;
1503 while ((cookie = parse_header(ch, "Set-Cookie", &ch))) {
1504 set_cookie(uri, cookie);
1505 mem_free(cookie);
1507 #endif
1508 http->code = h;
1510 if (h == 100) {
1511 mem_free(head);
1512 state = S_PROC;
1513 kill_buffer_data(rb, a);
1514 goto again;
1516 if (h < 200) {
1517 mem_free(head);
1518 abort_connection(conn, S_HTTP_ERROR);
1519 return;
1521 if (h == 304) {
1522 mem_free(head);
1523 http_end_request(conn, S_OK, 1);
1524 return;
1526 if (h == 204) {
1527 mem_free(head);
1528 http_end_request(conn, S_HTTP_204, 0);
1529 return;
1531 if (h == 200 && connection_is_https_proxy(conn) && !conn->socket->ssl) {
1532 #ifdef CONFIG_SSL
1533 mem_free(head);
1534 socket->need_ssl = 1;
1535 complete_connect_socket(socket, uri, http_send_header);
1536 #else
1537 abort_connection(conn, S_SSL_ERROR);
1538 #endif
1539 return;
1542 conn->cached = get_cache_entry(conn->uri);
1543 if (!conn->cached) {
1544 mem_free(head);
1545 abort_connection(conn, S_OUT_OF_MEM);
1546 return;
1548 mem_free_set(&conn->cached->head, head);
1550 if (!get_opt_bool("document.cache.ignore_cache_control")) {
1551 struct cache_entry *cached = conn->cached;
1553 /* I am not entirely sure in what order we should process these
1554 * headers and if we should still process Cache-Control max-age
1555 * if we already set max age to date mentioned in Expires.
1556 * --jonas */
1557 /* Ensure that when ever cached->max_age is set, cached->expired
1558 * is also set, so the cache management knows max_age contains a
1559 * valid time. If on the other hand no caching is requested
1560 * cached->expire should be set to zero. */
1561 if ((d = parse_header(cached->head, "Expires", NULL))) {
1562 /* Convert date to seconds. */
1563 time_t expires = parse_date(&d, NULL, 0, 1);
1565 mem_free(d);
1567 if (expires && cached->cache_mode != CACHE_MODE_NEVER) {
1568 timeval_from_seconds(&cached->max_age, expires);
1569 cached->expire = 1;
1573 if ((d = parse_header(cached->head, "Pragma", NULL))) {
1574 if (strstr(d, "no-cache")) {
1575 cached->cache_mode = CACHE_MODE_NEVER;
1576 cached->expire = 0;
1578 mem_free(d);
1581 if (cached->cache_mode != CACHE_MODE_NEVER
1582 && (d = parse_header(cached->head, "Cache-Control", NULL))) {
1583 if (strstr(d, "no-cache") || strstr(d, "must-revalidate")) {
1584 cached->cache_mode = CACHE_MODE_NEVER;
1585 cached->expire = 0;
1587 } else {
1588 unsigned char *pos = strstr(d, "max-age=");
1590 assert(cached->cache_mode != CACHE_MODE_NEVER);
1592 if (pos) {
1593 /* Grab the number of seconds. */
1594 timeval_T max_age;
1596 timeval_from_seconds(&max_age, atol(pos + 8));
1597 timeval_now(&cached->max_age);
1598 timeval_add_interval(&cached->max_age, &max_age);
1600 cached->expire = 1;
1604 mem_free(d);
1608 /* XXX: Is there some reason why NOT to follow the Location header
1609 * for any status? If the server didn't mean it, it wouldn't send
1610 * it, after all...? --pasky */
1611 if (h == 201 || h == 301 || h == 302 || h == 303 || h == 307) {
1612 d = parse_header(conn->cached->head, "Location", NULL);
1613 if (d) {
1614 int use_get_method = (h == 303);
1616 /* A note from RFC 2616 section 10.3.3:
1617 * RFC 1945 and RFC 2068 specify that the client is not
1618 * allowed to change the method on the redirected
1619 * request. However, most existing user agent
1620 * implementations treat 302 as if it were a 303
1621 * response, performing a GET on the Location
1622 * field-value regardless of the original request
1623 * method. */
1624 /* So POST must not be redirected to GET, but some
1625 * BUGGY message boards rely on it :-( */
1626 if (h == 302
1627 && get_opt_bool("protocol.http.bugs.broken_302_redirect"))
1628 use_get_method = 1;
1630 redirect_cache(conn->cached, d, use_get_method, -1);
1631 mem_free(d);
1635 if (h == 401) {
1636 if (check_http_authentication(conn, uri,
1637 conn->cached->head, "WWW-Authenticate")) {
1638 retry_connection(conn, S_RESTART);
1639 return;
1643 if (h == 407) {
1644 unsigned char *str;
1646 d = parse_header(conn->cached->head, "Proxy-Authenticate", &str);
1647 while (d) {
1648 if (!strncasecmp(d, "Basic", 5)) {
1649 unsigned char *realm = get_header_param(d, "realm");
1651 if (realm) {
1652 mem_free_set(&proxy_auth.realm, realm);
1653 proxy_auth.digest = 0;
1654 mem_free(d);
1655 break;
1658 } else if (!strncasecmp(d, "Digest", 6)) {
1659 unsigned char *realm = get_header_param(d, "realm");
1660 unsigned char *nonce = get_header_param(d, "nonce");
1661 unsigned char *opaque = get_header_param(d, "opaque");
1663 mem_free_set(&proxy_auth.realm, realm);
1664 mem_free_set(&proxy_auth.nonce, nonce);
1665 mem_free_set(&proxy_auth.opaque, opaque);
1666 proxy_auth.digest = 1;
1668 mem_free(d);
1669 break;
1672 mem_free(d);
1673 d = parse_header(str, "Proxy-Authenticate", &str);
1677 kill_buffer_data(rb, a);
1678 http->close = 0;
1679 http->length = -1;
1680 http->recv_version = version;
1682 if ((d = parse_header(conn->cached->head, "Connection", NULL))
1683 || (d = parse_header(conn->cached->head, "Proxy-Connection", NULL))) {
1684 if (!strcasecmp(d, "close")) http->close = 1;
1685 mem_free(d);
1686 } else if (PRE_HTTP_1_1(version)) {
1687 http->close = 1;
1690 cf = conn->from;
1691 conn->from = 0;
1692 d = parse_header(conn->cached->head, "Content-Range", NULL);
1693 if (d) {
1694 if (strlen(d) > 6) {
1695 d[5] = 0;
1696 if (isdigit(d[6]) && !strcasecmp(d, "bytes")) {
1697 int f;
1699 errno = 0;
1700 f = strtol(d + 6, NULL, 10);
1702 if (!errno && f >= 0) conn->from = f;
1705 mem_free(d);
1707 if (cf && !conn->from && !conn->unrestartable) conn->unrestartable = 1;
1708 if ((conn->progress->start <= 0 && conn->from > cf) || conn->from < 0) {
1709 /* We don't want this if conn->progress.start because then conn->from will
1710 * be probably value of conn->progress.start, while cf is 0. */
1711 abort_connection(conn, S_HTTP_ERROR);
1712 return;
1715 #if 0
1717 struct status *s;
1718 foreach (s, conn->downloads) {
1719 fprintf(stderr, "conn %p status %p pri %d st %d er %d :: ce %s",
1720 conn, s, s->pri, s->state, s->prev_error,
1721 s->cached ? s->cached->url : (unsigned char *) "N-U-L-L");
1724 #endif
1726 if (conn->progress->start >= 0) {
1727 /* Update to the real value which we've got from Content-Range. */
1728 conn->progress->seek = conn->from;
1730 conn->progress->start = conn->from;
1732 d = parse_header(conn->cached->head, "Content-Length", NULL);
1733 if (d) {
1734 unsigned char *ep;
1735 int l;
1737 errno = 0;
1738 l = strtol(d, (char **) &ep, 10);
1740 if (!errno && !*ep && l >= 0) {
1741 if (!http->close || POST_HTTP_1_0(version))
1742 http->length = l;
1743 conn->est_length = conn->from + l;
1745 mem_free(d);
1748 if (!conn->unrestartable) {
1749 d = parse_header(conn->cached->head, "Accept-Ranges", NULL);
1751 if (d) {
1752 if (!strcasecmp(d, "none"))
1753 conn->unrestartable = 1;
1754 mem_free(d);
1755 } else {
1756 if (!conn->from)
1757 conn->unrestartable = 1;
1761 d = parse_header(conn->cached->head, "Transfer-Encoding", NULL);
1762 if (d) {
1763 if (!strcasecmp(d, "chunked")) {
1764 http->length = LEN_CHUNKED;
1765 http->chunk_remaining = CHUNK_SIZE;
1767 mem_free(d);
1769 if (!http->close && http->length == -1) http->close = 1;
1771 d = parse_header(conn->cached->head, "Last-Modified", NULL);
1772 if (d) {
1773 if (conn->cached->last_modified && strcasecmp(conn->cached->last_modified, d)) {
1774 delete_entry_content(conn->cached);
1775 if (conn->from) {
1776 conn->from = 0;
1777 mem_free(d);
1778 retry_connection(conn, S_MODIFIED);
1779 return;
1782 if (!conn->cached->last_modified) conn->cached->last_modified = d;
1783 else mem_free(d);
1785 if (!conn->cached->last_modified) {
1786 d = parse_header(conn->cached->head, "Date", NULL);
1787 if (d) conn->cached->last_modified = d;
1790 /* FIXME: Parse only if HTTP/1.1 or later? --Zas */
1791 d = parse_header(conn->cached->head, "ETag", NULL);
1792 if (d) {
1793 if (conn->cached->etag) {
1794 unsigned char *old_tag = conn->cached->etag;
1795 unsigned char *new_tag = d;
1797 /* http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19 */
1799 if (new_tag[0] == 'W' && new_tag[1] == '/')
1800 new_tag += 2;
1802 if (old_tag[0] == 'W' && old_tag[1] == '/')
1803 old_tag += 2;
1805 if (strcmp(new_tag, old_tag)) {
1806 delete_entry_content(conn->cached);
1807 if (conn->from) {
1808 conn->from = 0;
1809 mem_free(d);
1810 retry_connection(conn, S_MODIFIED);
1811 return;
1816 if (!conn->cached->etag)
1817 conn->cached->etag = d;
1818 else
1819 mem_free(d);
1822 d = parse_header(conn->cached->head, "Content-Encoding", NULL);
1823 if (d) {
1824 unsigned char *extension = get_extension_from_uri(uri);
1825 enum stream_encoding file_encoding;
1827 file_encoding = extension ? guess_encoding(extension) : ENCODING_NONE;
1828 mem_free_if(extension);
1830 /* If the content is encoded, we want to preserve the encoding
1831 * if it is implied by the extension, so that saving the URI
1832 * will leave the saved file with the correct encoding. */
1833 #ifdef CONFIG_GZIP
1834 if (file_encoding != ENCODING_GZIP
1835 && (!strcasecmp(d, "gzip") || !strcasecmp(d, "x-gzip")))
1836 conn->content_encoding = ENCODING_GZIP;
1837 if (!strcasecmp(d, "deflate") || !strcasecmp(d, "x-deflate"))
1838 conn->content_encoding = ENCODING_DEFLATE;
1839 #endif
1841 #ifdef CONFIG_BZIP2
1842 if (file_encoding != ENCODING_BZIP2
1843 && (!strcasecmp(d, "bzip2") || !strcasecmp(d, "x-bzip2")))
1844 conn->content_encoding = ENCODING_BZIP2;
1845 #endif
1847 #ifdef CONFIG_LZMA
1848 if (file_encoding != ENCODING_LZMA
1849 && (!strcasecmp(d, "lzma") || !strcasecmp(d, "x-lzma")))
1850 conn->content_encoding = ENCODING_LZMA;
1851 #endif
1852 mem_free(d);
1855 if (conn->content_encoding != ENCODING_NONE) {
1856 mem_free_if(conn->cached->encoding_info);
1857 conn->cached->encoding_info = stracpy(get_encoding_name(conn->content_encoding));
1860 if (http->length == -1 || http->close)
1861 socket->state = SOCKET_END_ONCLOSE;
1863 read_http_data(socket, rb);