The gzip decompression simplification. gzclearerr does the job.
[elinks.git] / src / protocol / http / http.c
blob3c52195c0dfaa26c8bcee7a14b5b6b249b73ebfb
1 /* Internal "http" protocol implementation */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <errno.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #ifdef HAVE_UNISTD_H
12 #include <unistd.h>
13 #endif
14 #ifdef HAVE_FCNTL_H
15 #include <fcntl.h> /* OS/2 needs this after sys/types.h */
16 #endif
17 #ifdef HAVE_LIMITS_H
18 #include <limits.h>
19 #endif
21 #include "elinks.h"
23 #include "cache/cache.h"
24 #include "config/options.h"
25 #include "cookies/cookies.h"
26 #include "intl/charsets.h"
27 #include "intl/gettext/libintl.h"
28 #include "main/module.h"
29 #include "network/connection.h"
30 #include "network/progress.h"
31 #include "network/socket.h"
32 #include "osdep/ascii.h"
33 #include "osdep/osdep.h"
34 #include "osdep/sysname.h"
35 #include "protocol/auth/auth.h"
36 #include "protocol/auth/digest.h"
37 #include "protocol/date.h"
38 #include "protocol/header.h"
39 #include "protocol/http/blacklist.h"
40 #include "protocol/http/codes.h"
41 #include "protocol/http/http.h"
42 #include "protocol/uri.h"
43 #include "session/session.h"
44 #include "terminal/terminal.h"
45 #include "util/base64.h"
46 #include "util/conv.h"
47 #include "util/memory.h"
48 #include "util/string.h"
50 #ifdef CONFIG_GSSAPI
51 #include "http_negotiate.h"
52 #endif
/* An HTTP protocol version split into its two numeric components
 * (the "1" and "1" of "HTTP/1.1"). */
54 struct http_version {
55 int major;
56 int minor;
/* Version predicates. Each takes a struct http_version (not a pointer) and
 * evaluates its argument more than once, so pass side-effect free
 * expressions only. */
59 #define HTTP_0_9(x) ((x).major == 0 && (x).minor == 9)
60 #define HTTP_1_0(x) ((x).major == 1 && (x).minor == 0)
61 #define HTTP_1_1(x) ((x).major == 1 && (x).minor == 1)
/* Strictly older than 1.0 / strictly older than 1.1. */
62 #define PRE_HTTP_1_0(x) ((x).major < 1)
63 #define PRE_HTTP_1_1(x) (PRE_HTTP_1_0(x) || HTTP_1_0(x))
/* Strictly newer than 1.0 / strictly newer than 1.1. */
64 #define POST_HTTP_1_0(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 0))
65 #define POST_HTTP_1_1(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 1))
/* Per-connection HTTP state. init_http_connection_info() allocates it and
 * stores it in conn->info. */
68 struct http_connection_info {
/* Blacklist flags looked up for the proxied URI (get_blacklist_flags()). */
69 enum blacklist_flags bl_flags;
/* NOTE(review): presumably the version announced by the server in its
 * status line; the assignment is not visible in this part of the file. */
70 struct http_version recv_version;
/* Version written into our request line by http_send_header(). */
71 struct http_version sent_version;
/* Non-zero: http_end_request() aborts the connection instead of keeping it
 * alive. */
73 int close;
/* Remaining body length, or one of the special values below. */
75 #define LEN_CHUNKED -2 /* == we get data in unknown number of chunks */
76 #define LEN_FINISHED 0
77 int length;
79 /* Either bytes coming in this chunk yet or "parser state". */
80 #define CHUNK_DATA_END -3
81 #define CHUNK_ZERO_SIZE -2
82 #define CHUNK_SIZE -1
83 int chunk_remaining;
/* Response status code; read_http_data_done() treats values >= 400 as
 * errors. */
85 int code;
/* Proxy authentication state; http_send_header() fills in user/password and
 * uses it to build the Proxy-Authorization header when .digest is set. */
89 static struct auth_entry proxy_auth;
/* Cached, ready-to-send "Accept-Charset: ...\r\n" header line. Built lazily by
 * init_accept_charset() and released in done_http(). */
91 static unsigned char *accept_charset = NULL;
/* Option tree registered by the HTTP module: "protocol.http" (including the
 * "bugs", "proxy" and "referer" subtrees) and "protocol.https". The option
 * names declared here are the ones the rest of this file reads through
 * get_opt_bool()/get_opt_int()/get_opt_str(). The table ends with
 * NULL_OPTION_INFO. */
94 static struct option_info http_options[] = {
95 INIT_OPT_TREE("protocol", N_("HTTP"),
96 "http", 0,
97 N_("HTTP-specific options.")),
/* protocol.http.bugs.*: workarounds for misbehaving servers. */
100 INIT_OPT_TREE("protocol.http", N_("Server bug workarounds"),
101 "bugs", 0,
102 N_("Server-side HTTP bugs workarounds.")),
/* Note the inverted sense: when this option is set, http_send_header() does
 * NOT emit the Accept-Charset header. */
104 INIT_OPT_BOOL("protocol.http.bugs", N_("Do not send Accept-Charset"),
105 "accept_charset", 0, 1,
106 N_("The Accept-Charset header is quite long and sending it can trigger\n"
107 "bugs in some rarely found servers.")),
109 INIT_OPT_BOOL("protocol.http.bugs", N_("Allow blacklisting"),
110 "allow_blacklist", 0, 1,
111 N_("Allow blacklisting of buggy servers.")),
113 INIT_OPT_BOOL("protocol.http.bugs", N_("Broken 302 redirects"),
114 "broken_302_redirect", 0, 1,
115 N_("Broken 302 redirect (violates RFC but compatible with Netscape).\n"
116 "This is a problem for a lot of web discussion boards and the like.\n"
117 "If they will do strange things to you, try to play with this.")),
119 INIT_OPT_BOOL("protocol.http.bugs", N_("No keepalive after POST requests"),
120 "post_no_keepalive", 0, 0,
121 N_("Disable keepalive connection after POST request.")),
123 INIT_OPT_BOOL("protocol.http.bugs", N_("Use HTTP/1.0"),
124 "http10", 0, 0,
125 N_("Use HTTP/1.0 protocol instead of HTTP/1.1.")),
/* protocol.http.proxy.*: proxy location and credentials. */
127 INIT_OPT_TREE("protocol.http", N_("Proxy configuration"),
128 "proxy", 0,
129 N_("HTTP proxy configuration.")),
131 INIT_OPT_STRING("protocol.http.proxy", N_("Host and port-number"),
132 "host", 0, "",
133 N_("Host and port-number (host:port) of the HTTP proxy, or blank.\n"
134 "If it's blank, HTTP_PROXY environment variable is checked as well.")),
136 INIT_OPT_STRING("protocol.http.proxy", N_("Username"),
137 "user", 0, "",
138 N_("Proxy authentication username.")),
140 INIT_OPT_STRING("protocol.http.proxy", N_("Password"),
141 "passwd", 0, "",
142 N_("Proxy authentication password.")),
/* protocol.http.referer.*: what to send in the Referer header. */
145 INIT_OPT_TREE("protocol.http", N_("Referer sending"),
146 "referer", 0,
147 N_("HTTP referer sending options. HTTP referer is a special header\n"
148 "sent in the HTTP requests, which is supposed to contain the previous\n"
149 "page visited by the browser. This way, the server can know what link\n"
150 "did you follow when accessing that page. However, this behaviour\n"
151 "can unfortunately considerably affect privacy and can lead even to a\n"
152 "security problem on some badly designed web pages.")),
154 INIT_OPT_INT("protocol.http.referer", N_("Policy"),
155 "policy", 0,
156 REFERER_NONE, REFERER_TRUE, REFERER_TRUE,
157 N_("Mode of sending HTTP referer:\n"
158 "0 is send no referer\n"
159 "1 is send current URL as referer\n"
160 "2 is send fixed fake referer\n"
161 "3 is send previous URL as referer (correct, but insecure)")),
163 INIT_OPT_STRING("protocol.http.referer", N_("Fake referer URL"),
164 "fake", 0, "",
165 N_("Fake referer to be sent when policy is 2.")),
/* Remaining protocol.http.* options. */
168 INIT_OPT_STRING("protocol.http", N_("Send Accept-Language header"),
169 "accept_language", 0, "",
170 N_("Send Accept-Language header.")),
172 INIT_OPT_BOOL("protocol.http", N_("Use UI language as Accept-Language"),
173 "accept_ui_language", 0, 1,
174 N_("Request localised versions of documents from web-servers (using the\n"
175 "Accept-Language header) using the language you have configured for\n"
176 "ELinks' user-interface (this also affects navigator.language ECMAScript\n"
177 "value available to scripts). Note that some see this as a potential\n"
178 "security risk because it tells web-masters and the FBI sniffers about\n"
179 "your language preference.")),
181 INIT_OPT_BOOL("protocol.http", N_("Activate HTTP TRACE debugging"),
182 "trace", 0, 0,
183 N_("If active, all HTTP requests are sent with TRACE as their method\n"
184 "rather than GET or POST. This is useful for debugging of both ELinks\n"
185 "and various server-side scripts --- the server only returns the client's\n"
186 "request back to the client verbatim. Note that this type of request may\n"
187 "not be enabled on all servers.")),
189 /* OSNews.com is supposed to be relying on the textmode token, at least. */
/* The %v/%s/%t/%b placeholders in the default are expanded by
 * subst_user_agent(). */
190 INIT_OPT_STRING("protocol.http", N_("User-agent identification"),
191 "user_agent", 0, "ELinks/%v (textmode; %s; %t-%b)",
192 N_("Change the User Agent ID. That means identification string, which\n"
193 "is sent to HTTP server when a document is requested. The 'textmode'\n"
194 "token in the first field is our silent attempt to establish this as\n"
195 "a standard for new textmode user agents, so that the webmasters can\n"
196 "have just a single uniform test for these if they are ie. pushing\n"
197 "some lite version to them automagically.\n"
198 "%v in the string means ELinks version\n"
199 "%s in the string means system identification\n"
200 "%t in the string means size of the terminal\n"
201 "%b in the string means number of bars displayed by ELinks\n"
202 "Use \" \" if you don't want any User-Agent header to be sent at all.")),
/* protocol.https.*: only the CONNECT proxy is configurable here. */
205 INIT_OPT_TREE("protocol", N_("HTTPS"),
206 "https", 0,
207 N_("HTTPS-specific options.")),
209 INIT_OPT_TREE("protocol.https", N_("Proxy configuration"),
210 "proxy", 0,
211 N_("HTTPS proxy configuration.")),
213 INIT_OPT_STRING("protocol.https.proxy", N_("Host and port-number"),
214 "host", 0, "",
215 N_("Host and port-number (host:port) of the HTTPS CONNECT proxy, or blank.\n"
216 "If it's blank, HTTPS_PROXY environment variable is checked as well.")),
217 NULL_OPTION_INFO,
/* Forward declaration: the module descriptor below needs the shutdown hook
 * before its definition. */
220 static void done_http();
/* Module descriptor for the HTTP protocol: it contributes the http_options
 * table and the done_http() shutdown hook; there are no hooks, submodules,
 * private data or init function. */
222 struct module http_protocol_module = struct_module(
223 /* name: */ N_("HTTP"),
224 /* options: */ http_options,
225 /* hooks: */ NULL,
226 /* submodules: */ NULL,
227 /* data: */ NULL,
228 /* init: */ NULL,
229 /* done: */ done_http
233 static void
234 done_http(void)
236 mem_free_if(proxy_auth.realm);
237 mem_free_if(proxy_auth.nonce);
238 mem_free_if(proxy_auth.opaque);
240 free_blacklist();
242 if (accept_charset)
243 mem_free(accept_charset);
246 static void
247 init_accept_charset(void)
249 struct string ac;
250 unsigned char *cs;
251 int i;
253 if (!init_string(&ac)) return;
255 for (i = 0; (cs = get_cp_mime_name(i)); i++) {
256 if (ac.length) {
257 add_to_string(&ac, ", ");
258 } else {
259 add_to_string(&ac, "Accept-Charset: ");
261 add_to_string(&ac, cs);
264 if (ac.length) {
265 add_crlf_to_string(&ac);
268 accept_charset = squeezastring(&ac);
270 done_string(&ac);
274 unsigned char *
275 subst_user_agent(unsigned char *fmt, unsigned char *version,
276 unsigned char *sysname, unsigned char *termsize)
278 struct string agent;
280 if (!init_string(&agent)) return NULL;
282 while (*fmt) {
283 int p;
285 for (p = 0; fmt[p] && fmt[p] != '%'; p++);
287 add_bytes_to_string(&agent, fmt, p);
288 fmt += p;
290 if (*fmt != '%') continue;
292 fmt++;
293 switch (*fmt) {
294 case 'b':
295 if (!list_empty(sessions)) {
296 unsigned char bs[4] = "";
297 int blen = 0;
298 struct session *ses = sessions.prev;
299 int bars = ses->status.show_status_bar
300 + ses->status.show_tabs_bar
301 + ses->status.show_title_bar;
303 ulongcat(bs, &blen, bars, 2, 0);
304 add_to_string(&agent, bs);
306 break;
307 case 'v':
308 add_to_string(&agent, version);
309 break;
310 case 's':
311 add_to_string(&agent, sysname);
312 break;
313 case 't':
314 if (termsize)
315 add_to_string(&agent, termsize);
316 break;
317 default:
318 add_bytes_to_string(&agent, fmt - 1, 2);
319 break;
321 if (*fmt) fmt++;
324 return agent.source;
/* Append the @components of @uri to @header, making the result safe for a
 * request line: every space, tab, CR or LF becomes "%20" and every backslash
 * becomes '/'. Nothing is appended when the URI string cannot be built.
 *
 * This is admittedly not the right place to do the substitution; URLs should
 * probably be encoded as early as possible and decoded again in the protocol
 * backends. --pasky */
static void
add_url_to_http_string(struct string *header, struct uri *uri, int components)
{
	unsigned char *url = get_uri_string(uri, components);
	unsigned char *pos;

	if (!url) return;

	for (pos = url; *pos; ) {
		/* Length of the run that needs no rewriting. */
		int plain = strcspn(pos, " \t\r\n\\");
		unsigned char special = pos[plain];

		add_bytes_to_string(header, pos, plain);

		if (!special) break;

		if (special == '\\') {
			add_char_to_string(header, '/');
		} else {
			add_to_string(header, "%20");
		}

		/* Skip the run and the character just replaced. */
		pos += plain + 1;
	}

	mem_free(url);
}
/* Parse the decimal digits in [@start, @end), walking backwards from
 * @end - 1 down to @start, and store the result in *@value.
 *
 * Returns 0 on success and -1 if the range is not a number. *@value is left
 * untouched on failure.
 *
 * An empty or inverted range (@end <= @start) and a range of more than nine
 * digits (which would overflow the int accumulator) are both reported as
 * "not a number" instead of reading outside the range or overflowing. */
static int
revstr2num(unsigned char *start, unsigned char *end, int *value)
{
	int q = 1, val = 0;

	if (!start || !end || end <= start) return -1;
	if (end - start > 9) return -1;

	do {
		--end;
		if (!isdigit(*end)) return -1; /* NaN */
		val += (*end - '0') * q;
		q *= 10;
	} while (end > start);

	*value = val;
	return 0;
}
378 /* This function extracts code, major and minor version from string
379 * "\s*HTTP/\d+.\d+\s+\d\d\d..."
380 * It returns a negative value on error, 0 on success.
382 static int
383 get_http_code(struct read_buffer *rb, int *code, struct http_version *version)
385 unsigned char *head = rb->data;
386 unsigned char *start;
388 *code = 0;
389 version->major = 0;
390 version->minor = 0;
392 /* Ignore spaces. */
393 while (*head == ' ') head++;
395 /* HTTP/ */
396 if (toupper(*head) != 'H' || toupper(*++head) != 'T' ||
397 toupper(*++head) != 'T' || toupper(*++head) != 'P'
398 || *++head != '/')
399 return -1;
401 /* Version */
402 start = ++head;
403 /* Find next '.' */
404 while (*head && *head != '.') head++;
405 /* Sanity check. */
406 if (!*head || !(head - start)
407 || (head - start) > 4
408 || !isdigit(*(head + 1)))
409 return -2;
411 /* Extract major version number. */
412 if (revstr2num(start, head, &version->major)) return -3; /* NaN */
414 start = head + 1;
416 /* Find next ' '. */
417 while (*head && *head != ' ') head++;
418 /* Sanity check. */
419 if (!*head || !(head - start) || (head - start) > 4) return -4;
421 /* Extract minor version number. */
422 if (revstr2num(start, head, &version->minor)) return -5; /* NaN */
424 /* Ignore spaces. */
425 while (*head == ' ') head++;
427 /* Sanity check for code. */
428 if (head[0] < '1' || head[0] > '9' ||
429 !isdigit(head[1]) ||
430 !isdigit(head[2]))
431 return -6; /* Invalid code. */
433 /* Extract code. */
434 *code = (head[0] - '0') * 100 + (head[1] - '0') * 10 + head[2] - '0';
436 return 0;
439 static int
440 check_http_server_bugs(struct uri *uri, struct http_connection_info *http,
441 unsigned char *head)
443 unsigned char *server;
444 const unsigned char *const *s;
445 static const unsigned char *const buggy_servers[] = {
446 "mod_czech/3.1.0",
447 "Purveyor",
448 "Netscape-Enterprise",
449 NULL
452 if (!get_opt_bool("protocol.http.bugs.allow_blacklist")
453 || HTTP_1_0(http->sent_version))
454 return 0;
456 server = parse_header(head, "Server", NULL);
457 if (!server)
458 return 0;
460 for (s = buggy_servers; *s; s++) {
461 if (strstr(server, *s)) {
462 add_blacklist_entry(uri, SERVER_BLACKLIST_HTTP10);
463 break;
467 mem_free(server);
468 return (*s != NULL);
471 static void
472 http_end_request(struct connection *conn, enum connection_state state,
473 int notrunc)
475 shutdown_connection_stream(conn);
477 if (conn->info && !((struct http_connection_info *) conn->info)->close
478 && (!conn->socket->ssl) /* We won't keep alive ssl connections */
479 && (!get_opt_bool("protocol.http.bugs.post_no_keepalive")
480 || !conn->uri->post)) {
481 if (state == S_OK && conn->cached)
482 normalize_cache_entry(conn->cached, !notrunc ? conn->from : -1);
483 set_connection_state(conn, state);
484 add_keepalive_connection(conn, HTTP_KEEPALIVE_TIMEOUT, NULL);
485 } else {
486 abort_connection(conn, state);
490 static void http_send_header(struct socket *);
492 void
493 http_protocol_handler(struct connection *conn)
495 /* setcstate(conn, S_CONN); */
497 if (!has_keepalive_connection(conn)) {
498 make_connection(conn->socket, conn->uri, http_send_header,
499 conn->cache_mode >= CACHE_MODE_FORCE_RELOAD);
500 } else {
501 http_send_header(conn->socket);
/* Handler for proxied requests; they go through exactly the same code path
 * as plain HTTP requests. */
void
proxy_protocol_handler(struct connection *conn)
{
	http_protocol_handler(conn);
}
/* True when the URI @x (a struct uri pointer) uses the proxy protocol. */
511 #define IS_PROXY_URI(x) ((x)->protocol == PROTOCOL_PROXY)
/* True when @conn talks to a proxy on behalf of an HTTPS target, i.e. the
 * connection URI is a proxy URI and the proxied URI is HTTPS. */
513 #define connection_is_https_proxy(conn) \
514 (IS_PROXY_URI((conn)->uri) && (conn)->proxied_uri->protocol == PROTOCOL_HTTPS)
516 struct http_connection_info *
517 init_http_connection_info(struct connection *conn, int major, int minor, int close)
519 struct http_connection_info *http;
521 http = mem_calloc(1, sizeof(*http));
522 if (!http) {
523 http_end_request(conn, S_OUT_OF_MEM, 0);
524 return NULL;
527 http->sent_version.major = major;
528 http->sent_version.minor = minor;
529 http->close = close;
531 /* The CGI code uses this too and blacklisting expects a host name. */
532 if (conn->proxied_uri->protocol != PROTOCOL_FILE)
533 http->bl_flags = get_blacklist_flags(conn->proxied_uri);
535 if (http->bl_flags & SERVER_BLACKLIST_HTTP10
536 || get_opt_bool("protocol.http.bugs.http10")) {
537 http->sent_version.major = 1;
538 http->sent_version.minor = 0;
541 /* If called from HTTPS proxy connection the connection info might have
542 * already been allocated. */
543 mem_free_set(&conn->info, http);
545 return http;
/* Build the complete HTTP request for the connection owning @socket (request
 * line, headers and, for POST, the decoded body) in one string and hand it
 * to request_from_socket(), with http_got_header as the follow-up handler.
 *
 * The request is ended early with S_BAD_URL when the proxied URI has no
 * host, and with S_OUT_OF_MEM when the connection info or the header string
 * cannot be allocated. */
548 static void
549 http_send_header(struct socket *socket)
551 struct connection *conn = socket->conn;
552 struct http_connection_info *http;
553 int trace = get_opt_bool("protocol.http.trace");
554 struct string header;
555 unsigned char *post_data = NULL;
556 struct auth_entry *entry = NULL;
557 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
558 unsigned char *optstr;
559 int use_connect, talking_to_proxy;
561 /* Sanity check for a host */
562 if (!uri || !uri->host || !*uri->host || !uri->hostlen) {
563 http_end_request(conn, S_BAD_URL, 0);
564 return;
/* Start out as HTTP/1.1 with keep-alive allowed; the helper may lower the
 * version to 1.0. */
567 http = init_http_connection_info(conn, 1, 1, 0);
568 if (!http) return;
570 if (!init_string(&header)) {
571 http_end_request(conn, S_OUT_OF_MEM, 0);
572 return;
575 if (!conn->cached) conn->cached = find_in_cache(uri);
/* Once the socket is SSL we no longer address the proxy itself, so neither
 * flag is set. */
577 talking_to_proxy = IS_PROXY_URI(conn->uri) && !conn->socket->ssl;
578 use_connect = connection_is_https_proxy(conn) && !conn->socket->ssl;
/* Request method; TRACE debugging overrides everything else. */
580 if (trace) {
581 add_to_string(&header, "TRACE ");
582 } else if (use_connect) {
583 add_to_string(&header, "CONNECT ");
584 } else if (uri->post) {
585 add_to_string(&header, "POST ");
586 conn->unrestartable = 1;
587 } else {
588 add_to_string(&header, "GET ");
/* Request target: the leading '/' is only added when not addressing a
 * proxy. */
591 if (!talking_to_proxy) {
592 add_char_to_string(&header, '/');
595 if (use_connect) {
596 /* Add port if it was specified or the default port */
597 add_uri_to_string(&header, uri, URI_HTTP_CONNECT);
598 } else {
599 if (connection_is_https_proxy(conn) && conn->socket->ssl) {
600 add_url_to_http_string(&header, uri, URI_DATA);
602 } else if (talking_to_proxy) {
603 add_url_to_http_string(&header, uri, URI_PROXY);
605 } else {
606 add_url_to_http_string(&header, conn->uri, URI_DATA);
/* Protocol version, then the mandatory Host header. */
610 add_to_string(&header, " HTTP/");
611 add_long_to_string(&header, http->sent_version.major);
612 add_char_to_string(&header, '.');
613 add_long_to_string(&header, http->sent_version.minor);
614 add_crlf_to_string(&header);
616 add_to_string(&header, "Host: ");
617 add_uri_to_string(&header, uri, URI_HTTP_HOST);
618 add_crlf_to_string(&header);
/* Proxy-Authorization: Digest when proxy_auth.digest is set, otherwise
 * Basic when a proxy user name is configured. */
620 if (talking_to_proxy) {
621 unsigned char *user = get_opt_str("protocol.http.proxy.user");
622 unsigned char *passwd = get_opt_str("protocol.http.proxy.passwd");
624 if (proxy_auth.digest) {
625 unsigned char *response;
626 int userlen = int_min(strlen(user), AUTH_USER_MAXLEN - 1);
627 int passwordlen = int_min(strlen(passwd), AUTH_PASSWORD_MAXLEN - 1);
/* Copy the credentials, truncated to the buffer sizes, into proxy_auth. */
629 if (userlen)
630 memcpy(proxy_auth.user, user, userlen);
631 proxy_auth.user[userlen] = '\0';
632 if (passwordlen)
633 memcpy(proxy_auth.password, passwd, passwordlen);
634 proxy_auth.password[passwordlen] = '\0';
636 /* FIXME: @uri is the proxied URI. Maybe the passed URI
637 * should be the proxy URI aka conn->uri. --jonas */
638 response = get_http_auth_digest_response(&proxy_auth, uri);
639 if (response) {
640 add_to_string(&header, "Proxy-Authorization: Digest ");
641 add_to_string(&header, response);
642 add_crlf_to_string(&header);
644 mem_free(response);
647 } else {
648 if (user[0]) {
649 unsigned char *proxy_data;
651 proxy_data = straconcat(user, ":", passwd, NULL);
652 if (proxy_data) {
653 unsigned char *proxy_64 = base64_encode(proxy_data);
655 if (proxy_64) {
656 add_to_string(&header, "Proxy-Authorization: Basic ");
657 add_to_string(&header, proxy_64);
658 add_crlf_to_string(&header);
659 mem_free(proxy_64);
661 mem_free(proxy_data);
/* User-Agent: omitted when the option is empty or a single space. */
667 optstr = get_opt_str("protocol.http.user_agent");
668 if (*optstr && strcmp(optstr, " ")) {
669 unsigned char *ustr, ts[64] = "";
671 add_to_string(&header, "User-Agent: ");
/* ts becomes "<width>x<height>" of the last terminal in the list; it stays
 * empty when there is no terminal. */
673 if (!list_empty(terminals)) {
674 unsigned int tslen = 0;
675 struct terminal *term = terminals.prev;
677 ulongcat(ts, &tslen, term->width, 3, 0);
678 ts[tslen++] = 'x';
679 ulongcat(ts, &tslen, term->height, 3, 0);
681 ustr = subst_user_agent(optstr, VERSION_STRING, system_name,
682 ts);
684 if (ustr) {
685 add_to_string(&header, ustr);
686 mem_free(ustr);
689 add_crlf_to_string(&header);
/* Referer, according to the configured policy. */
692 switch (get_opt_int("protocol.http.referer.policy")) {
693 case REFERER_NONE:
694 /* oh well */
695 break;
697 case REFERER_FAKE:
698 optstr = get_opt_str("protocol.http.referer.fake");
699 if (!optstr[0]) break;
700 add_to_string(&header, "Referer: ");
701 add_to_string(&header, optstr);
702 add_crlf_to_string(&header);
703 break;
705 case REFERER_TRUE:
706 if (!conn->referrer) break;
707 add_to_string(&header, "Referer: ");
708 add_url_to_http_string(&header, conn->referrer, URI_HTTP_REFERRER);
709 add_crlf_to_string(&header);
710 break;
712 case REFERER_SAME_URL:
713 add_to_string(&header, "Referer: ");
714 add_url_to_http_string(&header, uri, URI_HTTP_REFERRER);
715 add_crlf_to_string(&header);
716 break;
719 add_to_string(&header, "Accept: */*");
720 add_crlf_to_string(&header);
/* Accept-Encoding lists only the decoders compiled in; bzip2 is advertised
 * only when BUG_517 is defined as well. */
722 /* TODO: Make this encoding.c function. */
723 #if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2)
724 add_to_string(&header, "Accept-Encoding: ");
726 #ifdef BUG_517
727 #ifdef CONFIG_BZIP2
728 add_to_string(&header, "bzip2");
729 #endif
730 #endif
732 #ifdef CONFIG_GZIP
734 #ifdef BUG_517
735 #ifdef CONFIG_BZIP2
736 add_to_string(&header, ", ");
737 #endif
738 #endif
740 add_to_string(&header, "gzip");
741 #endif
742 add_crlf_to_string(&header);
743 #endif
/* The Accept-Charset line is built once and cached in accept_charset. It is
 * sent unless the server is blacklisted for it or the (inverted)
 * bugs.accept_charset option is set. */
745 if (!accept_charset) {
746 init_accept_charset();
749 if (!(http->bl_flags & SERVER_BLACKLIST_NO_CHARSET)
750 && !get_opt_bool("protocol.http.bugs.accept_charset")
751 && accept_charset) {
752 add_to_string(&header, accept_charset);
/* Accept-Language: an explicit option value wins over the UI language. */
755 optstr = get_opt_str("protocol.http.accept_language");
756 if (optstr[0]) {
757 add_to_string(&header, "Accept-Language: ");
758 add_to_string(&header, optstr);
759 add_crlf_to_string(&header);
761 #ifdef CONFIG_NLS
762 else if (get_opt_bool("protocol.http.accept_ui_language")) {
763 unsigned char *code = language_to_iso639(current_language);
765 if (code) {
766 add_to_string(&header, "Accept-Language: ");
767 add_to_string(&header, code);
768 add_crlf_to_string(&header);
771 #endif
/* Connection / Proxy-Connection is only sent with HTTP/1.1 requests. */
773 /* FIXME: What about post-HTTP/1.1?? --Zas */
774 if (HTTP_1_1(http->sent_version)) {
775 if (!IS_PROXY_URI(conn->uri)) {
776 add_to_string(&header, "Connection: ");
777 } else {
778 add_to_string(&header, "Proxy-Connection: ");
781 if (!uri->post || !get_opt_bool("protocol.http.bugs.post_no_keepalive")) {
782 add_to_string(&header, "Keep-Alive");
783 } else {
784 add_to_string(&header, "close");
786 add_crlf_to_string(&header);
/* Conditional request headers, only for a complete cache entry that has a
 * stored head and a cache mode that permits revalidation. */
789 if (conn->cached) {
790 if (!conn->cached->incomplete && conn->cached->head
791 && conn->cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED) {
792 if (conn->cached->last_modified) {
793 add_to_string(&header, "If-Modified-Since: ");
794 add_to_string(&header, conn->cached->last_modified);
795 add_crlf_to_string(&header);
797 if (conn->cached->etag) {
798 add_to_string(&header, "If-None-Match: ");
799 add_to_string(&header, conn->cached->etag);
800 add_crlf_to_string(&header);
805 if (conn->cache_mode >= CACHE_MODE_FORCE_RELOAD) {
806 add_to_string(&header, "Pragma: no-cache");
807 add_crlf_to_string(&header);
808 add_to_string(&header, "Cache-Control: no-cache");
809 add_crlf_to_string(&header);
/* Resume a partial download with an open-ended byte range. */
812 if (conn->from || conn->progress->start > 0) {
813 /* conn->from takes precedence. conn->progress.start is set only the first
814 * time, then conn->from gets updated and in case of any retries
815 * etc we have everything interesting in conn->from already. */
816 add_to_string(&header, "Range: bytes=");
817 add_long_to_string(&header, conn->from ? conn->from : conn->progress->start);
818 add_char_to_string(&header, '-');
819 add_crlf_to_string(&header);
/* With GSSAPI compiled in, stored credentials are only looked up when
 * http_negotiate_output() returns non-zero; without it the lookup is
 * unconditional. */
822 #ifdef CONFIG_GSSAPI
823 if (http_negotiate_output(uri, &header) != 0)
824 #endif
825 entry = find_auth(uri);
827 if (entry) {
828 if (entry->digest) {
829 unsigned char *response;
831 response = get_http_auth_digest_response(entry, uri);
832 if (response) {
833 add_to_string(&header, "Authorization: Digest ");
834 add_to_string(&header, response);
835 add_crlf_to_string(&header);
837 mem_free(response);
840 } else {
841 /* RFC2617 section 2 [Basic Authentication Scheme]
843 * To receive authorization, the client sends the userid
844 * and password, separated by a single colon (":")
845 * character, within a base64 [7] encoded string in the
846 * credentials. */
847 unsigned char *id;
849 /* Create base64 encoded string. */
850 id = straconcat(entry->user, ":", entry->password, NULL);
851 if (id) {
852 unsigned char *base64 = base64_encode(id);
854 mem_free_set(&id, base64);
857 if (id) {
858 add_to_string(&header, "Authorization: Basic ");
859 add_to_string(&header, id);
860 add_crlf_to_string(&header);
861 mem_free(id);
/* POST: uri->post holds an optional content type line followed by the body
 * as pairs of hex digits, hence Content-Length is half the string length. */
866 if (uri->post) {
867 /* We search for first '\n' in uri->post to get content type
868 * as set by get_form_uri(). This '\n' is dropped if any
869 * and replaced by correct '\r\n' termination here. */
870 unsigned char *postend = strchr(uri->post, '\n');
872 if (postend) {
873 add_to_string(&header, "Content-Type: ");
874 add_bytes_to_string(&header, uri->post, postend - uri->post);
875 add_crlf_to_string(&header);
878 post_data = postend ? postend + 1 : uri->post;
879 add_to_string(&header, "Content-Length: ");
880 add_long_to_string(&header, strlen(post_data) / 2);
881 add_crlf_to_string(&header);
884 #ifdef CONFIG_COOKIES
886 struct string *cookies = send_cookies(uri);
888 if (cookies) {
889 add_to_string(&header, "Cookie: ");
890 add_string_to_string(&header, cookies);
891 add_crlf_to_string(&header);
892 done_string(cookies);
895 #endif
/* Empty line terminating the header section. */
897 add_crlf_to_string(&header);
/* Decode the hex-encoded POST body two digits at a time, flushing to the
 * request string whenever the local buffer fills up. */
899 if (post_data) {
900 #define POST_BUFFER_SIZE 4096
901 unsigned char *post = post_data;
902 unsigned char buffer[POST_BUFFER_SIZE];
903 int n = 0;
905 while (post[0] && post[1]) {
906 int h1, h2;
908 h1 = unhx(post[0]);
909 assertm(h1 >= 0 && h1 < 16, "h1 in the POST buffer is %d (%d/%c)", h1, post[0], post[0]);
910 if_assert_failed h1 = 0;
912 h2 = unhx(post[1]);
913 assertm(h2 >= 0 && h2 < 16, "h2 in the POST buffer is %d (%d/%c)", h2, post[1], post[1]);
914 if_assert_failed h2 = 0;
916 buffer[n++] = (h1<<4) + h2;
917 post += 2;
918 if (n == POST_BUFFER_SIZE) {
919 add_bytes_to_string(&header, buffer, n);
920 n = 0;
924 if (n)
925 add_bytes_to_string(&header, buffer, n);
926 #undef POST_BUFFER_SIZE
/* Hand the finished request to the socket layer, then release our copy. */
929 request_from_socket(socket, header.source, header.length, S_SENT,
930 SOCKET_END_ONCLOSE, http_got_header);
931 done_string(&header);
935 /* This function decompresses the data block given in @data (if it was
936 * compressed), which is long @len bytes. The decompressed data block is given
937 * back to the world as the return value and its length is stored into
938 * @new_len.
940 * In this function, value of either http->chunk_remaining or http->length is
941 * being changed (it depends on if chunked mode is used or not).
943 * Note that the function is still a little esotheric for me. Don't take it
944 * lightly and don't mess with it without grave reason! If you dare to touch
945 * this without testing the changes on slashdot, freshmeat and cvsweb
946 * (including revision history), don't dare to send me any patches! ;) --pasky
948 * This function gotta die. */
949 static unsigned char *
950 decompress_data(struct connection *conn, unsigned char *data, int len,
951 int *new_len)
953 struct http_connection_info *http = conn->info;
954 /* to_read is number of bytes to be read from the decoder. It is 65536
955 * (then we are just emptying the decoder buffer as we finished the walk
956 * through the incoming stream already) or PIPE_BUF / 2 (when we are
957 * still walking through the stream - then we write PIPE_BUF / 2 to the
958 * pipe and read it back to the decoder ASAP; the point is that we can't
959 * write more than PIPE_BUF to the pipe at once, but we also have to
960 * never let read_encoded() (gzread(), in fact) to empty the pipe - that
961 * causes further malfunction of zlib :[ ... so we will make sure that
962 * we will always have at least PIPE_BUF / 2 + 1 in the pipe (returning
963 * early otherwise)). */
964 enum { NORMAL, FINISHING } state = NORMAL;
965 int did_read = 0;
966 int *length_of_block;
967 unsigned char *output = NULL;
969 length_of_block = (http->length == LEN_CHUNKED ? &http->chunk_remaining
970 : &http->length);
972 #define BIG_READ 65536
973 if (!*length_of_block) {
974 /* Going to finish this decoding bussiness. */
975 state = FINISHING;
978 if (conn->content_encoding == ENCODING_NONE) {
979 *new_len = len;
980 if (*length_of_block > 0) *length_of_block -= len;
981 return data;
984 *new_len = 0; /* new_len must be zero if we would ever return NULL */
986 if (conn->stream_pipes[0] == -1
987 && (c_pipe(conn->stream_pipes) < 0
988 || set_nonblocking_fd(conn->stream_pipes[0]) < 0
989 || set_nonblocking_fd(conn->stream_pipes[1]) < 0)) {
990 return NULL;
993 do {
994 /* The initial value is used only when state == NORMAL.
995 * Unconditional initialization avoids a GCC warning. */
996 int to_read = PIPE_BUF / 2;
998 if (state == NORMAL) {
999 /* ... we aren't finishing yet. */
1000 int written;
1002 written = safe_write(conn->stream_pipes[1], data,
1003 len > to_read ? to_read : len);
1005 if (written > 0) {
1006 data += written;
1007 len -= written;
1009 /* In non-keep-alive connections http->length == -1, so the test below */
1010 if (*length_of_block > 0)
1011 *length_of_block -= written;
1012 /* http->length is 0 at the end of block for all modes: keep-alive,
1013 * non-keep-alive and chunked */
1014 if (!http->length) {
1015 /* That's all, folks - let's finish this. */
1016 state = FINISHING;
1017 } else if (!len) {
1018 /* We've done for this round (but not done
1019 * completely). Thus we will get out with
1020 * what we have and leave what we wrote to
1021 * the next round - we have to do that since
1022 * we MUST NOT ever empty the pipe completely
1023 * - this would cause a disaster for
1024 * read_encoded(), which would simply not
1025 * work right then. */
1026 return output;
1031 if (!conn->stream) {
1032 conn->stream = open_encoded(conn->stream_pipes[0],
1033 conn->content_encoding);
1034 if (!conn->stream) return NULL;
1037 output = (unsigned char *) mem_realloc(output, *new_len + BIG_READ);
1038 if (!output) break;
1040 did_read = read_encoded(conn->stream, output + *new_len, BIG_READ);
1042 if (did_read > 0) *new_len += did_read;
1043 else if (did_read == -1) {
1044 mem_free_set(&output, NULL);
1045 *new_len = 0;
1046 break; /* Loop prevention (bug 517), is this correct ? --Zas */
1048 } while (len || did_read == BIG_READ);
1050 shutdown_connection_stream(conn);
1051 return output;
1054 static int
1055 is_line_in_buffer(struct read_buffer *rb)
1057 int l;
1059 for (l = 0; l < rb->length; l++) {
1060 unsigned char a0 = rb->data[l];
1062 if (a0 == ASCII_LF)
1063 return l + 1;
1064 if (a0 == ASCII_CR) {
1065 if (rb->data[l + 1] == ASCII_LF
1066 && l < rb->length - 1)
1067 return l + 2;
1068 if (l == rb->length - 1)
1069 return 0;
1071 if (a0 < ' ')
1072 return -1;
1074 return 0;
1077 static void read_http_data(struct socket *socket, struct read_buffer *rb);
1079 static void
1080 read_more_http_data(struct connection *conn, struct read_buffer *rb,
1081 int already_got_anything)
1083 enum connection_state state = already_got_anything ? S_TRANS : conn->state;
1085 read_from_socket(conn->socket, rb, state, read_http_data);
1088 static void
1089 read_http_data_done(struct connection *conn)
1091 struct http_connection_info *http = conn->info;
1093 /* There's no content but an error so just print
1094 * that instead of nothing. */
1095 if (!conn->from) {
1096 if (http->code >= 400) {
1097 http_error_document(conn, http->code);
1099 } else {
1100 /* This is not an error, thus fine. No need generate any
1101 * document, as this may be empty and it's not a problem.
1102 * In case of 3xx, we're probably just getting kicked to
1103 * another page anyway. And in case of 2xx, the document
1104 * may indeed be empty and thus the user should see it so. */
1108 http_end_request(conn, S_OK, 0);
1111 /* Returns:
1112 * -1 on error
1113 * 0 if more to read
1114 * 1 if done
1116 static int
1117 read_chunked_http_data(struct connection *conn, struct read_buffer *rb)
1119 struct http_connection_info *http = conn->info;
1120 int total_data_len = 0;
1122 while (1) {
1123 /* Chunked. Good luck! */
1124 /* See RFC2616, section 3.6.1. Basically, it looks like:
1125 * 1234 ; a = b ; c = d\r\n
1126 * aklkjadslkfjalkfjlkajkljfdkljdsfkljdf*1234\r\n
1127 * 0\r\n
1128 * \r\n */
1129 if (http->chunk_remaining == CHUNK_DATA_END) {
1130 int l = is_line_in_buffer(rb);
1132 if (l) {
1133 if (l == -1) {
1134 /* Invalid character in buffer. */
1135 return -1;
1138 /* Remove everything to the EOLN. */
1139 kill_buffer_data(rb, l);
1140 if (l <= 2) {
1141 /* Empty line. */
1142 return 2;
1144 continue;
1147 } else if (http->chunk_remaining == CHUNK_SIZE) {
1148 int l = is_line_in_buffer(rb);
1150 if (l) {
1151 unsigned char *de;
1152 int n = 0;
1154 if (l != -1) {
1155 errno = 0;
1156 n = strtol(rb->data, (char **) &de, 16);
1157 if (errno || !*de) {
1158 return -1;
1162 if (l == -1 || de == rb->data) {
1163 return -1;
1166 /* Remove everything to the EOLN. */
1167 kill_buffer_data(rb, l);
1168 http->chunk_remaining = n;
1169 if (!http->chunk_remaining)
1170 http->chunk_remaining = CHUNK_ZERO_SIZE;
1171 continue;
1174 } else {
1175 unsigned char *data;
1176 int data_len;
1177 int len;
1178 int zero = (http->chunk_remaining == CHUNK_ZERO_SIZE);
1180 if (zero) http->chunk_remaining = 0;
1181 len = http->chunk_remaining;
1183 /* Maybe everything necessary didn't come yet.. */
1184 int_upper_bound(&len, rb->length);
1185 conn->received += len;
1187 data = decompress_data(conn, rb->data, len, &data_len);
1189 if (add_fragment(conn->cached, conn->from,
1190 data, data_len) == 1)
1191 conn->tries = 0;
1193 if (data && data != rb->data) mem_free(data);
1195 conn->from += data_len;
1196 total_data_len += data_len;
1198 kill_buffer_data(rb, len);
1200 if (zero) {
1201 /* Last chunk has zero length, so this is last
1202 * chunk, we finished decompression just now
1203 * and now we can happily finish reading this
1204 * stuff. */
1205 http->chunk_remaining = CHUNK_DATA_END;
1206 continue;
1209 if (!http->chunk_remaining && rb->length > 0) {
1210 /* Eat newline succeeding each chunk. */
1211 if (rb->data[0] == ASCII_LF) {
1212 kill_buffer_data(rb, 1);
1213 } else {
1214 if (rb->data[0] != ASCII_CR
1215 || (rb->length >= 2
1216 && rb->data[1] != ASCII_LF)) {
1217 return -1;
1219 if (rb->length < 2) break;
1220 kill_buffer_data(rb, 2);
1222 http->chunk_remaining = CHUNK_SIZE;
1223 continue;
1226 break;
1229 /* More to read. */
1230 return !!total_data_len;
1233 /* Returns 0 if more data, 1 if done. */
1234 static int
1235 read_normal_http_data(struct connection *conn, struct read_buffer *rb)
1237 struct http_connection_info *http = conn->info;
1238 unsigned char *data;
1239 int data_len;
1240 int len = rb->length;
1242 if (http->length >= 0 && http->length < len) {
1243 /* We won't read more than we have to go. */
1244 len = http->length;
1247 conn->received += len;
1249 data = decompress_data(conn, rb->data, len, &data_len);
1251 if (add_fragment(conn->cached, conn->from, data, data_len) == 1)
1252 conn->tries = 0;
1254 if (data && data != rb->data) mem_free(data);
1256 conn->from += data_len;
1258 kill_buffer_data(rb, len);
1260 if (!http->length && conn->socket->state == SOCKET_RETRY_ONCLOSE) {
1261 return 2;
1264 return !!data_len;
1267 static void
1268 read_http_data(struct socket *socket, struct read_buffer *rb)
1270 struct connection *conn = socket->conn;
1271 struct http_connection_info *http = conn->info;
1272 int ret;
1274 if (socket->state == SOCKET_CLOSED) {
1275 if (conn->content_encoding && http->length == -1) {
1276 /* Flush decompression first. */
1277 http->length = 0;
1278 } else {
1279 read_http_data_done(conn);
1280 return;
1284 if (http->length != LEN_CHUNKED) {
1285 ret = read_normal_http_data(conn, rb);
1287 } else {
1288 ret = read_chunked_http_data(conn, rb);
1291 switch (ret) {
1292 case 0:
1293 read_more_http_data(conn, rb, 0);
1294 break;
1295 case 1:
1296 read_more_http_data(conn, rb, 1);
1297 break;
1298 case 2:
1299 read_http_data_done(conn);
1300 break;
1301 default:
1302 assertm(ret == -1, "Unexpected return value: %d", ret);
1303 abort_connection(conn, S_HTTP_ERROR);
1307 /* Returns offset of the header end, zero if more data is needed, -1 when
1308 * incorrect data was received, -2 if this is HTTP/0.9 and no header is to
1309 * come. */
1310 static int
1311 get_header(struct read_buffer *rb)
1313 int i;
1315 /* XXX: We will have to do some guess about whether an HTTP header is
1316 * coming or not, in order to support HTTP/0.9 reply correctly. This
1317 * means a little code duplication with get_http_code(). --pasky */
1318 if (rb->length > 4 && strncasecmp(rb->data, "HTTP/", 5))
1319 return -2;
1321 for (i = 0; i < rb->length; i++) {
1322 unsigned char a0 = rb->data[i];
1323 unsigned char a1 = rb->data[i + 1];
1325 if (a0 == 0) {
1326 rb->data[i] = ' ';
1327 continue;
1329 if (a0 == ASCII_LF && a1 == ASCII_LF
1330 && i < rb->length - 1)
1331 return i + 2;
1332 if (a0 == ASCII_CR && i < rb->length - 3) {
1333 if (a1 == ASCII_CR) continue;
1334 if (a1 != ASCII_LF) return -1;
1335 if (rb->data[i + 2] == ASCII_CR) {
1336 if (rb->data[i + 3] != ASCII_LF) return -1;
1337 return i + 4;
1342 return 0;
/* Walk over all @header_field headers found in @header and register the
 * first authentication challenge that can be handled.
 *
 * A Basic challenge is only taken when it carries a realm; otherwise the
 * next header is tried. A Digest challenge is taken as is. With
 * CONFIG_GSSAPI the two negotiate schemes are handed to
 * http_negotiate_input().
 *
 * Returns 1 if we need retry the connection (for negotiate-auth only),
 * 0 otherwise. */
static int
check_http_authentication(struct connection *conn, struct uri *uri,
		unsigned char *header, unsigned char *header_field)
{
	unsigned char *str, *d;
	int ret = 0;

	for (d = parse_header(header, header_field, &str);
	     d;
	     d = parse_header(str, header_field, &str)) {
		/* Set once a challenge was consumed and the scan is over. */
		int handled = 0;

		if (!strncasecmp(d, "Basic", 5)) {
			unsigned char *realm = get_header_param(d, "realm");

			if (realm) {
				add_auth_entry(uri, realm, NULL, NULL, 0);
				mem_free(realm);
				handled = 1;
			}

		} else if (!strncasecmp(d, "Digest", 6)) {
			unsigned char *realm = get_header_param(d, "realm");
			unsigned char *nonce = get_header_param(d, "nonce");
			unsigned char *opaque = get_header_param(d, "opaque");

			add_auth_entry(uri, realm, nonce, opaque, 1);

			mem_free_if(realm);
			mem_free_if(nonce);
			mem_free_if(opaque);
			handled = 1;

#ifdef CONFIG_GSSAPI
		} else if (!strncasecmp(d, HTTPNEG_GSS_STR, HTTPNEG_GSS_STRLEN)) {
			if (http_negotiate_input(conn, uri, HTTPNEG_GSS, str) == 0)
				ret = 1;
			handled = 1;

		} else if (!strncasecmp(d, HTTPNEG_NEG_STR, HTTPNEG_NEG_STRLEN)) {
			if (http_negotiate_input(conn, uri, HTTPNEG_NEG, str) == 0)
				ret = 1;
			handled = 1;
#endif
		}

		/* The header value is released on every path. */
		mem_free(d);
		if (handled)
			break;
	}

	return ret;
}
1398 void
1399 http_got_header(struct socket *socket, struct read_buffer *rb)
1401 struct connection *conn = socket->conn;
1402 struct http_connection_info *http = conn->info;
1403 unsigned char *head;
1404 #ifdef CONFIG_COOKIES
1405 unsigned char *cookie, *ch;
1406 #endif
1407 unsigned char *d;
1408 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
1409 struct http_version version;
1410 enum connection_state state = (conn->state != S_PROC ? S_GETH : S_PROC);
1411 int a, h = 200;
1412 int cf;
1414 if (socket->state == SOCKET_CLOSED) {
1415 if (!conn->tries && uri->host) {
1416 if (http->bl_flags & SERVER_BLACKLIST_NO_CHARSET) {
1417 del_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1418 } else {
1419 add_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1420 conn->tries = -1;
1423 retry_connection(conn, S_CANT_READ);
1424 return;
1426 socket->state = SOCKET_RETRY_ONCLOSE;
1428 again:
1429 a = get_header(rb);
1430 if (a == -1) {
1431 abort_connection(conn, S_HTTP_ERROR);
1432 return;
1434 if (!a) {
1435 read_from_socket(conn->socket, rb, state, http_got_header);
1436 return;
1438 if (a == -2) a = 0;
1439 if ((a && get_http_code(rb, &h, &version))
1440 || h == 101) {
1441 abort_connection(conn, S_HTTP_ERROR);
1442 return;
1445 /* When no header, HTTP/0.9 document. That's always text/html,
1446 * according to
1447 * http://www.w3.org/Protocols/HTTP/AsImplemented.html. */
1448 /* FIXME: This usage of fake protocol headers for setting up the
1449 * content type has been obsoleted by the @content_type member of
1450 * {struct cache_entry}. */
1451 head = (a ? memacpy(rb->data, a)
1452 : stracpy("\r\nContent-Type: text/html\r\n"));
1453 if (!head) {
1454 abort_connection(conn, S_OUT_OF_MEM);
1455 return;
1458 if (check_http_server_bugs(uri, http, head)) {
1459 mem_free(head);
1460 retry_connection(conn, S_RESTART);
1461 return;
1464 #ifdef CONFIG_CGI
1465 if (uri->protocol == PROTOCOL_FILE) {
1466 /* ``Status'' is not a standard HTTP header field although some
1467 * HTTP servers like www.php.net uses it for some reason. It should
1468 * only be used for CGI scripts so that it does not interfere
1469 * with status code depended handling for ``normal'' HTTP like
1470 * redirects. */
1471 d = parse_header(head, "Status", NULL);
1472 if (d) {
1473 int h2 = atoi(d);
1475 mem_free(d);
1476 if (h2 >= 100 && h2 < 600) h = h2;
1477 if (h == 101) {
1478 mem_free(head);
1479 abort_connection(conn, S_HTTP_ERROR);
1480 return;
1484 #endif
1486 #ifdef CONFIG_COOKIES
1487 ch = head;
1488 while ((cookie = parse_header(ch, "Set-Cookie", &ch))) {
1489 set_cookie(uri, cookie);
1490 mem_free(cookie);
1492 #endif
1493 http->code = h;
1495 if (h == 100) {
1496 mem_free(head);
1497 state = S_PROC;
1498 kill_buffer_data(rb, a);
1499 goto again;
1501 if (h < 200) {
1502 mem_free(head);
1503 abort_connection(conn, S_HTTP_ERROR);
1504 return;
1506 if (h == 304) {
1507 mem_free(head);
1508 http_end_request(conn, S_OK, 1);
1509 return;
1511 if (h == 204) {
1512 mem_free(head);
1513 http_end_request(conn, S_HTTP_204, 0);
1514 return;
1516 if (h == 200 && connection_is_https_proxy(conn) && !conn->socket->ssl) {
1517 #ifdef CONFIG_SSL
1518 mem_free(head);
1519 socket->need_ssl = 1;
1520 complete_connect_socket(socket, uri, http_send_header);
1521 #else
1522 abort_connection(conn, S_SSL_ERROR);
1523 #endif
1524 return;
1527 conn->cached = get_cache_entry(conn->uri);
1528 if (!conn->cached) {
1529 mem_free(head);
1530 abort_connection(conn, S_OUT_OF_MEM);
1531 return;
1533 mem_free_set(&conn->cached->head, head);
1535 if (!get_opt_bool("document.cache.ignore_cache_control")) {
1536 struct cache_entry *cached = conn->cached;
1538 /* I am not entirely sure in what order we should process these
1539 * headers and if we should still process Cache-Control max-age
1540 * if we already set max age to date mentioned in Expires.
1541 * --jonas */
1542 /* Ensure that when ever cached->max_age is set, cached->expired
1543 * is also set, so the cache management knows max_age contains a
1544 * valid time. If on the other hand no caching is requested
1545 * cached->expire should be set to zero. */
1546 if ((d = parse_header(cached->head, "Expires", NULL))) {
1547 /* Convert date to seconds. */
1548 time_t expires = parse_date(&d, NULL, 0, 1);
1550 mem_free(d);
1552 if (expires && cached->cache_mode != CACHE_MODE_NEVER) {
1553 timeval_from_seconds(&cached->max_age, expires);
1554 cached->expire = 1;
1558 if ((d = parse_header(cached->head, "Pragma", NULL))) {
1559 if (strstr(d, "no-cache")) {
1560 cached->cache_mode = CACHE_MODE_NEVER;
1561 cached->expire = 0;
1563 mem_free(d);
1566 if (cached->cache_mode != CACHE_MODE_NEVER
1567 && (d = parse_header(cached->head, "Cache-Control", NULL))) {
1568 if (strstr(d, "no-cache") || strstr(d, "must-revalidate")) {
1569 cached->cache_mode = CACHE_MODE_NEVER;
1570 cached->expire = 0;
1572 } else {
1573 unsigned char *pos = strstr(d, "max-age=");
1575 assert(cached->cache_mode != CACHE_MODE_NEVER);
1577 if (pos) {
1578 /* Grab the number of seconds. */
1579 timeval_T max_age;
1581 timeval_from_seconds(&max_age, atol(pos + 8));
1582 timeval_now(&cached->max_age);
1583 timeval_add_interval(&cached->max_age, &max_age);
1585 cached->expire = 1;
1589 mem_free(d);
1593 /* XXX: Is there some reason why NOT to follow the Location header
1594 * for any status? If the server didn't mean it, it wouldn't send
1595 * it, after all...? --pasky */
1596 if (h == 201 || h == 301 || h == 302 || h == 303 || h == 307) {
1597 d = parse_header(conn->cached->head, "Location", NULL);
1598 if (d) {
1599 int use_get_method = (h == 303);
1601 /* A note from RFC 2616 section 10.3.3:
1602 * RFC 1945 and RFC 2068 specify that the client is not
1603 * allowed to change the method on the redirected
1604 * request. However, most existing user agent
1605 * implementations treat 302 as if it were a 303
1606 * response, performing a GET on the Location
1607 * field-value regardless of the original request
1608 * method. */
1609 /* So POST must not be redirected to GET, but some
1610 * BUGGY message boards rely on it :-( */
1611 if (h == 302
1612 && get_opt_bool("protocol.http.bugs.broken_302_redirect"))
1613 use_get_method = 1;
1615 redirect_cache(conn->cached, d, use_get_method, -1);
1616 mem_free(d);
1620 if (h == 401) {
1621 if (check_http_authentication(conn, uri,
1622 conn->cached->head, "WWW-Authenticate")) {
1623 retry_connection(conn, S_RESTART);
1624 return;
1628 if (h == 407) {
1629 unsigned char *str;
1631 d = parse_header(conn->cached->head, "Proxy-Authenticate", &str);
1632 while (d) {
1633 if (!strncasecmp(d, "Basic", 5)) {
1634 unsigned char *realm = get_header_param(d, "realm");
1636 if (realm) {
1637 mem_free_set(&proxy_auth.realm, realm);
1638 proxy_auth.digest = 0;
1639 mem_free(d);
1640 break;
1643 } else if (!strncasecmp(d, "Digest", 6)) {
1644 unsigned char *realm = get_header_param(d, "realm");
1645 unsigned char *nonce = get_header_param(d, "nonce");
1646 unsigned char *opaque = get_header_param(d, "opaque");
1648 mem_free_set(&proxy_auth.realm, realm);
1649 mem_free_set(&proxy_auth.nonce, nonce);
1650 mem_free_set(&proxy_auth.opaque, opaque);
1651 proxy_auth.digest = 1;
1653 mem_free(d);
1654 break;
1657 mem_free(d);
1658 d = parse_header(str, "Proxy-Authenticate", &str);
1662 kill_buffer_data(rb, a);
1663 http->close = 0;
1664 http->length = -1;
1665 http->recv_version = version;
1667 if ((d = parse_header(conn->cached->head, "Connection", NULL))
1668 || (d = parse_header(conn->cached->head, "Proxy-Connection", NULL))) {
1669 if (!strcasecmp(d, "close")) http->close = 1;
1670 mem_free(d);
1671 } else if (PRE_HTTP_1_1(version)) {
1672 http->close = 1;
1675 cf = conn->from;
1676 conn->from = 0;
1677 d = parse_header(conn->cached->head, "Content-Range", NULL);
1678 if (d) {
1679 if (strlen(d) > 6) {
1680 d[5] = 0;
1681 if (isdigit(d[6]) && !strcasecmp(d, "bytes")) {
1682 int f;
1684 errno = 0;
1685 f = strtol(d + 6, NULL, 10);
1687 if (!errno && f >= 0) conn->from = f;
1690 mem_free(d);
1692 if (cf && !conn->from && !conn->unrestartable) conn->unrestartable = 1;
1693 if ((conn->progress->start <= 0 && conn->from > cf) || conn->from < 0) {
1694 /* We don't want this if conn->progress.start because then conn->from will
1695 * be probably value of conn->progress.start, while cf is 0. */
1696 abort_connection(conn, S_HTTP_ERROR);
1697 return;
1700 #if 0
1702 struct status *s;
1703 foreach (s, conn->downloads) {
1704 fprintf(stderr, "conn %p status %p pri %d st %d er %d :: ce %s",
1705 conn, s, s->pri, s->state, s->prev_error,
1706 s->cached ? s->cached->url : (unsigned char *) "N-U-L-L");
1709 #endif
1711 if (conn->progress->start >= 0) {
1712 /* Update to the real value which we've got from Content-Range. */
1713 conn->progress->seek = conn->from;
1715 conn->progress->start = conn->from;
1717 d = parse_header(conn->cached->head, "Content-Length", NULL);
1718 if (d) {
1719 unsigned char *ep;
1720 int l;
1722 errno = 0;
1723 l = strtol(d, (char **) &ep, 10);
1725 if (!errno && !*ep && l >= 0) {
1726 if (!http->close || POST_HTTP_1_0(version))
1727 http->length = l;
1728 conn->est_length = conn->from + l;
1730 mem_free(d);
1733 if (!conn->unrestartable) {
1734 d = parse_header(conn->cached->head, "Accept-Ranges", NULL);
1736 if (d) {
1737 if (!strcasecmp(d, "none"))
1738 conn->unrestartable = 1;
1739 mem_free(d);
1740 } else {
1741 if (!conn->from)
1742 conn->unrestartable = 1;
1746 d = parse_header(conn->cached->head, "Transfer-Encoding", NULL);
1747 if (d) {
1748 if (!strcasecmp(d, "chunked")) {
1749 http->length = LEN_CHUNKED;
1750 http->chunk_remaining = CHUNK_SIZE;
1752 mem_free(d);
1754 if (!http->close && http->length == -1) http->close = 1;
1756 d = parse_header(conn->cached->head, "Last-Modified", NULL);
1757 if (d) {
1758 if (conn->cached->last_modified && strcasecmp(conn->cached->last_modified, d)) {
1759 delete_entry_content(conn->cached);
1760 if (conn->from) {
1761 conn->from = 0;
1762 mem_free(d);
1763 retry_connection(conn, S_MODIFIED);
1764 return;
1767 if (!conn->cached->last_modified) conn->cached->last_modified = d;
1768 else mem_free(d);
1770 if (!conn->cached->last_modified) {
1771 d = parse_header(conn->cached->head, "Date", NULL);
1772 if (d) conn->cached->last_modified = d;
1775 /* FIXME: Parse only if HTTP/1.1 or later? --Zas */
1776 d = parse_header(conn->cached->head, "ETag", NULL);
1777 if (d) {
1778 if (conn->cached->etag) {
1779 unsigned char *old_tag = conn->cached->etag;
1780 unsigned char *new_tag = d;
1782 /* http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19 */
1784 if (new_tag[0] == 'W' && new_tag[1] == '/')
1785 new_tag += 2;
1787 if (old_tag[0] == 'W' && old_tag[1] == '/')
1788 old_tag += 2;
1790 if (strcmp(new_tag, old_tag)) {
1791 delete_entry_content(conn->cached);
1792 if (conn->from) {
1793 conn->from = 0;
1794 mem_free(d);
1795 retry_connection(conn, S_MODIFIED);
1796 return;
1801 if (!conn->cached->etag)
1802 conn->cached->etag = d;
1803 else
1804 mem_free(d);
1807 d = parse_header(conn->cached->head, "Content-Encoding", NULL);
1808 if (d) {
1809 unsigned char *extension = get_extension_from_uri(uri);
1810 enum stream_encoding file_encoding;
1812 file_encoding = extension ? guess_encoding(extension) : ENCODING_NONE;
1813 mem_free_if(extension);
1815 /* If the content is encoded, we want to preserve the encoding
1816 * if it is implied by the extension, so that saving the URI
1817 * will leave the saved file with the correct encoding. */
1818 #ifdef CONFIG_GZIP
1819 if (file_encoding != ENCODING_GZIP
1820 && (!strcasecmp(d, "gzip") || !strcasecmp(d, "x-gzip")))
1821 conn->content_encoding = ENCODING_GZIP;
1822 #endif
1823 #ifdef BUG_517
1824 #ifdef CONFIG_BZIP2
1825 if (file_encoding != ENCODING_BZIP2
1826 && (!strcasecmp(d, "bzip2") || !strcasecmp(d, "x-bzip2")))
1827 conn->content_encoding = ENCODING_BZIP2;
1828 #endif
1829 #endif
1830 mem_free(d);
1833 if (conn->content_encoding != ENCODING_NONE) {
1834 mem_free_if(conn->cached->encoding_info);
1835 conn->cached->encoding_info = stracpy(get_encoding_name(conn->content_encoding));
1838 if (http->length == -1
1839 || (PRE_HTTP_1_1(http->recv_version) && http->close))
1840 socket->state = SOCKET_END_ONCLOSE;
1842 read_http_data(socket, rb);