move-link-up-line: segfault when cursor was below last line.
[elinks.git] / src / protocol / http / http.c
blobbf675121c98a599ddc35a4a46d84eee43a58c858
1 /* Internal "http" protocol implementation */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <errno.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #ifdef HAVE_UNISTD_H
12 #include <unistd.h>
13 #endif
14 #ifdef HAVE_FCNTL_H
15 #include <fcntl.h> /* OS/2 needs this after sys/types.h */
16 #endif
17 #ifdef HAVE_LIMITS_H
18 #include <limits.h>
19 #endif
21 #include "elinks.h"
23 #include "cache/cache.h"
24 #include "config/options.h"
25 #include "cookies/cookies.h"
26 #include "intl/charsets.h"
27 #include "intl/gettext/libintl.h"
28 #include "main/module.h"
29 #include "network/connection.h"
30 #include "network/progress.h"
31 #include "network/socket.h"
32 #include "osdep/ascii.h"
33 #include "osdep/osdep.h"
34 #include "osdep/sysname.h"
35 #include "protocol/auth/auth.h"
36 #include "protocol/auth/digest.h"
37 #include "protocol/date.h"
38 #include "protocol/header.h"
39 #include "protocol/http/blacklist.h"
40 #include "protocol/http/codes.h"
41 #include "protocol/http/http.h"
42 #include "protocol/uri.h"
43 #include "session/session.h"
44 #include "terminal/terminal.h"
45 #include "util/base64.h"
46 #include "util/conv.h"
47 #include "util/memory.h"
48 #include "util/string.h"
50 #ifdef CONFIG_GSSAPI
51 #include "http_negotiate.h"
52 #endif
/* HTTP protocol version as a major.minor pair of integers.
 *
 * The macros that follow are predicates over a struct http_version passed
 * by value (they use `(x).major`, not `->`). Each macro may evaluate its
 * argument more than once, so do not pass expressions with side effects. */
54 struct http_version {
55 int major;
56 int minor;
59 #define HTTP_0_9(x) ((x).major == 0 && (x).minor == 9)
60 #define HTTP_1_0(x) ((x).major == 1 && (x).minor == 0)
61 #define HTTP_1_1(x) ((x).major == 1 && (x).minor == 1)
62 #define PRE_HTTP_1_0(x) ((x).major < 1)
63 #define PRE_HTTP_1_1(x) (PRE_HTTP_1_0(x) || HTTP_1_0(x))
64 #define POST_HTTP_1_0(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 0))
65 #define POST_HTTP_1_1(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 1))
/* Per-connection HTTP state. An instance is allocated by
 * init_http_connection_info() and stored in conn->info. */
68 struct http_connection_info {
/* Blacklist flags looked up for the proxied URI when the info is created. */
69 enum blacklist_flags bl_flags;
/* NOTE(review): presumably the version parsed from the response status
 * line; it is not written anywhere in this part of the file -- confirm. */
70 struct http_version recv_version;
/* Version written into the request line by http_send_header(). */
71 struct http_version sent_version;
/* Non-zero means http_end_request() must not keep the connection alive. */
73 int close;
75 #define LEN_CHUNKED -2 /* == we get data in unknown number of chunks */
76 #define LEN_FINISHED 0
/* Remaining body length, or LEN_CHUNKED; decompress_data() decrements it
 * while it is positive. */
77 int length;
79 /* Either bytes coming in this chunk yet or "parser state". */
80 #define CHUNK_DATA_END -3
81 #define CHUNK_ZERO_SIZE -2
82 #define CHUNK_SIZE -1
83 int chunk_remaining;
/* HTTP status code; read_http_data_done() generates an error document for
 * values >= 400 when no body was received. */
85 int code;
/* Credentials and digest state used for the Proxy-Authorization header built
 * in http_send_header(); its realm/nonce/opaque strings are released in
 * done_http(). */
89 static struct auth_entry proxy_auth;
/* Complete "Accept-Charset: ..." header line. It is built on first use by
 * init_accept_charset() and freed in done_http(). */
91 static unsigned char *accept_charset = NULL;
/* Option tree registered by the HTTP module: everything below
 * "protocol.http" (server bug workarounds, proxy, referer, language,
 * TRACE debugging, user agent) plus the "protocol.https" proxy settings.
 * The table is terminated by NULL_OPTION_INFO. */
94 static struct option_info http_options[] = {
95 INIT_OPT_TREE("protocol", N_("HTTP"),
96 "http", 0,
97 N_("HTTP-specific options.")),
100 INIT_OPT_TREE("protocol.http", N_("Server bug workarounds"),
101 "bugs", 0,
102 N_("Server-side HTTP bugs workarounds.")),
/* Note the inverted sense: http_send_header() emits Accept-Charset only
 * when this option is false. */
104 INIT_OPT_BOOL("protocol.http.bugs", N_("Do not send Accept-Charset"),
105 "accept_charset", 0, 1,
106 N_("The Accept-Charset header is quite long and sending it can trigger\n"
107 "bugs in some rarely found servers.")),
109 INIT_OPT_BOOL("protocol.http.bugs", N_("Allow blacklisting"),
110 "allow_blacklist", 0, 1,
111 N_("Allow blacklisting of buggy servers.")),
113 INIT_OPT_BOOL("protocol.http.bugs", N_("Broken 302 redirects"),
114 "broken_302_redirect", 0, 1,
115 N_("Broken 302 redirect (violates RFC but compatible with Netscape).\n"
116 "This is a problem for a lot of web discussion boards and the like.\n"
117 "If they will do strange things to you, try to play with this.")),
119 INIT_OPT_BOOL("protocol.http.bugs", N_("No keepalive after POST requests"),
120 "post_no_keepalive", 0, 0,
121 N_("Disable keepalive connection after POST request.")),
123 INIT_OPT_BOOL("protocol.http.bugs", N_("Use HTTP/1.0"),
124 "http10", 0, 0,
125 N_("Use HTTP/1.0 protocol instead of HTTP/1.1.")),
127 INIT_OPT_TREE("protocol.http", N_("Proxy configuration"),
128 "proxy", 0,
129 N_("HTTP proxy configuration.")),
131 INIT_OPT_STRING("protocol.http.proxy", N_("Host and port-number"),
132 "host", 0, "",
133 N_("Host and port-number (host:port) of the HTTP proxy, or blank.\n"
134 "If it's blank, HTTP_PROXY environment variable is checked as well.")),
136 INIT_OPT_STRING("protocol.http.proxy", N_("Username"),
137 "user", 0, "",
138 N_("Proxy authentication username.")),
140 INIT_OPT_STRING("protocol.http.proxy", N_("Password"),
141 "passwd", 0, "",
142 N_("Proxy authentication password.")),
145 INIT_OPT_TREE("protocol.http", N_("Referer sending"),
146 "referer", 0,
147 N_("HTTP referer sending options. HTTP referer is a special header\n"
148 "sent in the HTTP requests, which is supposed to contain the previous\n"
149 "page visited by the browser. This way, the server can know what link\n"
150 "did you follow when accessing that page. However, this behaviour\n"
151 "can unfortunately considerably affect privacy and can lead even to a\n"
152 "security problem on some badly designed web pages.")),
/* The value is matched against the REFERER_* constants in
 * http_send_header(). */
154 INIT_OPT_INT("protocol.http.referer", N_("Policy"),
155 "policy", 0,
156 REFERER_NONE, REFERER_TRUE, REFERER_TRUE,
157 N_("Mode of sending HTTP referer:\n"
158 "0 is send no referer\n"
159 "1 is send current URL as referer\n"
160 "2 is send fixed fake referer\n"
161 "3 is send previous URL as referer (correct, but insecure)")),
163 INIT_OPT_STRING("protocol.http.referer", N_("Fake referer URL"),
164 "fake", 0, "",
165 N_("Fake referer to be sent when policy is 2.")),
168 INIT_OPT_STRING("protocol.http", N_("Send Accept-Language header"),
169 "accept_language", 0, "",
170 N_("Send Accept-Language header.")),
172 INIT_OPT_BOOL("protocol.http", N_("Use UI language as Accept-Language"),
173 "accept_ui_language", 0, 1,
174 N_("Request localised versions of documents from web-servers (using the\n"
175 "Accept-Language header) using the language you have configured for\n"
176 "ELinks' user-interface (this also affects navigator.language ECMAScript\n"
177 "value available to scripts). Note that some see this as a potential\n"
178 "security risk because it tells web-masters and the FBI sniffers about\n"
179 "your language preference.")),
181 INIT_OPT_BOOL("protocol.http", N_("Activate HTTP TRACE debugging"),
182 "trace", 0, 0,
183 N_("If active, all HTTP requests are sent with TRACE as their method\n"
184 "rather than GET or POST. This is useful for debugging of both ELinks\n"
185 "and various server-side scripts --- the server only returns the client's\n"
186 "request back to the client verbatim. Note that this type of request may\n"
187 "not be enabled on all servers.")),
189 /* OSNews.com is supposed to be relying on the textmode token, at least. */
190 INIT_OPT_STRING("protocol.http", N_("User-agent identification"),
191 "user_agent", 0, "ELinks/%v (textmode; %s; %t-%b)",
192 N_("Change the User Agent ID. That means identification string, which\n"
193 "is sent to HTTP server when a document is requested. The 'textmode'\n"
194 "token in the first field is our silent attempt to establish this as\n"
195 "a standard for new textmode user agents, so that the webmasters can\n"
196 "have just a single uniform test for these if they are e.g. pushing\n"
197 "some lite version to them automagically.\n"
198 "%v in the string means ELinks version\n"
199 "%s in the string means system identification\n"
200 "%t in the string means size of the terminal\n"
201 "%b in the string means number of bars displayed by ELinks\n"
202 "Use \" \" if you don't want any User-Agent header to be sent at all.")),
205 INIT_OPT_TREE("protocol", N_("HTTPS"),
206 "https", 0,
207 N_("HTTPS-specific options.")),
209 INIT_OPT_TREE("protocol.https", N_("Proxy configuration"),
210 "proxy", 0,
211 N_("HTTPS proxy configuration.")),
213 INIT_OPT_STRING("protocol.https.proxy", N_("Host and port-number"),
214 "host", 0, "",
215 N_("Host and port-number (host:port) of the HTTPS CONNECT proxy, or blank.\n"
216 "If it's blank, HTTPS_PROXY environment variable is checked as well.")),
217 NULL_OPTION_INFO,
/* Forward declaration so the module descriptor below can reference the
 * shutdown hook before its definition. */
220 static void done_http();
/* Module descriptor for the HTTP protocol: it registers http_options and
 * uses done_http as its only lifecycle hook (no init, hooks, submodules or
 * private data). */
222 struct module http_protocol_module = struct_module(
223 /* name: */ N_("HTTP"),
224 /* options: */ http_options,
225 /* hooks: */ NULL,
226 /* submodules: */ NULL,
227 /* data: */ NULL,
228 /* init: */ NULL,
229 /* done: */ done_http
233 static void
234 done_http(void)
236 mem_free_if(proxy_auth.realm);
237 mem_free_if(proxy_auth.nonce);
238 mem_free_if(proxy_auth.opaque);
240 free_blacklist();
242 if (accept_charset)
243 mem_free(accept_charset);
246 static void
247 init_accept_charset(void)
249 struct string ac;
250 unsigned char *cs;
251 int i;
253 if (!init_string(&ac)) return;
255 for (i = 0; (cs = get_cp_mime_name(i)); i++) {
256 if (ac.length) {
257 add_to_string(&ac, ", ");
258 } else {
259 add_to_string(&ac, "Accept-Charset: ");
261 add_to_string(&ac, cs);
264 if (ac.length) {
265 add_crlf_to_string(&ac);
268 accept_charset = squeezastring(&ac);
270 done_string(&ac);
274 unsigned char *
275 subst_user_agent(unsigned char *fmt, unsigned char *version,
276 unsigned char *sysname, unsigned char *termsize)
278 struct string agent;
280 if (!init_string(&agent)) return NULL;
282 while (*fmt) {
283 int p;
285 for (p = 0; fmt[p] && fmt[p] != '%'; p++);
287 add_bytes_to_string(&agent, fmt, p);
288 fmt += p;
290 if (*fmt != '%') continue;
292 fmt++;
293 switch (*fmt) {
294 case 'b':
295 if (!list_empty(sessions)) {
296 unsigned char bs[4] = "";
297 int blen = 0;
298 struct session *ses = sessions.prev;
299 int bars = ses->status.show_status_bar
300 + ses->status.show_tabs_bar
301 + ses->status.show_title_bar;
303 ulongcat(bs, &blen, bars, 2, 0);
304 add_to_string(&agent, bs);
306 break;
307 case 'v':
308 add_to_string(&agent, version);
309 break;
310 case 's':
311 add_to_string(&agent, sysname);
312 break;
313 case 't':
314 if (termsize)
315 add_to_string(&agent, termsize);
316 break;
317 default:
318 add_bytes_to_string(&agent, fmt - 1, 2);
319 break;
321 if (*fmt) fmt++;
324 return agent.source;
/* Append the @components of @uri to @header, rewriting characters that
 * must not appear raw in a request line: space, tab, CR and LF each become
 * "%20" and a backslash becomes '/'.
 *
 * As the original author noted, this is not really the right place for the
 * substitution; URLs should probably be encoded as early as possible and
 * decoded again in the protocol backends. --pasky */
static void
add_url_to_http_string(struct string *header, struct uri *uri, int components)
{
	unsigned char *url = get_uri_string(uri, components);
	unsigned char *pos;

	if (!url)
		return;

	for (pos = url; *pos; pos++) {
		/* Length of the run that can be copied verbatim. */
		int run = strcspn(pos, " \t\r\n\\");

		add_bytes_to_string(header, pos, run);
		pos += run;

		if (!*pos)
			break;

		if (*pos == '\\')
			add_char_to_string(header, '/');
		else
			add_to_string(header, "%20");
	}

	mem_free(url);
}
/* Parse the decimal number stored in [@start, @end), walking backwards from
 * @end - 1 to @start, and store it in *@value.
 *
 * Returns 0 on success. Returns -1, leaving *@value untouched, when:
 *  - the range is empty or inverted (@end <= @start) or a pointer is NULL;
 *    the original read start[-1] in that case;
 *  - the range is longer than 9 characters, which keeps the accumulator
 *    within a 32-bit int instead of overflowing;
 *  - any character in the range is not a decimal digit. */
static int
revstr2num(unsigned char *start, unsigned char *end, int *value)
{
	/* 999999999 and the final scale of 10^9 both fit in a 32-bit int. */
	enum { MAX_DIGITS = 9 };
	int scale = 1, total = 0;

	if (!start || !end || end <= start) return -1;
	if (end - start > MAX_DIGITS) return -1;

	while (end > start) {
		--end;
		if (!isdigit(*end)) return -1; /* NaN */
		total += (*end - '0') * scale;
		scale *= 10;
	}

	*value = total;
	return 0;
}
378 /* This function extracts code, major and minor version from string
379 * "\s*HTTP/\d+.\d+\s+\d\d\d..."
380 * It returns a negative value on error, 0 on success.
382 static int
383 get_http_code(struct read_buffer *rb, int *code, struct http_version *version)
385 unsigned char *head = rb->data;
386 unsigned char *start;
388 *code = 0;
389 version->major = 0;
390 version->minor = 0;
392 /* Ignore spaces. */
393 while (*head == ' ') head++;
395 /* HTTP/ */
396 if (toupper(*head) != 'H' || toupper(*++head) != 'T' ||
397 toupper(*++head) != 'T' || toupper(*++head) != 'P'
398 || *++head != '/')
399 return -1;
401 /* Version */
402 start = ++head;
403 /* Find next '.' */
404 while (*head && *head != '.') head++;
405 /* Sanity check. */
406 if (!*head || !(head - start)
407 || (head - start) > 4
408 || !isdigit(*(head + 1)))
409 return -2;
411 /* Extract major version number. */
412 if (revstr2num(start, head, &version->major)) return -3; /* NaN */
414 start = head + 1;
416 /* Find next ' '. */
417 while (*head && *head != ' ') head++;
418 /* Sanity check. */
419 if (!*head || !(head - start) || (head - start) > 4) return -4;
421 /* Extract minor version number. */
422 if (revstr2num(start, head, &version->minor)) return -5; /* NaN */
424 /* Ignore spaces. */
425 while (*head == ' ') head++;
427 /* Sanity check for code. */
428 if (head[0] < '1' || head[0] > '9' ||
429 !isdigit(head[1]) ||
430 !isdigit(head[2]))
431 return -6; /* Invalid code. */
433 /* Extract code. */
434 *code = (head[0] - '0') * 100 + (head[1] - '0') * 10 + head[2] - '0';
436 return 0;
439 static int
440 check_http_server_bugs(struct uri *uri, struct http_connection_info *http,
441 unsigned char *head)
443 unsigned char *server;
444 const unsigned char *const *s;
445 static const unsigned char *const buggy_servers[] = {
446 "mod_czech/3.1.0",
447 "Purveyor",
448 "Netscape-Enterprise",
449 NULL
452 if (!get_opt_bool("protocol.http.bugs.allow_blacklist")
453 || HTTP_1_0(http->sent_version))
454 return 0;
456 server = parse_header(head, "Server", NULL);
457 if (!server)
458 return 0;
460 for (s = buggy_servers; *s; s++) {
461 if (strstr(server, *s)) {
462 add_blacklist_entry(uri, SERVER_BLACKLIST_HTTP10);
463 break;
467 mem_free(server);
468 return (*s != NULL);
471 static void
472 http_end_request(struct connection *conn, enum connection_state state,
473 int notrunc)
475 shutdown_connection_stream(conn);
477 if (conn->info && !((struct http_connection_info *) conn->info)->close
478 && (!conn->socket->ssl) /* We won't keep alive ssl connections */
479 && (!get_opt_bool("protocol.http.bugs.post_no_keepalive")
480 || !conn->uri->post)) {
481 if (state == S_OK && conn->cached)
482 normalize_cache_entry(conn->cached, !notrunc ? conn->from : -1);
483 set_connection_state(conn, state);
484 add_keepalive_connection(conn, HTTP_KEEPALIVE_TIMEOUT, NULL);
485 } else {
486 abort_connection(conn, state);
490 static void http_send_header(struct socket *);
492 void
493 http_protocol_handler(struct connection *conn)
495 /* setcstate(conn, S_CONN); */
497 if (!has_keepalive_connection(conn)) {
498 make_connection(conn->socket, conn->uri, http_send_header,
499 conn->cache_mode >= CACHE_MODE_FORCE_RELOAD);
500 } else {
501 http_send_header(conn->socket);
/* Entry point for proxied requests; they are handled exactly like plain
 * HTTP requests. */
void
proxy_protocol_handler(struct connection *conn)
{
	http_protocol_handler(conn);
}
/* True when the URI pointed to by x uses the proxy pseudo-protocol. */
511 #define IS_PROXY_URI(x) ((x)->protocol == PROTOCOL_PROXY)
/* True when conn goes through a proxy and the URI actually being fetched
 * (conn->proxied_uri) is HTTPS. The conn argument is evaluated twice. */
513 #define connection_is_https_proxy(conn) \
514 (IS_PROXY_URI((conn)->uri) && (conn)->proxied_uri->protocol == PROTOCOL_HTTPS)
516 struct http_connection_info *
517 init_http_connection_info(struct connection *conn, int major, int minor, int close)
519 struct http_connection_info *http;
521 http = mem_calloc(1, sizeof(*http));
522 if (!http) {
523 http_end_request(conn, S_OUT_OF_MEM, 0);
524 return NULL;
527 http->sent_version.major = major;
528 http->sent_version.minor = minor;
529 http->close = close;
531 /* The CGI code uses this too and blacklisting expects a host name. */
532 if (conn->proxied_uri->protocol != PROTOCOL_FILE)
533 http->bl_flags = get_blacklist_flags(conn->proxied_uri);
535 if (http->bl_flags & SERVER_BLACKLIST_HTTP10
536 || get_opt_bool("protocol.http.bugs.http10")) {
537 http->sent_version.major = 1;
538 http->sent_version.minor = 0;
541 /* If called from HTTPS proxy connection the connection info might have
542 * already been allocated. */
543 mem_free_set(&conn->info, http);
545 return http;
/* Build the complete HTTP request for socket->conn (request line, headers
 * and, for POST, the decoded body) in one string and pass it to
 * request_from_socket() with http_got_header as the callback.
 *
 * The method is TRACE when "protocol.http.trace" is set, else CONNECT when
 * tunnelling HTTPS through a proxy before SSL is up, else POST when the URI
 * carries post data, else GET.
 *
 * The request ends early with S_BAD_URL when the proxied URI has no host,
 * and with S_OUT_OF_MEM when the header string cannot be initialized. If
 * init_http_connection_info() fails it has already ended the request. */
548 static void
549 http_send_header(struct socket *socket)
551 struct connection *conn = socket->conn;
552 struct http_connection_info *http;
553 int trace = get_opt_bool("protocol.http.trace");
554 struct string header;
555 unsigned char *post_data = NULL;
556 struct auth_entry *entry = NULL;
557 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
558 unsigned char *optstr;
559 int use_connect, talking_to_proxy;
561 /* Sanity check for a host */
562 if (!uri || !uri->host || !*uri->host || !uri->hostlen) {
563 http_end_request(conn, S_BAD_URL, 0);
564 return;
/* Ask for HTTP/1.1 with keep-alive allowed; init_http_connection_info() may
 * lower the version to 1.0. */
567 http = init_http_connection_info(conn, 1, 1, 0);
568 if (!http) return;
570 if (!init_string(&header)) {
571 http_end_request(conn, S_OUT_OF_MEM, 0);
572 return;
575 if (!conn->cached) conn->cached = find_in_cache(uri);
/* Both flags require that the socket has no SSL state yet. */
577 talking_to_proxy = IS_PROXY_URI(conn->uri) && !conn->socket->ssl;
578 use_connect = connection_is_https_proxy(conn) && !conn->socket->ssl;
580 if (trace) {
581 add_to_string(&header, "TRACE ");
582 } else if (use_connect) {
583 add_to_string(&header, "CONNECT ");
584 /* In CONNECT requests, we send only a subset of the
585 * headers to the proxy. See the "CONNECT:" comments
586 * below. After the CONNECT request succeeds, we
587 * negotiate TLS with the real server and make a new
588 * HTTP request that includes all the headers. */
589 } else if (uri->post) {
590 add_to_string(&header, "POST ");
591 conn->unrestartable = 1;
592 } else {
593 add_to_string(&header, "GET ");
596 if (!talking_to_proxy) {
597 add_char_to_string(&header, '/');
600 if (use_connect) {
601 /* Add port if it was specified or the default port */
602 add_uri_to_string(&header, uri, URI_HTTP_CONNECT);
603 } else {
604 if (connection_is_https_proxy(conn) && conn->socket->ssl) {
605 add_url_to_http_string(&header, uri, URI_DATA);
607 } else if (talking_to_proxy) {
608 add_url_to_http_string(&header, uri, URI_PROXY);
610 } else {
611 add_url_to_http_string(&header, conn->uri, URI_DATA);
615 add_to_string(&header, " HTTP/");
616 add_long_to_string(&header, http->sent_version.major);
617 add_char_to_string(&header, '.');
618 add_long_to_string(&header, http->sent_version.minor);
619 add_crlf_to_string(&header);
621 /* CONNECT: Sending a Host header seems pointless as the same
622 * information is already in the CONNECT line. It's harmless
623 * though and Mozilla does it too. */
624 add_to_string(&header, "Host: ");
625 add_uri_to_string(&header, uri, URI_HTTP_HOST);
626 add_crlf_to_string(&header);
628 /* CONNECT: Proxy-Authorization is intended to be seen by the proxy. */
629 if (talking_to_proxy) {
630 unsigned char *user = get_opt_str("protocol.http.proxy.user");
631 unsigned char *passwd = get_opt_str("protocol.http.proxy.passwd");
633 if (proxy_auth.digest) {
634 unsigned char *response;
/* Copy the configured credentials into proxy_auth, truncating them to the
 * fixed field sizes. */
635 int userlen = int_min(strlen(user), AUTH_USER_MAXLEN - 1);
636 int passwordlen = int_min(strlen(passwd), AUTH_PASSWORD_MAXLEN - 1);
638 if (userlen)
639 memcpy(proxy_auth.user, user, userlen);
640 proxy_auth.user[userlen] = '\0';
641 if (passwordlen)
642 memcpy(proxy_auth.password, passwd, passwordlen);
643 proxy_auth.password[passwordlen] = '\0';
645 /* FIXME: @uri is the proxied URI. Maybe the passed URI
646 * should be the proxy URI aka conn->uri. --jonas */
647 response = get_http_auth_digest_response(&proxy_auth, uri);
648 if (response) {
649 add_to_string(&header, "Proxy-Authorization: Digest ");
650 add_to_string(&header, response);
651 add_crlf_to_string(&header);
653 mem_free(response);
656 } else {
/* Basic scheme: base64 of "user:passwd", sent only when a user name is
 * configured. */
657 if (user[0]) {
658 unsigned char *proxy_data;
660 proxy_data = straconcat(user, ":", passwd, (unsigned char *) NULL);
661 if (proxy_data) {
662 unsigned char *proxy_64 = base64_encode(proxy_data);
664 if (proxy_64) {
665 add_to_string(&header, "Proxy-Authorization: Basic ");
666 add_to_string(&header, proxy_64);
667 add_crlf_to_string(&header);
668 mem_free(proxy_64);
670 mem_free(proxy_data);
676 /* CONNECT: User-Agent does not reveal anything about the
677 * resource we're fetching, and it may help the proxy return
678 * better error messages. */
679 optstr = get_opt_str("protocol.http.user_agent");
/* An empty value or a single space suppresses the User-Agent header. */
680 if (*optstr && strcmp(optstr, " ")) {
681 unsigned char *ustr, ts[64] = "";
683 add_to_string(&header, "User-Agent: ");
/* ts becomes "<width>x<height>" of the last terminal in the list and is
 * substituted for %t. */
685 if (!list_empty(terminals)) {
686 unsigned int tslen = 0;
687 struct terminal *term = terminals.prev;
689 ulongcat(ts, &tslen, term->width, 3, 0);
690 ts[tslen++] = 'x';
691 ulongcat(ts, &tslen, term->height, 3, 0);
693 ustr = subst_user_agent(optstr, VERSION_STRING, system_name,
694 ts);
696 if (ustr) {
697 add_to_string(&header, ustr);
698 mem_free(ustr);
701 add_crlf_to_string(&header);
704 /* CONNECT: Referer probably is a secret page in the HTTPS
705 * server, so don't reveal it to the proxy. */
706 if (!use_connect) {
707 switch (get_opt_int("protocol.http.referer.policy")) {
708 case REFERER_NONE:
709 /* oh well */
710 break;
712 case REFERER_FAKE:
713 optstr = get_opt_str("protocol.http.referer.fake");
714 if (!optstr[0]) break;
715 add_to_string(&header, "Referer: ");
716 add_to_string(&header, optstr);
717 add_crlf_to_string(&header);
718 break;
720 case REFERER_TRUE:
721 if (!conn->referrer) break;
722 add_to_string(&header, "Referer: ");
723 add_url_to_http_string(&header, conn->referrer, URI_HTTP_REFERRER);
724 add_crlf_to_string(&header);
725 break;
727 case REFERER_SAME_URL:
728 add_to_string(&header, "Referer: ");
729 add_url_to_http_string(&header, uri, URI_HTTP_REFERRER);
730 add_crlf_to_string(&header);
731 break;
735 /* CONNECT: Do send all Accept* headers to the CONNECT proxy,
736 * because they do not reveal anything about the resource
737 * we're going to request via TLS, and they may affect the
738 * error message if the CONNECT request fails.
740 * If ELinks is ever changed to vary its Accept headers based
741 * on what it intends to do with the returned resource, e.g.
742 * sending "Accept: text/css" when it wants an external
743 * stylesheet, then it should do that only in the inner GET
744 * and not in the outer CONNECT. */
745 add_to_string(&header, "Accept: */*");
746 add_crlf_to_string(&header);
748 /* TODO: Make this encoding.c function. */
749 #if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2)
750 add_to_string(&header, "Accept-Encoding: ");
752 #ifdef CONFIG_BZIP2
753 add_to_string(&header, "bzip2");
754 #endif
756 #ifdef CONFIG_GZIP
758 #ifdef CONFIG_BZIP2
759 add_to_string(&header, ", ");
760 #endif
762 add_to_string(&header, "gzip");
763 #endif
764 add_crlf_to_string(&header);
765 #endif
/* The Accept-Charset line is built once, on the first request. */
767 if (!accept_charset) {
768 init_accept_charset();
771 if (!(http->bl_flags & SERVER_BLACKLIST_NO_CHARSET)
772 && !get_opt_bool("protocol.http.bugs.accept_charset")
773 && accept_charset) {
774 add_to_string(&header, accept_charset);
/* An explicitly configured Accept-Language wins over the UI language. */
777 optstr = get_opt_str("protocol.http.accept_language");
778 if (optstr[0]) {
779 add_to_string(&header, "Accept-Language: ");
780 add_to_string(&header, optstr);
781 add_crlf_to_string(&header);
783 #ifdef CONFIG_NLS
784 else if (get_opt_bool("protocol.http.accept_ui_language")) {
785 unsigned char *code = language_to_iso639(current_language);
787 if (code) {
788 add_to_string(&header, "Accept-Language: ");
789 add_to_string(&header, code);
790 add_crlf_to_string(&header);
793 #endif
795 /* CONNECT: Proxy-Connection is intended to be seen by the
796 * proxy. If the CONNECT request succeeds, then the proxy
797 * will forward the remainder of the TCP connection to the
798 * origin server, and Proxy-Connection does not matter; but
799 * if the request fails, then Proxy-Connection may matter. */
800 /* FIXME: What about post-HTTP/1.1?? --Zas */
801 if (HTTP_1_1(http->sent_version)) {
802 if (!IS_PROXY_URI(conn->uri)) {
803 add_to_string(&header, "Connection: ");
804 } else {
805 add_to_string(&header, "Proxy-Connection: ");
808 if (!uri->post || !get_opt_bool("protocol.http.bugs.post_no_keepalive")) {
809 add_to_string(&header, "Keep-Alive");
810 } else {
811 add_to_string(&header, "close");
813 add_crlf_to_string(&header);
816 /* CONNECT: Do not tell the proxy anything we have cached
817 * about the resource. */
818 if (!use_connect && conn->cached) {
819 if (!conn->cached->incomplete && conn->cached->head
820 && conn->cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED) {
821 if (conn->cached->last_modified) {
822 add_to_string(&header, "If-Modified-Since: ");
823 add_to_string(&header, conn->cached->last_modified);
824 add_crlf_to_string(&header);
826 if (conn->cached->etag) {
827 add_to_string(&header, "If-None-Match: ");
828 add_to_string(&header, conn->cached->etag);
829 add_crlf_to_string(&header);
834 /* CONNECT: Let's send cache control headers to the proxy too;
835 * they may affect DNS caching. */
836 if (conn->cache_mode >= CACHE_MODE_FORCE_RELOAD) {
837 add_to_string(&header, "Pragma: no-cache");
838 add_crlf_to_string(&header);
839 add_to_string(&header, "Cache-Control: no-cache");
840 add_crlf_to_string(&header);
843 /* CONNECT: Do not reveal byte ranges to the proxy. It can't
844 * do anything good with that information anyway. */
845 if (!use_connect && (conn->from || conn->progress->start > 0)) {
846 /* conn->from takes precedence. conn->progress.start is set only the first
847 * time, then conn->from gets updated and in case of any retries
848 * etc we have everything interesting in conn->from already. */
849 add_to_string(&header, "Range: bytes=");
850 add_long_to_string(&header, conn->from ? conn->from : conn->progress->start);
851 add_char_to_string(&header, '-');
852 add_crlf_to_string(&header);
855 /* CONNECT: The Authorization header is for the origin server only. */
856 if (!use_connect) {
/* With CONFIG_GSSAPI the stored credentials are looked up only when
 * http_negotiate_output() returns non-zero; without it, always. */
857 #ifdef CONFIG_GSSAPI
858 if (http_negotiate_output(uri, &header) != 0)
859 #endif
860 entry = find_auth(uri);
863 if (entry) {
864 if (entry->digest) {
865 unsigned char *response;
867 response = get_http_auth_digest_response(entry, uri);
868 if (response) {
869 add_to_string(&header, "Authorization: Digest ");
870 add_to_string(&header, response);
871 add_crlf_to_string(&header);
873 mem_free(response);
876 } else {
877 /* RFC2617 section 2 [Basic Authentication Scheme]
879 * To receive authorization, the client sends the userid
880 * and password, separated by a single colon (":")
881 * character, within a base64 [7] encoded string in the
882 * credentials. */
883 unsigned char *id;
885 /* Create base64 encoded string. */
886 id = straconcat(entry->user, ":", entry->password,
887 (unsigned char *) NULL);
888 if (id) {
889 unsigned char *base64 = base64_encode(id);
891 mem_free_set(&id, base64);
894 if (id) {
895 add_to_string(&header, "Authorization: Basic ");
896 add_to_string(&header, id);
897 add_crlf_to_string(&header);
898 mem_free(id);
903 /* CONNECT: Any POST data is for the origin server only. */
904 if (!use_connect && uri->post) {
905 /* We search for first '\n' in uri->post to get content type
906 * as set by get_form_uri(). This '\n' is dropped if any
907 * and replaced by correct '\r\n' termination here. */
908 unsigned char *postend = strchr(uri->post, '\n');
910 if (postend) {
911 add_to_string(&header, "Content-Type: ");
912 add_bytes_to_string(&header, uri->post, postend - uri->post);
913 add_crlf_to_string(&header);
916 post_data = postend ? postend + 1 : uri->post;
/* The post data is stored as two hex characters per byte (see the decoding
 * loop below), so the body is half as long as the string. */
917 add_to_string(&header, "Content-Length: ");
918 add_long_to_string(&header, strlen(post_data) / 2);
919 add_crlf_to_string(&header);
922 #ifdef CONFIG_COOKIES
923 /* CONNECT: Cookies are for the origin server only. */
924 if (!use_connect) {
925 struct string *cookies = send_cookies(uri);
927 if (cookies) {
928 add_to_string(&header, "Cookie: ");
929 add_string_to_string(&header, cookies);
930 add_crlf_to_string(&header);
931 done_string(cookies);
934 #endif
/* Blank line that ends the header section. */
936 add_crlf_to_string(&header);
938 /* CONNECT: Any POST data is for the origin server only.
939 * This was already checked above and post_data is NULL
940 * in that case. Verified with an assertion below. */
941 if (post_data) {
942 #define POST_BUFFER_SIZE 4096
943 unsigned char *post = post_data;
944 unsigned char buffer[POST_BUFFER_SIZE];
945 int n = 0;
947 assert(!use_connect); /* see comment above */
/* Decode the hex pairs into raw bytes, flushing the stack buffer into the
 * request each time it fills up. A trailing unpaired character is ignored. */
949 while (post[0] && post[1]) {
950 int h1, h2;
952 h1 = unhx(post[0]);
953 assertm(h1 >= 0 && h1 < 16, "h1 in the POST buffer is %d (%d/%c)", h1, post[0], post[0]);
954 if_assert_failed h1 = 0;
956 h2 = unhx(post[1]);
957 assertm(h2 >= 0 && h2 < 16, "h2 in the POST buffer is %d (%d/%c)", h2, post[1], post[1]);
958 if_assert_failed h2 = 0;
960 buffer[n++] = (h1<<4) + h2;
961 post += 2;
962 if (n == POST_BUFFER_SIZE) {
963 add_bytes_to_string(&header, buffer, n);
964 n = 0;
968 if (n)
969 add_bytes_to_string(&header, buffer, n);
970 #undef POST_BUFFER_SIZE
973 request_from_socket(socket, header.source, header.length, S_SENT,
974 SOCKET_END_ONCLOSE, http_got_header);
975 done_string(&header);
979 /* This function decompresses the data block given in @data (if it was
980 * compressed), which is long @len bytes. The decompressed data block is given
981 * back to the world as the return value and its length is stored into
982 * @new_len.
984 * In this function, value of either http->chunk_remaining or http->length is
985 * being changed (it depends on if chunked mode is used or not).
987 * Note that the function is still a little esoteric for me. Don't take it
988 * lightly and don't mess with it without grave reason! If you dare to touch
989 * this without testing the changes on slashdot, freshmeat and cvsweb
990 * (including revision history), don't dare to send me any patches! ;) --pasky
992 * This function gotta die. */
/* Return value: with ENCODING_NONE, @data itself is returned and *@new_len
 * is @len. Otherwise the result is a buffer grown with mem_realloc(), or
 * NULL with *@new_len == 0 when nothing was produced or an error occurred.
 * The encoded bytes are fed through conn->stream_pipes and read back via
 * conn->stream, both created on demand. */
993 static unsigned char *
994 decompress_data(struct connection *conn, unsigned char *data, int len,
995 int *new_len)
997 struct http_connection_info *http = conn->info;
998 /* to_read is number of bytes to be read from the decoder. It is 65536
999 * (then we are just emptying the decoder buffer as we finished the walk
1000 * through the incoming stream already) or PIPE_BUF / 2 (when we are
1001 * still walking through the stream - then we write PIPE_BUF / 2 to the
1002 * pipe and read it back to the decoder ASAP; the point is that we can't
1003 * write more than PIPE_BUF to the pipe at once, but we also have to
1004 * never let read_encoded() (gzread(), in fact) to empty the pipe - that
1005 * causes further malfunction of zlib :[ ... so we will make sure that
1006 * we will always have at least PIPE_BUF / 2 + 1 in the pipe (returning
1007 * early otherwise)). */
1008 enum { NORMAL, FINISHING } state = NORMAL;
1009 int did_read = 0;
1010 int *length_of_block;
1011 unsigned char *output = NULL;
/* Pick the counter to decrement: the current chunk in chunked mode,
 * otherwise the overall body length. */
1013 length_of_block = (http->length == LEN_CHUNKED ? &http->chunk_remaining
1014 : &http->length);
1016 #define BIG_READ 65536
1017 if (!*length_of_block) {
1018 /* Going to finish this decoding business. */
1019 state = FINISHING;
/* No content encoding: pass the input through unchanged. */
1022 if (conn->content_encoding == ENCODING_NONE) {
1023 *new_len = len;
1024 if (*length_of_block > 0) *length_of_block -= len;
1025 return data;
1028 *new_len = 0; /* new_len must be zero if we would ever return NULL */
/* Create the non-blocking pipe pair on first use. */
1030 if (conn->stream_pipes[0] == -1
1031 && (c_pipe(conn->stream_pipes) < 0
1032 || set_nonblocking_fd(conn->stream_pipes[0]) < 0
1033 || set_nonblocking_fd(conn->stream_pipes[1]) < 0)) {
1034 return NULL;
1037 do {
1038 /* The initial value is used only when state == NORMAL.
1039 * Unconditional initialization avoids a GCC warning. */
1040 int to_read = PIPE_BUF / 2;
1042 if (state == NORMAL) {
1043 /* ... we aren't finishing yet. */
1044 int written;
1046 written = safe_write(conn->stream_pipes[1], data,
1047 len > to_read ? to_read : len);
1049 if (written > 0) {
1050 data += written;
1051 len -= written;
1053 /* In non-keep-alive connections http->length == -1, so the test below */
1054 if (*length_of_block > 0)
1055 *length_of_block -= written;
1056 /* http->length is 0 at the end of block for all modes: keep-alive,
1057 * non-keep-alive and chunked */
1058 if (!http->length) {
1059 /* That's all, folks - let's finish this. */
1060 state = FINISHING;
1061 } else if (!len) {
1062 /* We've done for this round (but not done
1063 * completely). Thus we will get out with
1064 * what we have and leave what we wrote to
1065 * the next round - we have to do that since
1066 * we MUST NOT ever empty the pipe completely
1067 * - this would cause a disaster for
1068 * read_encoded(), which would simply not
1069 * work right then. */
1070 return output;
/* Open the decoder on the read end of the pipe on first use. */
1075 if (!conn->stream) {
1076 conn->stream = open_encoded(conn->stream_pipes[0],
1077 conn->content_encoding);
1078 if (!conn->stream) return NULL;
/* NOTE(review): if mem_realloc() behaves like realloc() and leaves the old
 * buffer allocated on failure, assigning its result straight to output
 * loses the previous buffer -- confirm against util/memory.h. */
1081 output = (unsigned char *) mem_realloc(output, *new_len + BIG_READ);
1082 if (!output) break;
1084 did_read = read_encoded(conn->stream, output + *new_len, BIG_READ);
1086 if (did_read > 0) *new_len += did_read;
1087 else if (did_read == -1) {
1088 mem_free_set(&output, NULL);
1089 *new_len = 0;
1090 break; /* Loop prevention (bug 517), is this correct ? --Zas */
/* Keep going while input remains or the decoder filled the whole read
 * buffer, which suggests it may hold more. */
1092 } while (len || did_read == BIG_READ);
1094 shutdown_connection_stream(conn);
1095 return output;
1098 static int
1099 is_line_in_buffer(struct read_buffer *rb)
1101 int l;
1103 for (l = 0; l < rb->length; l++) {
1104 unsigned char a0 = rb->data[l];
1106 if (a0 == ASCII_LF)
1107 return l + 1;
1108 if (a0 == ASCII_CR) {
1109 if (rb->data[l + 1] == ASCII_LF
1110 && l < rb->length - 1)
1111 return l + 2;
1112 if (l == rb->length - 1)
1113 return 0;
1115 if (a0 < ' ')
1116 return -1;
1118 return 0;
1121 static void read_http_data(struct socket *socket, struct read_buffer *rb);
1123 static void
1124 read_more_http_data(struct connection *conn, struct read_buffer *rb,
1125 int already_got_anything)
1127 enum connection_state state = already_got_anything ? S_TRANS : conn->state;
1129 read_from_socket(conn->socket, rb, state, read_http_data);
1132 static void
1133 read_http_data_done(struct connection *conn)
1135 struct http_connection_info *http = conn->info;
1137 /* There's no content but an error so just print
1138 * that instead of nothing. */
1139 if (!conn->from) {
1140 if (http->code >= 400) {
1141 http_error_document(conn, http->code);
1143 } else {
1144 /* This is not an error, thus fine. No need generate any
1145 * document, as this may be empty and it's not a problem.
1146 * In case of 3xx, we're probably just getting kicked to
1147 * another page anyway. And in case of 2xx, the document
1148 * may indeed be empty and thus the user should see it so. */
1152 http_end_request(conn, S_OK, 0);
1155 /* Returns:
1156 * -1 on error
1157 * 0 if more to read
1158 * 1 if done
1160 static int
1161 read_chunked_http_data(struct connection *conn, struct read_buffer *rb)
1163 struct http_connection_info *http = conn->info;
1164 int total_data_len = 0;
1166 while (1) {
1167 /* Chunked. Good luck! */
1168 /* See RFC2616, section 3.6.1. Basically, it looks like:
1169 * 1234 ; a = b ; c = d\r\n
1170 * aklkjadslkfjalkfjlkajkljfdkljdsfkljdf*1234\r\n
1171 * 0\r\n
1172 * \r\n */
1173 if (http->chunk_remaining == CHUNK_DATA_END) {
1174 int l = is_line_in_buffer(rb);
1176 if (l) {
1177 if (l == -1) {
1178 /* Invalid character in buffer. */
1179 return -1;
1182 /* Remove everything to the EOLN. */
1183 kill_buffer_data(rb, l);
1184 if (l <= 2) {
1185 /* Empty line. */
1186 return 2;
1188 continue;
1191 } else if (http->chunk_remaining == CHUNK_SIZE) {
1192 int l = is_line_in_buffer(rb);
1194 if (l) {
1195 unsigned char *de;
1196 int n = 0;
1198 if (l != -1) {
1199 errno = 0;
1200 n = strtol(rb->data, (char **) &de, 16);
1201 if (errno || !*de) {
1202 return -1;
1206 if (l == -1 || de == rb->data) {
1207 return -1;
1210 /* Remove everything to the EOLN. */
1211 kill_buffer_data(rb, l);
1212 http->chunk_remaining = n;
1213 if (!http->chunk_remaining)
1214 http->chunk_remaining = CHUNK_ZERO_SIZE;
1215 continue;
1218 } else {
1219 unsigned char *data;
1220 int data_len;
1221 int len;
1222 int zero = (http->chunk_remaining == CHUNK_ZERO_SIZE);
1224 if (zero) http->chunk_remaining = 0;
1225 len = http->chunk_remaining;
1227 /* Maybe everything necessary didn't come yet.. */
1228 int_upper_bound(&len, rb->length);
1229 conn->received += len;
1231 data = decompress_data(conn, rb->data, len, &data_len);
1233 if (add_fragment(conn->cached, conn->from,
1234 data, data_len) == 1)
1235 conn->tries = 0;
1237 if (data && data != rb->data) mem_free(data);
1239 conn->from += data_len;
1240 total_data_len += data_len;
1242 kill_buffer_data(rb, len);
1244 if (zero) {
1245 /* Last chunk has zero length, so this is last
1246 * chunk, we finished decompression just now
1247 * and now we can happily finish reading this
1248 * stuff. */
1249 http->chunk_remaining = CHUNK_DATA_END;
1250 continue;
1253 if (!http->chunk_remaining && rb->length > 0) {
1254 /* Eat newline succeeding each chunk. */
1255 if (rb->data[0] == ASCII_LF) {
1256 kill_buffer_data(rb, 1);
1257 } else {
1258 if (rb->data[0] != ASCII_CR
1259 || (rb->length >= 2
1260 && rb->data[1] != ASCII_LF)) {
1261 return -1;
1263 if (rb->length < 2) break;
1264 kill_buffer_data(rb, 2);
1266 http->chunk_remaining = CHUNK_SIZE;
1267 continue;
1270 break;
1273 /* More to read. */
1274 return !!total_data_len;
1277 /* Returns 0 if more data, 1 if done. */
1278 static int
1279 read_normal_http_data(struct connection *conn, struct read_buffer *rb)
1281 struct http_connection_info *http = conn->info;
1282 unsigned char *data;
1283 int data_len;
1284 int len = rb->length;
1286 if (http->length >= 0 && http->length < len) {
1287 /* We won't read more than we have to go. */
1288 len = http->length;
1291 conn->received += len;
1293 data = decompress_data(conn, rb->data, len, &data_len);
1295 if (add_fragment(conn->cached, conn->from, data, data_len) == 1)
1296 conn->tries = 0;
1298 if (data && data != rb->data) mem_free(data);
1300 conn->from += data_len;
1302 kill_buffer_data(rb, len);
1304 if (!http->length && conn->socket->state == SOCKET_RETRY_ONCLOSE) {
1305 return 2;
1308 return !!data_len;
1311 static void
1312 read_http_data(struct socket *socket, struct read_buffer *rb)
1314 struct connection *conn = socket->conn;
1315 struct http_connection_info *http = conn->info;
1316 int ret;
1318 if (socket->state == SOCKET_CLOSED) {
1319 if (conn->content_encoding && http->length == -1) {
1320 /* Flush decompression first. */
1321 http->length = 0;
1322 } else {
1323 read_http_data_done(conn);
1324 return;
1328 if (http->length != LEN_CHUNKED) {
1329 ret = read_normal_http_data(conn, rb);
1331 } else {
1332 ret = read_chunked_http_data(conn, rb);
1335 switch (ret) {
1336 case 0:
1337 read_more_http_data(conn, rb, 0);
1338 break;
1339 case 1:
1340 read_more_http_data(conn, rb, 1);
1341 break;
1342 case 2:
1343 read_http_data_done(conn);
1344 break;
1345 default:
1346 assertm(ret == -1, "Unexpected return value: %d", ret);
1347 abort_connection(conn, S_HTTP_ERROR);
1351 /* Returns offset of the header end, zero if more data is needed, -1 when
1352 * incorrect data was received, -2 if this is HTTP/0.9 and no header is to
1353 * come. */
1354 static int
1355 get_header(struct read_buffer *rb)
1357 int i;
1359 /* XXX: We will have to do some guess about whether an HTTP header is
1360 * coming or not, in order to support HTTP/0.9 reply correctly. This
1361 * means a little code duplication with get_http_code(). --pasky */
1362 if (rb->length > 4 && strncasecmp(rb->data, "HTTP/", 5))
1363 return -2;
1365 for (i = 0; i < rb->length; i++) {
1366 unsigned char a0 = rb->data[i];
1367 unsigned char a1 = rb->data[i + 1];
1369 if (a0 == 0) {
1370 rb->data[i] = ' ';
1371 continue;
1373 if (a0 == ASCII_LF && a1 == ASCII_LF
1374 && i < rb->length - 1)
1375 return i + 2;
1376 if (a0 == ASCII_CR && i < rb->length - 3) {
1377 if (a1 == ASCII_CR) continue;
1378 if (a1 != ASCII_LF) return -1;
1379 if (rb->data[i + 2] == ASCII_CR) {
1380 if (rb->data[i + 3] != ASCII_LF) return -1;
1381 return i + 4;
1386 return 0;
/* Walk all @header_field headers in @header and act on the first usable
 * authentication challenge: Basic (only when it carries a realm), Digest
 * or, with CONFIG_GSSAPI, one of the negotiate schemes.
 *
 * Returns 1 if we need to retry the connection (for negotiate-auth only),
 * 0 otherwise. */
static int
check_http_authentication(struct connection *conn, struct uri *uri,
			  unsigned char *header, unsigned char *header_field)
{
	unsigned char *rest, *value;
	int need_retry = 0;

	for (value = parse_header(header, header_field, &rest);
	     value;
	     value = parse_header(rest, header_field, &rest)) {
		/* Set once a challenge was consumed; ends the scan. */
		int handled = 0;

		if (!strncasecmp(value, "Basic", 5)) {
			unsigned char *realm = get_header_param(value, "realm");

			if (realm) {
				add_auth_entry(uri, realm, NULL, NULL, 0);
				mem_free(realm);
				handled = 1;
			}
		} else if (!strncasecmp(value, "Digest", 6)) {
			unsigned char *realm = get_header_param(value, "realm");
			unsigned char *nonce = get_header_param(value, "nonce");
			unsigned char *opaque = get_header_param(value, "opaque");

			add_auth_entry(uri, realm, nonce, opaque, 1);

			mem_free_if(realm);
			mem_free_if(nonce);
			mem_free_if(opaque);
			handled = 1;
		}
#ifdef CONFIG_GSSAPI
		else if (!strncasecmp(value, HTTPNEG_GSS_STR, HTTPNEG_GSS_STRLEN)) {
			if (http_negotiate_input(conn, uri, HTTPNEG_GSS, rest) == 0)
				need_retry = 1;
			handled = 1;
		}
		else if (!strncasecmp(value, HTTPNEG_NEG_STR, HTTPNEG_NEG_STRLEN)) {
			if (http_negotiate_input(conn, uri, HTTPNEG_NEG, rest) == 0)
				need_retry = 1;
			handled = 1;
		}
#endif

		mem_free(value);
		if (handled)
			break;
	}

	return need_retry;
}
1442 void
1443 http_got_header(struct socket *socket, struct read_buffer *rb)
1445 struct connection *conn = socket->conn;
1446 struct http_connection_info *http = conn->info;
1447 unsigned char *head;
1448 #ifdef CONFIG_COOKIES
1449 unsigned char *cookie, *ch;
1450 #endif
1451 unsigned char *d;
1452 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
1453 struct http_version version;
1454 enum connection_state state = (conn->state != S_PROC ? S_GETH : S_PROC);
1455 int a, h = 200;
1456 int cf;
1458 if (socket->state == SOCKET_CLOSED) {
1459 if (!conn->tries && uri->host) {
1460 if (http->bl_flags & SERVER_BLACKLIST_NO_CHARSET) {
1461 del_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1462 } else {
1463 add_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1464 conn->tries = -1;
1467 retry_connection(conn, S_CANT_READ);
1468 return;
1470 socket->state = SOCKET_RETRY_ONCLOSE;
1472 again:
1473 a = get_header(rb);
1474 if (a == -1) {
1475 abort_connection(conn, S_HTTP_ERROR);
1476 return;
1478 if (!a) {
1479 read_from_socket(conn->socket, rb, state, http_got_header);
1480 return;
1482 if (a == -2) a = 0;
1483 if ((a && get_http_code(rb, &h, &version))
1484 || h == 101) {
1485 abort_connection(conn, S_HTTP_ERROR);
1486 return;
1489 /* When no header, HTTP/0.9 document. That's always text/html,
1490 * according to
1491 * http://www.w3.org/Protocols/HTTP/AsImplemented.html. */
1492 /* FIXME: This usage of fake protocol headers for setting up the
1493 * content type has been obsoleted by the @content_type member of
1494 * {struct cache_entry}. */
1495 head = (a ? memacpy(rb->data, a)
1496 : stracpy("\r\nContent-Type: text/html\r\n"));
1497 if (!head) {
1498 abort_connection(conn, S_OUT_OF_MEM);
1499 return;
1502 if (check_http_server_bugs(uri, http, head)) {
1503 mem_free(head);
1504 retry_connection(conn, S_RESTART);
1505 return;
1508 #ifdef CONFIG_CGI
1509 if (uri->protocol == PROTOCOL_FILE) {
1510 /* ``Status'' is not a standard HTTP header field although some
1511 * HTTP servers like www.php.net uses it for some reason. It should
1512 * only be used for CGI scripts so that it does not interfere
1513 * with status code depended handling for ``normal'' HTTP like
1514 * redirects. */
1515 d = parse_header(head, "Status", NULL);
1516 if (d) {
1517 int h2 = atoi(d);
1519 mem_free(d);
1520 if (h2 >= 100 && h2 < 600) h = h2;
1521 if (h == 101) {
1522 mem_free(head);
1523 abort_connection(conn, S_HTTP_ERROR);
1524 return;
1528 #endif
1530 #ifdef CONFIG_COOKIES
1531 ch = head;
1532 while ((cookie = parse_header(ch, "Set-Cookie", &ch))) {
1533 set_cookie(uri, cookie);
1534 mem_free(cookie);
1536 #endif
1537 http->code = h;
1539 if (h == 100) {
1540 mem_free(head);
1541 state = S_PROC;
1542 kill_buffer_data(rb, a);
1543 goto again;
1545 if (h < 200) {
1546 mem_free(head);
1547 abort_connection(conn, S_HTTP_ERROR);
1548 return;
1550 if (h == 304) {
1551 mem_free(head);
1552 http_end_request(conn, S_OK, 1);
1553 return;
1555 if (h == 204) {
1556 mem_free(head);
1557 http_end_request(conn, S_HTTP_204, 0);
1558 return;
1560 if (h == 200 && connection_is_https_proxy(conn) && !conn->socket->ssl) {
1561 #ifdef CONFIG_SSL
1562 mem_free(head);
1563 socket->need_ssl = 1;
1564 complete_connect_socket(socket, uri, http_send_header);
1565 #else
1566 abort_connection(conn, S_SSL_ERROR);
1567 #endif
1568 return;
1571 conn->cached = get_cache_entry(conn->uri);
1572 if (!conn->cached) {
1573 mem_free(head);
1574 abort_connection(conn, S_OUT_OF_MEM);
1575 return;
1577 mem_free_set(&conn->cached->head, head);
1579 if (!get_opt_bool("document.cache.ignore_cache_control")) {
1580 struct cache_entry *cached = conn->cached;
1582 /* I am not entirely sure in what order we should process these
1583 * headers and if we should still process Cache-Control max-age
1584 * if we already set max age to date mentioned in Expires.
1585 * --jonas */
1586 /* Ensure that when ever cached->max_age is set, cached->expired
1587 * is also set, so the cache management knows max_age contains a
1588 * valid time. If on the other hand no caching is requested
1589 * cached->expire should be set to zero. */
1590 if ((d = parse_header(cached->head, "Expires", NULL))) {
1591 /* Convert date to seconds. */
1592 time_t expires = parse_date(&d, NULL, 0, 1);
1594 mem_free(d);
1596 if (expires && cached->cache_mode != CACHE_MODE_NEVER) {
1597 timeval_from_seconds(&cached->max_age, expires);
1598 cached->expire = 1;
1602 if ((d = parse_header(cached->head, "Pragma", NULL))) {
1603 if (strstr(d, "no-cache")) {
1604 cached->cache_mode = CACHE_MODE_NEVER;
1605 cached->expire = 0;
1607 mem_free(d);
1610 if (cached->cache_mode != CACHE_MODE_NEVER
1611 && (d = parse_header(cached->head, "Cache-Control", NULL))) {
1612 if (strstr(d, "no-cache") || strstr(d, "must-revalidate")) {
1613 cached->cache_mode = CACHE_MODE_NEVER;
1614 cached->expire = 0;
1616 } else {
1617 unsigned char *pos = strstr(d, "max-age=");
1619 assert(cached->cache_mode != CACHE_MODE_NEVER);
1621 if (pos) {
1622 /* Grab the number of seconds. */
1623 timeval_T max_age;
1625 timeval_from_seconds(&max_age, atol(pos + 8));
1626 timeval_now(&cached->max_age);
1627 timeval_add_interval(&cached->max_age, &max_age);
1629 cached->expire = 1;
1633 mem_free(d);
1637 /* XXX: Is there some reason why NOT to follow the Location header
1638 * for any status? If the server didn't mean it, it wouldn't send
1639 * it, after all...? --pasky */
1640 if (h == 201 || h == 301 || h == 302 || h == 303 || h == 307) {
1641 d = parse_header(conn->cached->head, "Location", NULL);
1642 if (d) {
1643 int use_get_method = (h == 303);
1645 /* A note from RFC 2616 section 10.3.3:
1646 * RFC 1945 and RFC 2068 specify that the client is not
1647 * allowed to change the method on the redirected
1648 * request. However, most existing user agent
1649 * implementations treat 302 as if it were a 303
1650 * response, performing a GET on the Location
1651 * field-value regardless of the original request
1652 * method. */
1653 /* So POST must not be redirected to GET, but some
1654 * BUGGY message boards rely on it :-( */
1655 if (h == 302
1656 && get_opt_bool("protocol.http.bugs.broken_302_redirect"))
1657 use_get_method = 1;
1659 redirect_cache(conn->cached, d, use_get_method, -1);
1660 mem_free(d);
1664 if (h == 401) {
1665 if (check_http_authentication(conn, uri,
1666 conn->cached->head, "WWW-Authenticate")) {
1667 retry_connection(conn, S_RESTART);
1668 return;
1672 if (h == 407) {
1673 unsigned char *str;
1675 d = parse_header(conn->cached->head, "Proxy-Authenticate", &str);
1676 while (d) {
1677 if (!strncasecmp(d, "Basic", 5)) {
1678 unsigned char *realm = get_header_param(d, "realm");
1680 if (realm) {
1681 mem_free_set(&proxy_auth.realm, realm);
1682 proxy_auth.digest = 0;
1683 mem_free(d);
1684 break;
1687 } else if (!strncasecmp(d, "Digest", 6)) {
1688 unsigned char *realm = get_header_param(d, "realm");
1689 unsigned char *nonce = get_header_param(d, "nonce");
1690 unsigned char *opaque = get_header_param(d, "opaque");
1692 mem_free_set(&proxy_auth.realm, realm);
1693 mem_free_set(&proxy_auth.nonce, nonce);
1694 mem_free_set(&proxy_auth.opaque, opaque);
1695 proxy_auth.digest = 1;
1697 mem_free(d);
1698 break;
1701 mem_free(d);
1702 d = parse_header(str, "Proxy-Authenticate", &str);
1706 kill_buffer_data(rb, a);
1707 http->close = 0;
1708 http->length = -1;
1709 http->recv_version = version;
1711 if ((d = parse_header(conn->cached->head, "Connection", NULL))
1712 || (d = parse_header(conn->cached->head, "Proxy-Connection", NULL))) {
1713 if (!strcasecmp(d, "close")) http->close = 1;
1714 mem_free(d);
1715 } else if (PRE_HTTP_1_1(version)) {
1716 http->close = 1;
1719 cf = conn->from;
1720 conn->from = 0;
1721 d = parse_header(conn->cached->head, "Content-Range", NULL);
1722 if (d) {
1723 if (strlen(d) > 6) {
1724 d[5] = 0;
1725 if (isdigit(d[6]) && !strcasecmp(d, "bytes")) {
1726 int f;
1728 errno = 0;
1729 f = strtol(d + 6, NULL, 10);
1731 if (!errno && f >= 0) conn->from = f;
1734 mem_free(d);
1736 if (cf && !conn->from && !conn->unrestartable) conn->unrestartable = 1;
1737 if ((conn->progress->start <= 0 && conn->from > cf) || conn->from < 0) {
1738 /* We don't want this if conn->progress.start because then conn->from will
1739 * be probably value of conn->progress.start, while cf is 0. */
1740 abort_connection(conn, S_HTTP_ERROR);
1741 return;
1744 #if 0
1746 struct status *s;
1747 foreach (s, conn->downloads) {
1748 fprintf(stderr, "conn %p status %p pri %d st %d er %d :: ce %s",
1749 conn, s, s->pri, s->state, s->prev_error,
1750 s->cached ? s->cached->url : (unsigned char *) "N-U-L-L");
1753 #endif
1755 if (conn->progress->start >= 0) {
1756 /* Update to the real value which we've got from Content-Range. */
1757 conn->progress->seek = conn->from;
1759 conn->progress->start = conn->from;
1761 d = parse_header(conn->cached->head, "Content-Length", NULL);
1762 if (d) {
1763 unsigned char *ep;
1764 int l;
1766 errno = 0;
1767 l = strtol(d, (char **) &ep, 10);
1769 if (!errno && !*ep && l >= 0) {
1770 if (!http->close || POST_HTTP_1_0(version))
1771 http->length = l;
1772 conn->est_length = conn->from + l;
1774 mem_free(d);
1777 if (!conn->unrestartable) {
1778 d = parse_header(conn->cached->head, "Accept-Ranges", NULL);
1780 if (d) {
1781 if (!strcasecmp(d, "none"))
1782 conn->unrestartable = 1;
1783 mem_free(d);
1784 } else {
1785 if (!conn->from)
1786 conn->unrestartable = 1;
1790 d = parse_header(conn->cached->head, "Transfer-Encoding", NULL);
1791 if (d) {
1792 if (!strcasecmp(d, "chunked")) {
1793 http->length = LEN_CHUNKED;
1794 http->chunk_remaining = CHUNK_SIZE;
1796 mem_free(d);
1798 if (!http->close && http->length == -1) http->close = 1;
1800 d = parse_header(conn->cached->head, "Last-Modified", NULL);
1801 if (d) {
1802 if (conn->cached->last_modified && strcasecmp(conn->cached->last_modified, d)) {
1803 delete_entry_content(conn->cached);
1804 if (conn->from) {
1805 conn->from = 0;
1806 mem_free(d);
1807 retry_connection(conn, S_MODIFIED);
1808 return;
1811 if (!conn->cached->last_modified) conn->cached->last_modified = d;
1812 else mem_free(d);
1814 if (!conn->cached->last_modified) {
1815 d = parse_header(conn->cached->head, "Date", NULL);
1816 if (d) conn->cached->last_modified = d;
1819 /* FIXME: Parse only if HTTP/1.1 or later? --Zas */
1820 d = parse_header(conn->cached->head, "ETag", NULL);
1821 if (d) {
1822 if (conn->cached->etag) {
1823 unsigned char *old_tag = conn->cached->etag;
1824 unsigned char *new_tag = d;
1826 /* http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19 */
1828 if (new_tag[0] == 'W' && new_tag[1] == '/')
1829 new_tag += 2;
1831 if (old_tag[0] == 'W' && old_tag[1] == '/')
1832 old_tag += 2;
1834 if (strcmp(new_tag, old_tag)) {
1835 delete_entry_content(conn->cached);
1836 if (conn->from) {
1837 conn->from = 0;
1838 mem_free(d);
1839 retry_connection(conn, S_MODIFIED);
1840 return;
1845 if (!conn->cached->etag)
1846 conn->cached->etag = d;
1847 else
1848 mem_free(d);
1851 d = parse_header(conn->cached->head, "Content-Encoding", NULL);
1852 if (d) {
1853 unsigned char *extension = get_extension_from_uri(uri);
1854 enum stream_encoding file_encoding;
1856 file_encoding = extension ? guess_encoding(extension) : ENCODING_NONE;
1857 mem_free_if(extension);
1859 /* If the content is encoded, we want to preserve the encoding
1860 * if it is implied by the extension, so that saving the URI
1861 * will leave the saved file with the correct encoding. */
1862 #ifdef CONFIG_GZIP
1863 if (file_encoding != ENCODING_GZIP
1864 && (!strcasecmp(d, "gzip") || !strcasecmp(d, "x-gzip")))
1865 conn->content_encoding = ENCODING_GZIP;
1866 #endif
1868 #ifdef CONFIG_BZIP2
1869 if (file_encoding != ENCODING_BZIP2
1870 && (!strcasecmp(d, "bzip2") || !strcasecmp(d, "x-bzip2")))
1871 conn->content_encoding = ENCODING_BZIP2;
1872 #endif
1874 mem_free(d);
1877 if (conn->content_encoding != ENCODING_NONE) {
1878 mem_free_if(conn->cached->encoding_info);
1879 conn->cached->encoding_info = stracpy(get_encoding_name(conn->content_encoding));
1882 if (http->length == -1
1883 || (PRE_HTTP_1_1(http->recv_version) && http->close))
1884 socket->state = SOCKET_END_ONCLOSE;
1886 read_http_data(socket, rb);