Tone down the protocol.http.compression warning
[elinks.git] / src / protocol / http / http.c
blob5d30ca7caf376ed2c0a0b8cdfeb142374d464cd7
1 /* Internal "http" protocol implementation */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <errno.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #ifdef HAVE_UNISTD_H
12 #include <unistd.h>
13 #endif
14 #ifdef HAVE_FCNTL_H
15 #include <fcntl.h> /* OS/2 needs this after sys/types.h */
16 #endif
17 #ifdef HAVE_LIMITS_H
18 #include <limits.h>
19 #endif
21 #include "elinks.h"
23 #include "cache/cache.h"
24 #include "config/options.h"
25 #include "cookies/cookies.h"
26 #include "intl/charsets.h"
27 #include "intl/gettext/libintl.h"
28 #include "main/module.h"
29 #include "network/connection.h"
30 #include "network/progress.h"
31 #include "network/socket.h"
32 #include "osdep/ascii.h"
33 #include "osdep/osdep.h"
34 #include "osdep/sysname.h"
35 #include "protocol/auth/auth.h"
36 #include "protocol/auth/digest.h"
37 #include "protocol/date.h"
38 #include "protocol/header.h"
39 #include "protocol/http/blacklist.h"
40 #include "protocol/http/codes.h"
41 #include "protocol/http/http.h"
42 #include "protocol/uri.h"
43 #include "session/session.h"
44 #include "terminal/terminal.h"
45 #include "util/base64.h"
46 #include "util/conv.h"
47 #include "util/memory.h"
48 #include "util/string.h"
50 #ifdef CONFIG_GSSAPI
51 #include "http_negotiate.h"
52 #endif
/* An HTTP protocol version split into its major and minor numbers. */
struct http_version {
	int major;
	int minor;
};

/* Version predicates.  Each one takes a struct http_version object (not a
 * pointer): the expansions access (x).major and (x).minor directly.
 * The PRE_ and POST_ forms are strict, e.g. POST_HTTP_1_0 is false for 1.0
 * itself and PRE_HTTP_1_1 is true for anything up to and including 1.0. */
#define HTTP_0_9(x)	 ((x).major == 0 && (x).minor == 9)
#define HTTP_1_0(x)	 ((x).major == 1 && (x).minor == 0)
#define HTTP_1_1(x)	 ((x).major == 1 && (x).minor == 1)
#define PRE_HTTP_1_0(x)  ((x).major < 1)
#define PRE_HTTP_1_1(x)  (PRE_HTTP_1_0(x) || HTTP_1_0(x))
#define POST_HTTP_1_0(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 0))
#define POST_HTTP_1_1(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 1))
/* Per-connection HTTP state.  init_http_connection_info() allocates it and
 * stores it in conn->info. */
struct http_connection_info {
	/* Blacklist flags; init_http_connection_info() fills them from
	 * get_blacklist_flags() for non-file URIs. */
	enum blacklist_flags bl_flags;
	/* Presumably the version announced by the server; it is not assigned
	 * in the part of the file reviewed here -- TODO confirm. */
	struct http_version recv_version;
	/* The version written into our request line by http_send_header(). */
	struct http_version sent_version;

	/* When non-zero, http_end_request() aborts the connection instead of
	 * keeping it alive. */
	int close;

#define LEN_CHUNKED -2 /* == we get data in unknown number of chunks */
#define LEN_FINISHED 0
	/* Remaining body length, or one of the LEN_* values above.
	 * decompress_data() notes that it is -1 on non-keep-alive
	 * connections. */
	int length;

	/* Either bytes coming in this chunk yet or "parser state". */
#define CHUNK_DATA_END	-3
#define CHUNK_ZERO_SIZE	-2
#define CHUNK_SIZE	-1
	int chunk_remaining;

	/* Presumably the HTTP status code of the response -- TODO confirm
	 * against the code that sets it. */
	int code;
};
/* Proxy authentication state.  http_send_header() refills the user and
 * password fields from the protocol.http.proxy options whenever it builds a
 * Digest Proxy-Authorization header; the realm, nonce and opaque strings are
 * released by done_http(). */
static struct auth_entry proxy_auth;

/* Ready-to-send "Accept-Charset: ..." header line.  Built on first use by
 * init_accept_charset() and released by done_http(). */
static unsigned char *accept_charset = NULL;
/* Option tree registered by the HTTP module (see http_protocol_module):
 * everything under protocol.http and protocol.https.  The captions and
 * descriptions are wrapped in N_() so they can be translated; do not edit
 * them casually, translations are keyed on the exact text. */
static struct option_info http_options[] = {
	INIT_OPT_TREE("protocol", N_("HTTP"),
		"http", 0,
		N_("HTTP-specific options.")),


	INIT_OPT_TREE("protocol.http", N_("Server bug workarounds"),
		"bugs", 0,
		N_("Server-side HTTP bugs workarounds.")),

	/* Note the inverted sense: http_send_header() sends Accept-Charset
	 * only when this option is false. */
	INIT_OPT_BOOL("protocol.http.bugs", N_("Do not send Accept-Charset"),
		"accept_charset", 0, 1,
		N_("The Accept-Charset header is quite long and sending it can trigger\n"
		"bugs in some rarely found servers.")),

	INIT_OPT_BOOL("protocol.http.bugs", N_("Allow blacklisting"),
		"allow_blacklist", 0, 1,
		N_("Allow blacklisting of buggy servers.")),

	INIT_OPT_BOOL("protocol.http.bugs", N_("Broken 302 redirects"),
		"broken_302_redirect", 0, 1,
		N_("Broken 302 redirect (violates RFC but compatible with Netscape).\n"
		"This is a problem for a lot of web discussion boards and the like.\n"
		"If they will do strange things to you, try to play with this.")),

	INIT_OPT_BOOL("protocol.http.bugs", N_("No keepalive after POST requests"),
		"post_no_keepalive", 0, 0,
		N_("Disable keepalive connection after POST request.")),

	INIT_OPT_BOOL("protocol.http.bugs", N_("Use HTTP/1.0"),
		"http10", 0, 0,
		N_("Use HTTP/1.0 protocol instead of HTTP/1.1.")),

	INIT_OPT_TREE("protocol.http", N_("Proxy configuration"),
		"proxy", 0,
		N_("HTTP proxy configuration.")),

	INIT_OPT_STRING("protocol.http.proxy", N_("Host and port-number"),
		"host", 0, "",
		N_("Host and port-number (host:port) of the HTTP proxy, or blank.\n"
		"If it's blank, HTTP_PROXY environment variable is checked as well.")),

	INIT_OPT_STRING("protocol.http.proxy", N_("Username"),
		"user", 0, "",
		N_("Proxy authentication username.")),

	INIT_OPT_STRING("protocol.http.proxy", N_("Password"),
		"passwd", 0, "",
		N_("Proxy authentication password.")),


	INIT_OPT_TREE("protocol.http", N_("Referer sending"),
		"referer", 0,
		N_("HTTP referer sending options. HTTP referer is a special header\n"
		"sent in the HTTP requests, which is supposed to contain the previous\n"
		"page visited by the browser. This way, the server can know what link\n"
		"did you follow when accessing that page. However, this behaviour\n"
		"can unfortunately considerably affect privacy and can lead even to a\n"
		"security problem on some badly designed web pages.")),

	/* The three REFERER_* arguments are presumably minimum, maximum and
	 * default value -- TODO confirm against INIT_OPT_INT. */
	INIT_OPT_INT("protocol.http.referer", N_("Policy"),
		"policy", 0,
		REFERER_NONE, REFERER_TRUE, REFERER_TRUE,
		N_("Mode of sending HTTP referer:\n"
		"0 is send no referer\n"
		"1 is send current URL as referer\n"
		"2 is send fixed fake referer\n"
		"3 is send previous URL as referer (correct, but insecure)")),

	INIT_OPT_STRING("protocol.http.referer", N_("Fake referer URL"),
		"fake", 0, "",
		N_("Fake referer to be sent when policy is 2.")),


	INIT_OPT_STRING("protocol.http", N_("Send Accept-Language header"),
		"accept_language", 0, "",
		N_("Send Accept-Language header.")),

	INIT_OPT_BOOL("protocol.http", N_("Use UI language as Accept-Language"),
		"accept_ui_language", 0, 1,
		N_("Request localised versions of documents from web-servers (using the\n"
		"Accept-Language header) using the language you have configured for\n"
		"ELinks' user-interface (this also affects navigator.language ECMAScript\n"
		"value available to scripts). Note that some see this as a potential\n"
		"security risk because it tells web-masters and the FBI sniffers about\n"
		"your language preference.")),

	/* After the compression support has been tested enough,
	 * we might wrap this option in #if CFG_DEBUG. */
	INIT_OPT_BOOL("protocol.http", N_("Enable on-the-fly compression"),
		"compression", 0, 1,
		N_("If enabled, the capability to receive compressed content (gzip and/or\n"
		"bzip2) is announced to the server, which usually sends the reply\n"
		"compressed, thus saving some bandwidth at slight CPU expense.\n"
		"\n"
		"If ELinks displays a incomplete page or garbage, try disabling this\n"
		"option. If that helps, there may be a bug in the decompression part\n"
		"of ELinks. Please report such bugs.\n"
		"\n"
		"If ELinks has been compiled without compression support, this option\n"
		"has no effect. To check the supported features, see Help -> About.")),

	INIT_OPT_BOOL("protocol.http", N_("Activate HTTP TRACE debugging"),
		"trace", 0, 0,
		N_("If active, all HTTP requests are sent with TRACE as their method\n"
		"rather than GET or POST. This is useful for debugging of both ELinks\n"
		"and various server-side scripts --- the server only returns the client's\n"
		"request back to the client verbatim. Note that this type of request may\n"
		"not be enabled on all servers.")),

	/* OSNews.com is supposed to be relying on the textmode token, at least. */
	/* The %v, %s, %t and %b escapes are expanded by subst_user_agent(). */
	INIT_OPT_STRING("protocol.http", N_("User-agent identification"),
		"user_agent", 0, "ELinks/%v (textmode; %s; %t-%b)",
		N_("Change the User Agent ID. That means identification string, which\n"
		"is sent to HTTP server when a document is requested. The 'textmode'\n"
		"token in the first field is our silent attempt to establish this as\n"
		"a standard for new textmode user agents, so that the webmasters can\n"
		"have just a single uniform test for these if they are e.g. pushing\n"
		"some lite version to them automagically.\n"
		"Use \" \" if you don't want any User-Agent header to be sent at all.\n"
		"%v in the string means ELinks version,\n"
		"%s in the string means system identification,\n"
		"%t in the string means size of the terminal,\n"
		"%b in the string means number of bars displayed by ELinks.")),


	INIT_OPT_TREE("protocol", N_("HTTPS"),
		"https", 0,
		N_("HTTPS-specific options.")),

	INIT_OPT_TREE("protocol.https", N_("Proxy configuration"),
		"proxy", 0,
		N_("HTTPS proxy configuration.")),

	INIT_OPT_STRING("protocol.https.proxy", N_("Host and port-number"),
		"host", 0, "",
		N_("Host and port-number (host:port) of the HTTPS CONNECT proxy, or blank.\n"
		"If it's blank, HTTPS_PROXY environment variable is checked as well.")),

	NULL_OPTION_INFO,
};
235 static void done_http();
237 struct module http_protocol_module = struct_module(
238 /* name: */ N_("HTTP"),
239 /* options: */ http_options,
240 /* hooks: */ NULL,
241 /* submodules: */ NULL,
242 /* data: */ NULL,
243 /* init: */ NULL,
244 /* done: */ done_http
248 static void
249 done_http(void)
251 mem_free_if(proxy_auth.realm);
252 mem_free_if(proxy_auth.nonce);
253 mem_free_if(proxy_auth.opaque);
255 free_blacklist();
257 if (accept_charset)
258 mem_free(accept_charset);
261 static void
262 init_accept_charset(void)
264 struct string ac;
265 unsigned char *cs;
266 int i;
268 if (!init_string(&ac)) return;
270 for (i = 0; (cs = get_cp_mime_name(i)); i++) {
271 if (ac.length) {
272 add_to_string(&ac, ", ");
273 } else {
274 add_to_string(&ac, "Accept-Charset: ");
276 add_to_string(&ac, cs);
279 if (ac.length) {
280 add_crlf_to_string(&ac);
283 accept_charset = squeezastring(&ac);
285 done_string(&ac);
289 unsigned char *
290 subst_user_agent(unsigned char *fmt, unsigned char *version,
291 unsigned char *sysname, unsigned char *termsize)
293 struct string agent;
295 if (!init_string(&agent)) return NULL;
297 while (*fmt) {
298 int p;
300 for (p = 0; fmt[p] && fmt[p] != '%'; p++);
302 add_bytes_to_string(&agent, fmt, p);
303 fmt += p;
305 if (*fmt != '%') continue;
307 fmt++;
308 switch (*fmt) {
309 case 'b':
310 if (!list_empty(sessions)) {
311 unsigned char bs[4] = "";
312 int blen = 0;
313 struct session *ses = sessions.prev;
314 int bars = ses->status.show_status_bar
315 + ses->status.show_tabs_bar
316 + ses->status.show_title_bar;
318 ulongcat(bs, &blen, bars, 2, 0);
319 add_to_string(&agent, bs);
321 break;
322 case 'v':
323 add_to_string(&agent, version);
324 break;
325 case 's':
326 add_to_string(&agent, sysname);
327 break;
328 case 't':
329 if (termsize)
330 add_to_string(&agent, termsize);
331 break;
332 default:
333 add_bytes_to_string(&agent, fmt - 1, 2);
334 break;
336 if (*fmt) fmt++;
339 return agent.source;
/* Append the @components of @uri to @header, rewriting characters that must
 * not appear raw in a request line: each space, tab, CR or LF becomes "%20"
 * and each backslash becomes '/'.  Nothing is appended when
 * get_uri_string() returns NULL. */
static void
add_url_to_http_string(struct string *header, struct uri *uri, int components)
{
	/* This block substitutes spaces in URL by %20s. This is
	 * certainly not the right place where to do it, but now the
	 * behaviour is at least improved compared to what we had
	 * before. We should probably encode all URLs as early as
	 * possible, and possibly decode them back in protocol
	 * backends. --pasky */
	unsigned char *url = get_uri_string(uri, components);
	unsigned char *pos;

	if (!url) return;

	for (pos = url; *pos; pos++) {
		/* Length of the run that can be copied unchanged. */
		int plain = strcspn(pos, " \t\r\n\\");

		add_bytes_to_string(header, pos, plain);
		pos += plain;

		if (!*pos) break;

		/* @pos now sits on the character to rewrite; the loop
		 * increment steps over it. */
		if (*pos == '\\')
			add_char_to_string(header, '/');
		else
			add_to_string(header, "%20");
	}

	mem_free(url);
}
/* Convert the decimal digits in [@start, @end) to an integer, reading them
 * from the last character backwards.
 * On success the number is stored in *@value and 0 is returned.  If any
 * character is not a digit, -1 is returned and *@value is left untouched.
 * @end must be > @start: at least one character is always examined. */
static int
revstr2num(unsigned char *start, unsigned char *end, int *value)
{
	unsigned char *pos = end;
	int weight = 1;		/* 1, 10, 100, ... for the current digit */
	int total = 0;

	do {
		int ch = *--pos;

		if (!isdigit(ch))
			return -1; /* NaN */

		total += (ch - '0') * weight;
		weight *= 10;
	} while (pos > start);

	*value = total;
	return 0;
}
393 /* This function extracts code, major and minor version from string
394 * "\s*HTTP/\d+.\d+\s+\d\d\d..."
395 * It returns a negative value on error, 0 on success.
397 static int
398 get_http_code(struct read_buffer *rb, int *code, struct http_version *version)
400 unsigned char *head = rb->data;
401 unsigned char *start;
403 *code = 0;
404 version->major = 0;
405 version->minor = 0;
407 /* Ignore spaces. */
408 while (*head == ' ') head++;
410 /* HTTP/ */
411 if (c_toupper(*head) != 'H' || c_toupper(*++head) != 'T' ||
412 c_toupper(*++head) != 'T' || c_toupper(*++head) != 'P'
413 || *++head != '/')
414 return -1;
416 /* Version */
417 start = ++head;
418 /* Find next '.' */
419 while (*head && *head != '.') head++;
420 /* Sanity check. */
421 if (!*head || !(head - start)
422 || (head - start) > 4
423 || !isdigit(*(head + 1)))
424 return -2;
426 /* Extract major version number. */
427 if (revstr2num(start, head, &version->major)) return -3; /* NaN */
429 start = head + 1;
431 /* Find next ' '. */
432 while (*head && *head != ' ') head++;
433 /* Sanity check. */
434 if (!*head || !(head - start) || (head - start) > 4) return -4;
436 /* Extract minor version number. */
437 if (revstr2num(start, head, &version->minor)) return -5; /* NaN */
439 /* Ignore spaces. */
440 while (*head == ' ') head++;
442 /* Sanity check for code. */
443 if (head[0] < '1' || head[0] > '9' ||
444 !isdigit(head[1]) ||
445 !isdigit(head[2]))
446 return -6; /* Invalid code. */
448 /* Extract code. */
449 *code = (head[0] - '0') * 100 + (head[1] - '0') * 10 + head[2] - '0';
451 return 0;
454 static int
455 check_http_server_bugs(struct uri *uri, struct http_connection_info *http,
456 unsigned char *head)
458 unsigned char *server;
459 const unsigned char *const *s;
460 static const unsigned char *const buggy_servers[] = {
461 "mod_czech/3.1.0",
462 "Purveyor",
463 "Netscape-Enterprise",
464 NULL
467 if (!get_opt_bool("protocol.http.bugs.allow_blacklist")
468 || HTTP_1_0(http->sent_version))
469 return 0;
471 server = parse_header(head, "Server", NULL);
472 if (!server)
473 return 0;
475 for (s = buggy_servers; *s; s++) {
476 if (strstr(server, *s)) {
477 add_blacklist_entry(uri, SERVER_BLACKLIST_HTTP10);
478 break;
482 mem_free(server);
483 return (*s != NULL);
486 static void
487 http_end_request(struct connection *conn, struct connection_state state,
488 int notrunc)
490 shutdown_connection_stream(conn);
492 if (conn->info && !((struct http_connection_info *) conn->info)->close
493 && (!conn->socket->ssl) /* We won't keep alive ssl connections */
494 && (!get_opt_bool("protocol.http.bugs.post_no_keepalive")
495 || !conn->uri->post)) {
496 if (is_in_state(state, S_OK) && conn->cached)
497 normalize_cache_entry(conn->cached, !notrunc ? conn->from : -1);
498 set_connection_state(conn, state);
499 add_keepalive_connection(conn, HTTP_KEEPALIVE_TIMEOUT, NULL);
500 } else {
501 abort_connection(conn, state);
505 static void http_send_header(struct socket *);
507 void
508 http_protocol_handler(struct connection *conn)
510 /* setcstate(conn, S_CONN); */
512 if (!has_keepalive_connection(conn)) {
513 make_connection(conn->socket, conn->uri, http_send_header,
514 conn->cache_mode >= CACHE_MODE_FORCE_RELOAD);
515 } else {
516 http_send_header(conn->socket);
/* Protocol handler entry point for proxied connections.  It simply
 * delegates to http_protocol_handler(); the proxy-specific behaviour is
 * selected later, inside http_send_header(), from conn->uri. */
void
proxy_protocol_handler(struct connection *conn)
{
	http_protocol_handler(conn);
}
/* True if the URI @x (a pointer to a struct with a protocol member) uses the
 * proxy pseudo-protocol. */
#define IS_PROXY_URI(x) ((x)->protocol == PROTOCOL_PROXY)

/* True if @conn goes through a proxy (conn->uri is a proxy URI) and the URI
 * actually being fetched (conn->proxied_uri) is HTTPS. */
#define connection_is_https_proxy(conn) \
	(IS_PROXY_URI((conn)->uri) && (conn)->proxied_uri->protocol == PROTOCOL_HTTPS)
531 struct http_connection_info *
532 init_http_connection_info(struct connection *conn, int major, int minor, int close)
534 struct http_connection_info *http;
536 http = mem_calloc(1, sizeof(*http));
537 if (!http) {
538 http_end_request(conn, connection_state(S_OUT_OF_MEM), 0);
539 return NULL;
542 http->sent_version.major = major;
543 http->sent_version.minor = minor;
544 http->close = close;
546 /* The CGI code uses this too and blacklisting expects a host name. */
547 if (conn->proxied_uri->protocol != PROTOCOL_FILE)
548 http->bl_flags = get_blacklist_flags(conn->proxied_uri);
550 if (http->bl_flags & SERVER_BLACKLIST_HTTP10
551 || get_opt_bool("protocol.http.bugs.http10")) {
552 http->sent_version.major = 1;
553 http->sent_version.minor = 0;
556 /* If called from HTTPS proxy connection the connection info might have
557 * already been allocated. */
558 mem_free_set(&conn->info, http);
560 return http;
/* Append an "Accept-Encoding: ..." line to @header, listing the content
 * codings this build was configured with (CONFIG_BZIP2, CONFIG_GZIP,
 * CONFIG_LZMA), in that order and separated by ", ".  When none of them is
 * configured the function adds nothing at all. */
static void
accept_encoding_header(struct string *header)
{
#if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) || defined(CONFIG_LZMA)
	/* Set once a coding has been written, so that the next one knows it
	 * needs a separator in front of it. */
	int need_separator = 0;

	add_to_string(header, "Accept-Encoding: ");

#ifdef CONFIG_BZIP2
	add_to_string(header, "bzip2");
	need_separator = 1;
#endif

#ifdef CONFIG_GZIP
	if (need_separator) {
		add_to_string(header, ", ");
	}
	add_to_string(header, "deflate, gzip");
	need_separator = 1;
#endif

#ifdef CONFIG_LZMA
	if (need_separator) {
		add_to_string(header, ", ");
	}
	add_to_string(header, "lzma");
#endif

	add_crlf_to_string(header);
#endif
}
/* Build the complete HTTP request for the connection that owns @socket and
 * hand it to request_from_socket(), naming http_got_header as the function
 * to continue with.
 *
 * The request consists of the request line (TRACE, CONNECT, POST or GET),
 * the headers in the order they are added below and, for POST requests, the
 * decoded body.  http_protocol_handler() either calls this directly on a
 * kept-alive connection or passes it to make_connection().
 *
 * For CONNECT requests to an HTTPS proxy only a subset of the headers is
 * sent; each section below carries a "CONNECT:" comment explaining whether
 * and why it applies.
 *
 * On a missing host or on allocation failure the request is ended through
 * http_end_request() and nothing is sent. */
static void
http_send_header(struct socket *socket)
{
	struct connection *conn = socket->conn;
	struct http_connection_info *http;
	int trace = get_opt_bool("protocol.http.trace");
	struct string header;
	unsigned char *post_data = NULL;
	struct auth_entry *entry = NULL;
	struct uri *uri = conn->proxied_uri; /* Set to the real uri */
	unsigned char *optstr;
	int use_connect, talking_to_proxy;

	/* Sanity check for a host */
	if (!uri || !uri->host || !*uri->host || !uri->hostlen) {
		http_end_request(conn, connection_state(S_BAD_URL), 0);
		return;
	}

	/* On failure init_http_connection_info() has already ended the
	 * request. */
	http = init_http_connection_info(conn, 1, 1, 0);
	if (!http) return;

	if (!init_string(&header)) {
		http_end_request(conn, connection_state(S_OUT_OF_MEM), 0);
		return;
	}

	if (!conn->cached) conn->cached = find_in_cache(uri);

	/* Both flags require that the socket is not using SSL: once SSL is
	 * active on a proxied HTTPS connection, neither of them is set. */
	talking_to_proxy = IS_PROXY_URI(conn->uri) && !conn->socket->ssl;
	use_connect = connection_is_https_proxy(conn) && !conn->socket->ssl;

	if (trace) {
		add_to_string(&header, "TRACE ");
	} else if (use_connect) {
		add_to_string(&header, "CONNECT ");
		/* In CONNECT requests, we send only a subset of the
		 * headers to the proxy.  See the "CONNECT:" comments
		 * below.  After the CONNECT request succeeds, we
		 * negotiate TLS with the real server and make a new
		 * HTTP request that includes all the headers.  */
	} else if (uri->post) {
		add_to_string(&header, "POST ");
		conn->unrestartable = 1;
	} else {
		add_to_string(&header, "GET ");
	}

	/* Presumably the URI_DATA component does not include the leading
	 * slash of the path -- TODO confirm against get_uri_string(). */
	if (!talking_to_proxy) {
		add_char_to_string(&header, '/');
	}

	if (use_connect) {
		/* Add port if it was specified or the default port */
		add_uri_to_string(&header, uri, URI_HTTP_CONNECT);
	} else {
		if (connection_is_https_proxy(conn) && conn->socket->ssl) {
			add_url_to_http_string(&header, uri, URI_DATA);

		} else if (talking_to_proxy) {
			add_url_to_http_string(&header, uri, URI_PROXY);

		} else {
			add_url_to_http_string(&header, conn->uri, URI_DATA);
		}
	}

	add_to_string(&header, " HTTP/");
	add_long_to_string(&header, http->sent_version.major);
	add_char_to_string(&header, '.');
	add_long_to_string(&header, http->sent_version.minor);
	add_crlf_to_string(&header);

	/* CONNECT: Sending a Host header seems pointless as the same
	 * information is already in the CONNECT line.  It's harmless
	 * though and Mozilla does it too.  */
	add_to_string(&header, "Host: ");
	add_uri_to_string(&header, uri, URI_HTTP_HOST);
	add_crlf_to_string(&header);

	/* CONNECT: Proxy-Authorization is intended to be seen by the proxy.  */
	if (talking_to_proxy) {
		unsigned char *user = get_opt_str("protocol.http.proxy.user");
		unsigned char *passwd = get_opt_str("protocol.http.proxy.passwd");

		if (proxy_auth.digest) {
			unsigned char *response;
			/* The configured credentials are truncated to what
			 * the fixed-size fields of proxy_auth can hold. */
			int userlen = int_min(strlen(user), AUTH_USER_MAXLEN - 1);
			int passwordlen = int_min(strlen(passwd), AUTH_PASSWORD_MAXLEN - 1);

			if (userlen)
				memcpy(proxy_auth.user, user, userlen);
			proxy_auth.user[userlen] = '\0';
			if (passwordlen)
				memcpy(proxy_auth.password, passwd, passwordlen);
			proxy_auth.password[passwordlen] = '\0';

			/* FIXME: @uri is the proxied URI. Maybe the passed URI
			 * should be the proxy URI aka conn->uri. --jonas */
			response = get_http_auth_digest_response(&proxy_auth, uri);
			if (response) {
				add_to_string(&header, "Proxy-Authorization: Digest ");
				add_to_string(&header, response);
				add_crlf_to_string(&header);

				mem_free(response);
			}

		} else {
			/* Basic scheme: base64 of "user:password", sent only
			 * when a user name is configured. */
			if (user[0]) {
				unsigned char *proxy_data;

				proxy_data = straconcat(user, ":", passwd, (unsigned char *) NULL);
				if (proxy_data) {
					unsigned char *proxy_64 = base64_encode(proxy_data);

					if (proxy_64) {
						add_to_string(&header, "Proxy-Authorization: Basic ");
						add_to_string(&header, proxy_64);
						add_crlf_to_string(&header);
						mem_free(proxy_64);
					}
					mem_free(proxy_data);
				}
			}
		}
	}

	/* CONNECT: User-Agent does not reveal anything about the
	 * resource we're fetching, and it may help the proxy return
	 * better error messages.  */
	optstr = get_opt_str("protocol.http.user_agent");
	/* An empty option or a single space means: send no User-Agent. */
	if (*optstr && strcmp(optstr, " ")) {
		unsigned char *ustr, ts[64] = "";

		add_to_string(&header, "User-Agent: ");

		/* Format the terminal size as <width>x<height> for the %t
		 * escape; it stays empty when there is no terminal. */
		if (!list_empty(terminals)) {
			unsigned int tslen = 0;
			struct terminal *term = terminals.prev;

			ulongcat(ts, &tslen, term->width, 3, 0);
			ts[tslen++] = 'x';
			ulongcat(ts, &tslen, term->height, 3, 0);
		}
		ustr = subst_user_agent(optstr, VERSION_STRING, system_name,
					ts);

		if (ustr) {
			add_to_string(&header, ustr);
			mem_free(ustr);
		}

		add_crlf_to_string(&header);
	}

	/* CONNECT: Referer probably is a secret page in the HTTPS
	 * server, so don't reveal it to the proxy.  */
	if (!use_connect) {
		switch (get_opt_int("protocol.http.referer.policy")) {
			case REFERER_NONE:
				/* oh well */
				break;

			case REFERER_FAKE:
				optstr = get_opt_str("protocol.http.referer.fake");
				if (!optstr[0]) break;
				add_to_string(&header, "Referer: ");
				add_to_string(&header, optstr);
				add_crlf_to_string(&header);
				break;

			case REFERER_TRUE:
				if (!conn->referrer) break;
				add_to_string(&header, "Referer: ");
				add_url_to_http_string(&header, conn->referrer, URI_HTTP_REFERRER);
				add_crlf_to_string(&header);
				break;

			case REFERER_SAME_URL:
				add_to_string(&header, "Referer: ");
				add_url_to_http_string(&header, uri, URI_HTTP_REFERRER);
				add_crlf_to_string(&header);
				break;
		}
	}

	/* CONNECT: Do send all Accept* headers to the CONNECT proxy,
	 * because they do not reveal anything about the resource
	 * we're going to request via TLS, and they may affect the
	 * error message if the CONNECT request fails.
	 *
	 * If ELinks is ever changed to vary its Accept headers based
	 * on what it intends to do with the returned resource, e.g.
	 * sending "Accept: text/css" when it wants an external
	 * stylesheet, then it should do that only in the inner GET
	 * and not in the outer CONNECT.  */
	add_to_string(&header, "Accept: */*");
	add_crlf_to_string(&header);

	if (get_opt_bool("protocol.http.compression"))
		accept_encoding_header(&header);

	/* The Accept-Charset line is built once and then reused. */
	if (!accept_charset) {
		init_accept_charset();
	}

	/* Note that the bugs.accept_charset option means "do NOT send". */
	if (!(http->bl_flags & SERVER_BLACKLIST_NO_CHARSET)
	    && !get_opt_bool("protocol.http.bugs.accept_charset")
	    && accept_charset) {
		add_to_string(&header, accept_charset);
	}

	/* An explicitly configured Accept-Language wins over the UI
	 * language. */
	optstr = get_opt_str("protocol.http.accept_language");
	if (optstr[0]) {
		add_to_string(&header, "Accept-Language: ");
		add_to_string(&header, optstr);
		add_crlf_to_string(&header);
	}
#ifdef CONFIG_NLS
	else if (get_opt_bool("protocol.http.accept_ui_language")) {
		unsigned char *code = language_to_iso639(current_language);

		if (code) {
			add_to_string(&header, "Accept-Language: ");
			add_to_string(&header, code);
			add_crlf_to_string(&header);
		}
	}
#endif

	/* CONNECT: Proxy-Connection is intended to be seen by the
	 * proxy.  If the CONNECT request succeeds, then the proxy
	 * will forward the remainder of the TCP connection to the
	 * origin server, and Proxy-Connection does not matter; but
	 * if the request fails, then Proxy-Connection may matter.  */
	/* FIXME: What about post-HTTP/1.1?? --Zas */
	if (HTTP_1_1(http->sent_version)) {
		if (!IS_PROXY_URI(conn->uri)) {
			add_to_string(&header, "Connection: ");
		} else {
			add_to_string(&header, "Proxy-Connection: ");
		}

		if (!uri->post || !get_opt_bool("protocol.http.bugs.post_no_keepalive")) {
			add_to_string(&header, "Keep-Alive");
		} else {
			add_to_string(&header, "close");
		}
		add_crlf_to_string(&header);
	}

	/* CONNECT: Do not tell the proxy anything we have cached
	 * about the resource.  */
	if (!use_connect && conn->cached) {
		/* Only a complete cache entry with a stored head is used for
		 * a conditional request. */
		if (!conn->cached->incomplete && conn->cached->head
		    && conn->cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED) {
			if (conn->cached->last_modified) {
				add_to_string(&header, "If-Modified-Since: ");
				add_to_string(&header, conn->cached->last_modified);
				add_crlf_to_string(&header);
			}
			if (conn->cached->etag) {
				add_to_string(&header, "If-None-Match: ");
				add_to_string(&header, conn->cached->etag);
				add_crlf_to_string(&header);
			}
		}
	}

	/* CONNECT: Let's send cache control headers to the proxy too;
	 * they may affect DNS caching.  */
	if (conn->cache_mode >= CACHE_MODE_FORCE_RELOAD) {
		add_to_string(&header, "Pragma: no-cache");
		add_crlf_to_string(&header);
		add_to_string(&header, "Cache-Control: no-cache");
		add_crlf_to_string(&header);
	}

	/* CONNECT: Do not reveal byte ranges to the proxy.  It can't
	 * do anything good with that information anyway.  */
	if (!use_connect && (conn->from || conn->progress->start > 0)) {
		/* conn->from takes precedence. conn->progress.start is set only the first
		 * time, then conn->from gets updated and in case of any retries
		 * etc we have everything interesting in conn->from already. */
		add_to_string(&header, "Range: bytes=");
		add_long_to_string(&header, conn->from ? conn->from : conn->progress->start);
		add_char_to_string(&header, '-');
		add_crlf_to_string(&header);
	}

	/* CONNECT: The Authorization header is for the origin server only.  */
	if (!use_connect) {
		/* With GSSAPI support, stored credentials are looked up only
		 * when http_negotiate_output() returns non-zero. */
#ifdef CONFIG_GSSAPI
		if (http_negotiate_output(uri, &header) != 0)
#endif
			entry = find_auth(uri);
	}

	if (entry) {
		if (entry->digest) {
			unsigned char *response;

			response = get_http_auth_digest_response(entry, uri);
			if (response) {
				add_to_string(&header, "Authorization: Digest ");
				add_to_string(&header, response);
				add_crlf_to_string(&header);

				mem_free(response);
			}

		} else {
			/* RFC2617 section 2 [Basic Authentication Scheme]
			 *
			 * To receive authorization, the client sends the userid
			 * and password, separated by a single colon (":")
			 * character, within a base64 [7] encoded string in the
			 * credentials. */
			unsigned char *id;

			/* Create base64 encoded string. */
			id = straconcat(entry->user, ":", entry->password,
					(unsigned char *) NULL);
			if (id) {
				unsigned char *base64 = base64_encode(id);

				/* @id now holds the encoded form, or NULL if
				 * encoding failed. */
				mem_free_set(&id, base64);
			}

			if (id) {
				add_to_string(&header, "Authorization: Basic ");
				add_to_string(&header, id);
				add_crlf_to_string(&header);
				mem_free(id);
			}
		}
	}

	/* CONNECT: Any POST data is for the origin server only.  */
	if (!use_connect && uri->post) {
		/* We search for first '\n' in uri->post to get content type
		 * as set by get_form_uri(). This '\n' is dropped if any
		 * and replaced by correct '\r\n' termination here. */
		unsigned char *postend = strchr(uri->post, '\n');

		if (postend) {
			add_to_string(&header, "Content-Type: ");
			add_bytes_to_string(&header, uri->post, postend - uri->post);
			add_crlf_to_string(&header);
		}

		post_data = postend ? postend + 1 : uri->post;
		/* The body is stored as two hex characters per byte (it is
		 * decoded further below), hence half the string length. */
		add_to_string(&header, "Content-Length: ");
		add_long_to_string(&header, strlen(post_data) / 2);
		add_crlf_to_string(&header);
	}

#ifdef CONFIG_COOKIES
	/* CONNECT: Cookies are for the origin server only.  */
	if (!use_connect) {
		struct string *cookies = send_cookies(uri);

		if (cookies) {
			add_to_string(&header, "Cookie: ");
			add_string_to_string(&header, cookies);
			add_crlf_to_string(&header);
			done_string(cookies);
		}
	}
#endif

	/* The empty line that ends the header section. */
	add_crlf_to_string(&header);

	/* CONNECT: Any POST data is for the origin server only.
	 * This was already checked above and post_data is NULL
	 * in that case.  Verified with an assertion below.  */
	if (post_data) {
#define POST_BUFFER_SIZE 4096
		unsigned char *post = post_data;
		unsigned char buffer[POST_BUFFER_SIZE];
		int n = 0;

		assert(!use_connect); /* see comment above */

		/* Decode the hex pairs into raw bytes, appending them to the
		 * request in batches of at most POST_BUFFER_SIZE bytes.  A
		 * trailing unpaired character is ignored. */
		while (post[0] && post[1]) {
			int h1, h2;

			h1 = unhx(post[0]);
			assertm(h1 >= 0 && h1 < 16, "h1 in the POST buffer is %d (%d/%c)", h1, post[0], post[0]);
			if_assert_failed h1 = 0;

			h2 = unhx(post[1]);
			assertm(h2 >= 0 && h2 < 16, "h2 in the POST buffer is %d (%d/%c)", h2, post[1], post[1]);
			if_assert_failed h2 = 0;

			buffer[n++] = (h1<<4) + h2;
			post += 2;
			if (n == POST_BUFFER_SIZE) {
				add_bytes_to_string(&header, buffer, n);
				n = 0;
			}
		}

		/* Flush whatever is left of the last batch. */
		if (n)
			add_bytes_to_string(&header, buffer, n);
#undef POST_BUFFER_SIZE
	}

	request_from_socket(socket, header.source, header.length,
			    connection_state(S_SENT),
			    SOCKET_END_ONCLOSE, http_got_header);
	done_string(&header);
}
/* This function decompresses the data block given in @data (if it was
 * compressed), which is long @len bytes. The decompressed data block is given
 * back to the world as the return value and its length is stored into
 * @new_len. After this function returns, the caller will discard all the @len
 * input bytes, so this function must use all of them unless an error occurs.
 *
 * In this function, value of either http->chunk_remaining or http->length is
 * being changed (it depends on if chunked mode is used or not).
 *
 * Return value:
 *  - when the connection has no content encoding, @data itself is returned
 *    and *@new_len is set to @len;
 *  - otherwise a buffer obtained from mem_realloc() holding *@new_len decoded
 *    bytes (presumably to be freed by the caller -- TODO confirm), which may
 *    be NULL with *@new_len == 0 when nothing could be decoded in this round
 *    or when setting up the pipe or the decoder failed.
 *
 * Note that the function is still a little esoteric for me. Don't take it
 * lightly and don't mess with it without grave reason! If you dare to touch
 * this without testing the changes on slashdot, freshmeat and cvsweb
 * (including revision history), don't dare to send me any patches! ;) --pasky
 *
 * This function gotta die. */
static unsigned char *
decompress_data(struct connection *conn, unsigned char *data, int len,
		int *new_len)
{
	struct http_connection_info *http = conn->info;
	enum { NORMAL, FINISHING } state = NORMAL;
	int did_read = 0;
	/* Points at whichever counter tracks the remaining input: the chunk
	 * counter in chunked mode, the body length otherwise. */
	int *length_of_block;
	unsigned char *output = NULL;

	/* Size of one read from the decoder, and the amount the output
	 * buffer grows by before each such read. */
#define BIG_READ 655360

	if (http->length == LEN_CHUNKED) {
		if (http->chunk_remaining == CHUNK_ZERO_SIZE)
			state = FINISHING;
		length_of_block = &http->chunk_remaining;
	} else {
		length_of_block = &http->length;
		if (!*length_of_block) {
			/* Going to finish this decoding business. */
			state = FINISHING;
		}
	}

	/* Nothing to decode: account for the bytes and pass them through. */
	if (conn->content_encoding == ENCODING_NONE) {
		*new_len = len;
		if (*length_of_block > 0) *length_of_block -= len;
		return data;
	}

	*new_len = 0; /* new_len must be zero if we would ever return NULL */

	/* Create the non-blocking pipe that feeds the decoder, unless an
	 * earlier call already did. */
	if (conn->stream_pipes[0] == -1
	    && (c_pipe(conn->stream_pipes) < 0
		|| set_nonblocking_fd(conn->stream_pipes[0]) < 0
		|| set_nonblocking_fd(conn->stream_pipes[1]) < 0)) {
		return NULL;
	}

	do {
		unsigned char *tmp;

		if (state == NORMAL) {
			/* ... we aren't finishing yet. */
			int written = safe_write(conn->stream_pipes[1], data, len);

			/* A failed write is not fatal here; the loop goes on
			 * to read from the decoder and tries again on the
			 * next pass while input remains. */
			if (written >= 0) {
				data += written;
				len -= written;

				/* In non-keep-alive connections http->length == -1, so the test below */
				if (*length_of_block > 0)
					*length_of_block -= written;
				/* http->length is 0 at the end of block for all modes: keep-alive,
				 * non-keep-alive and chunked */
				if (!http->length) {
					/* That's all, folks - let's finish this. */
					state = FINISHING;
				} else if (!len) {
					/* We've done for this round (but not done
					 * completely). Thus we will get out with
					 * what we have and leave what we wrote to
					 * the next round - we have to do that since
					 * we MUST NOT ever empty the pipe completely
					 * - this would cause a disaster for
					 * read_encoded(), which would simply not
					 * work right then. */
					return output;
				}
			}
		}

		/* The decoder is opened on the read end of the pipe the
		 * first time it is needed. */
		if (!conn->stream) {
			conn->stream = open_encoded(conn->stream_pipes[0],
					conn->content_encoding);
			if (!conn->stream) return NULL;
		}

		/* Make room for one more BIG_READ; on allocation failure
		 * return what has been decoded so far. */
		tmp = mem_realloc(output, *new_len + BIG_READ);
		if (!tmp) break;
		output = tmp;

		did_read = read_encoded(conn->stream, output + *new_len, BIG_READ);

		/* Do not break from the loop if did_read == 0.  It
		 * means no decoded data is available yet, but some may
		 * become available later.  This happens especially with
		 * the bzip2 decoder, which needs an entire compressed
		 * block as input before it generates any output.  */
		if (did_read < 0) {
			state = FINISHING;
			break;
		}
		*new_len += did_read;
	/* Keep going while input remains to be written or the decoder filled
	 * the whole read buffer (so more output may be pending). */
	} while (len || (did_read == BIG_READ));

	if (state == FINISHING) shutdown_connection_stream(conn);
	return output;
#undef BIG_READ
}
1122 static int
1123 is_line_in_buffer(struct read_buffer *rb)
1125 int l;
1127 for (l = 0; l < rb->length; l++) {
1128 unsigned char a0 = rb->data[l];
1130 if (a0 == ASCII_LF)
1131 return l + 1;
1132 if (a0 == ASCII_CR) {
1133 if (rb->data[l + 1] == ASCII_LF
1134 && l < rb->length - 1)
1135 return l + 2;
1136 if (l == rb->length - 1)
1137 return 0;
1139 if (a0 < ' ')
1140 return -1;
1142 return 0;
1145 static void read_http_data(struct socket *socket, struct read_buffer *rb);
1147 static void
1148 read_more_http_data(struct connection *conn, struct read_buffer *rb,
1149 int already_got_anything)
1151 struct connection_state state = already_got_anything
1152 ? connection_state(S_TRANS) : conn->state;
1154 read_from_socket(conn->socket, rb, state, read_http_data);
1157 static void
1158 read_http_data_done(struct connection *conn)
1160 struct http_connection_info *http = conn->info;
1162 /* There's no content but an error so just print
1163 * that instead of nothing. */
1164 if (!conn->from) {
1165 if (http->code >= 400) {
1166 http_error_document(conn, http->code);
1168 } else {
1169 /* This is not an error, thus fine. No need generate any
1170 * document, as this may be empty and it's not a problem.
1171 * In case of 3xx, we're probably just getting kicked to
1172 * another page anyway. And in case of 2xx, the document
1173 * may indeed be empty and thus the user should see it so. */
1177 http_end_request(conn, connection_state(S_OK), 0);
1180 /* Returns:
1181 * -1 on error
1182 * 0 if more to read
1183 * 1 if done
1185 static int
1186 read_chunked_http_data(struct connection *conn, struct read_buffer *rb)
1188 struct http_connection_info *http = conn->info;
1189 int total_data_len = 0;
1191 while (1) {
1192 /* Chunked. Good luck! */
1193 /* See RFC2616, section 3.6.1. Basically, it looks like:
1194 * 1234 ; a = b ; c = d\r\n
1195 * aklkjadslkfjalkfjlkajkljfdkljdsfkljdf*1234\r\n
1196 * 0\r\n
1197 * \r\n */
1198 if (http->chunk_remaining == CHUNK_DATA_END) {
1199 int l = is_line_in_buffer(rb);
1201 if (l) {
1202 if (l == -1) {
1203 /* Invalid character in buffer. */
1204 return -1;
1207 /* Remove everything to the EOLN. */
1208 kill_buffer_data(rb, l);
1209 if (l <= 2) {
1210 /* Empty line. */
1211 return 2;
1213 continue;
1216 } else if (http->chunk_remaining == CHUNK_SIZE) {
1217 int l = is_line_in_buffer(rb);
1219 if (l) {
1220 unsigned char *de;
1221 int n = 0;
1223 if (l != -1) {
1224 errno = 0;
1225 n = strtol(rb->data, (char **) &de, 16);
1226 if (errno || !*de) {
1227 return -1;
1231 if (l == -1 || de == rb->data) {
1232 return -1;
1235 /* Remove everything to the EOLN. */
1236 kill_buffer_data(rb, l);
1237 http->chunk_remaining = n;
1238 if (!http->chunk_remaining)
1239 http->chunk_remaining = CHUNK_ZERO_SIZE;
1240 continue;
1243 } else {
1244 unsigned char *data;
1245 int data_len;
1246 int zero = (http->chunk_remaining == CHUNK_ZERO_SIZE);
1247 int len = zero ? 0 : http->chunk_remaining;
1249 /* Maybe everything necessary didn't come yet.. */
1250 int_upper_bound(&len, rb->length);
1251 conn->received += len;
1253 data = decompress_data(conn, rb->data, len, &data_len);
1255 if (add_fragment(conn->cached, conn->from,
1256 data, data_len) == 1)
1257 conn->tries = 0;
1259 if (data && data != rb->data) mem_free(data);
1261 conn->from += data_len;
1262 total_data_len += data_len;
1264 kill_buffer_data(rb, len);
1266 if (zero) {
1267 /* Last chunk has zero length, so this is last
1268 * chunk, we finished decompression just now
1269 * and now we can happily finish reading this
1270 * stuff. */
1271 http->chunk_remaining = CHUNK_DATA_END;
1272 continue;
1275 if (!http->chunk_remaining && rb->length > 0) {
1276 /* Eat newline succeeding each chunk. */
1277 if (rb->data[0] == ASCII_LF) {
1278 kill_buffer_data(rb, 1);
1279 } else {
1280 if (rb->data[0] != ASCII_CR
1281 || (rb->length >= 2
1282 && rb->data[1] != ASCII_LF)) {
1283 return -1;
1285 if (rb->length < 2) break;
1286 kill_buffer_data(rb, 2);
1288 http->chunk_remaining = CHUNK_SIZE;
1289 continue;
1292 break;
1295 /* More to read. */
1296 return !!total_data_len;
1299 /* Returns 0 if more data, 1 if done. */
1300 static int
1301 read_normal_http_data(struct connection *conn, struct read_buffer *rb)
1303 struct http_connection_info *http = conn->info;
1304 unsigned char *data;
1305 int data_len;
1306 int len = rb->length;
1308 if (http->length >= 0 && http->length < len) {
1309 /* We won't read more than we have to go. */
1310 len = http->length;
1313 conn->received += len;
1315 data = decompress_data(conn, rb->data, len, &data_len);
1317 if (add_fragment(conn->cached, conn->from, data, data_len) == 1)
1318 conn->tries = 0;
1320 if (data && data != rb->data) mem_free(data);
1322 conn->from += data_len;
1324 kill_buffer_data(rb, len);
1326 if (!http->length && (conn->socket->state == SOCKET_RETRY_ONCLOSE
1327 || conn->socket->state == SOCKET_CLOSED)) {
1328 return 2;
1331 return !!data_len;
1334 static void
1335 read_http_data(struct socket *socket, struct read_buffer *rb)
1337 struct connection *conn = socket->conn;
1338 struct http_connection_info *http = conn->info;
1339 int ret;
1341 if (socket->state == SOCKET_CLOSED) {
1342 if (conn->content_encoding) {
1343 /* Flush decompression first. */
1344 http->length = 0;
1345 } else {
1346 read_http_data_done(conn);
1347 return;
1351 if (http->length != LEN_CHUNKED) {
1352 ret = read_normal_http_data(conn, rb);
1354 } else {
1355 ret = read_chunked_http_data(conn, rb);
1358 switch (ret) {
1359 case 0:
1360 read_more_http_data(conn, rb, 0);
1361 break;
1362 case 1:
1363 read_more_http_data(conn, rb, 1);
1364 break;
1365 case 2:
1366 read_http_data_done(conn);
1367 break;
1368 default:
1369 assertm(ret == -1, "Unexpected return value: %d", ret);
1370 abort_connection(conn, connection_state(S_HTTP_ERROR));
1374 /* Returns offset of the header end, zero if more data is needed, -1 when
1375 * incorrect data was received, -2 if this is HTTP/0.9 and no header is to
1376 * come. */
1377 static int
1378 get_header(struct read_buffer *rb)
1380 int i;
1382 /* XXX: We will have to do some guess about whether an HTTP header is
1383 * coming or not, in order to support HTTP/0.9 reply correctly. This
1384 * means a little code duplication with get_http_code(). --pasky */
1385 if (rb->length > 4 && c_strncasecmp(rb->data, "HTTP/", 5))
1386 return -2;
1388 for (i = 0; i < rb->length; i++) {
1389 unsigned char a0 = rb->data[i];
1390 unsigned char a1 = rb->data[i + 1];
1392 if (a0 == 0) {
1393 rb->data[i] = ' ';
1394 continue;
1396 if (a0 == ASCII_LF && a1 == ASCII_LF
1397 && i < rb->length - 1)
1398 return i + 2;
1399 if (a0 == ASCII_CR && i < rb->length - 3) {
1400 if (a1 == ASCII_CR) continue;
1401 if (a1 != ASCII_LF) return -1;
1402 if (rb->data[i + 2] == ASCII_CR) {
1403 if (rb->data[i + 3] != ASCII_LF) return -1;
1404 return i + 4;
1409 return 0;
1412 /* returns 1 if we need retry the connection (for negotiate-auth only) */
1413 static int
1414 check_http_authentication(struct connection *conn, struct uri *uri,
1415 unsigned char *header, unsigned char *header_field)
1417 unsigned char *str, *d;
1418 int ret = 0;
1420 d = parse_header(header, header_field, &str);
1421 while (d) {
1422 if (!c_strncasecmp(d, "Basic", 5)) {
1423 unsigned char *realm = get_header_param(d, "realm");
1425 if (realm) {
1426 add_auth_entry(uri, realm, NULL, NULL, 0);
1427 mem_free(realm);
1428 mem_free(d);
1429 break;
1431 } else if (!c_strncasecmp(d, "Digest", 6)) {
1432 unsigned char *realm = get_header_param(d, "realm");
1433 unsigned char *nonce = get_header_param(d, "nonce");
1434 unsigned char *opaque = get_header_param(d, "opaque");
1436 add_auth_entry(uri, realm, nonce, opaque, 1);
1438 mem_free_if(realm);
1439 mem_free_if(nonce);
1440 mem_free_if(opaque);
1441 mem_free(d);
1442 break;
1444 #ifdef CONFIG_GSSAPI
1445 else if (!c_strncasecmp(d, HTTPNEG_GSS_STR, HTTPNEG_GSS_STRLEN)) {
1446 if (http_negotiate_input(conn, uri, HTTPNEG_GSS, str)==0)
1447 ret = 1;
1448 mem_free(d);
1449 break;
1451 else if (!c_strncasecmp(d, HTTPNEG_NEG_STR, HTTPNEG_NEG_STRLEN)) {
1452 if (http_negotiate_input(conn, uri, HTTPNEG_NEG, str)==0)
1453 ret = 1;
1454 mem_free(d);
1455 break;
1457 #endif
1458 mem_free(d);
1459 d = parse_header(str, header_field, &str);
1461 return ret;
1465 void
1466 http_got_header(struct socket *socket, struct read_buffer *rb)
1468 struct connection *conn = socket->conn;
1469 struct http_connection_info *http = conn->info;
1470 unsigned char *head;
1471 #ifdef CONFIG_COOKIES
1472 unsigned char *cookie, *ch;
1473 #endif
1474 unsigned char *d;
1475 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
1476 struct http_version version = { 0, 9 };
1477 struct connection_state state = (!is_in_state(conn->state, S_PROC)
1478 ? connection_state(S_GETH)
1479 : connection_state(S_PROC));
1480 int a, h = 200;
1481 int cf;
1483 if (socket->state == SOCKET_CLOSED) {
1484 if (!conn->tries && uri->host) {
1485 if (http->bl_flags & SERVER_BLACKLIST_NO_CHARSET) {
1486 del_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1487 } else {
1488 add_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1489 conn->tries = -1;
1492 retry_connection(conn, connection_state(S_CANT_READ));
1493 return;
1495 socket->state = SOCKET_RETRY_ONCLOSE;
1497 again:
1498 a = get_header(rb);
1499 if (a == -1) {
1500 abort_connection(conn, connection_state(S_HTTP_ERROR));
1501 return;
1503 if (!a) {
1504 read_from_socket(conn->socket, rb, state, http_got_header);
1505 return;
1507 /* a == -2 from get_header means HTTP/0.9. In that case, skip
1508 * the get_http_code call; @h and @version have already been
1509 * initialized with the right values. */
1510 if (a == -2) a = 0;
1511 if ((a && get_http_code(rb, &h, &version))
1512 || h == 101) {
1513 abort_connection(conn, connection_state(S_HTTP_ERROR));
1514 return;
1517 /* When no header, HTTP/0.9 document. That's always text/html,
1518 * according to
1519 * http://www.w3.org/Protocols/HTTP/AsImplemented.html. */
1520 /* FIXME: This usage of fake protocol headers for setting up the
1521 * content type has been obsoleted by the @content_type member of
1522 * {struct cache_entry}. */
1523 head = (a ? memacpy(rb->data, a)
1524 : stracpy("\r\nContent-Type: text/html\r\n"));
1525 if (!head) {
1526 abort_connection(conn, connection_state(S_OUT_OF_MEM));
1527 return;
1530 if (check_http_server_bugs(uri, http, head)) {
1531 mem_free(head);
1532 retry_connection(conn, connection_state(S_RESTART));
1533 return;
1536 #ifdef CONFIG_CGI
1537 if (uri->protocol == PROTOCOL_FILE) {
1538 /* ``Status'' is not a standard HTTP header field although some
1539 * HTTP servers like www.php.net uses it for some reason. It should
1540 * only be used for CGI scripts so that it does not interfere
1541 * with status code depended handling for ``normal'' HTTP like
1542 * redirects. */
1543 d = parse_header(head, "Status", NULL);
1544 if (d) {
1545 int h2 = atoi(d);
1547 mem_free(d);
1548 if (h2 >= 100 && h2 < 600) h = h2;
1549 if (h == 101) {
1550 mem_free(head);
1551 abort_connection(conn, connection_state(S_HTTP_ERROR));
1552 return;
1556 #endif
1558 #ifdef CONFIG_COOKIES
1559 ch = head;
1560 while ((cookie = parse_header(ch, "Set-Cookie", &ch))) {
1561 set_cookie(uri, cookie);
1562 mem_free(cookie);
1564 #endif
1565 http->code = h;
1567 if (h == 100) {
1568 mem_free(head);
1569 state = connection_state(S_PROC);
1570 kill_buffer_data(rb, a);
1571 goto again;
1573 if (h < 200) {
1574 mem_free(head);
1575 abort_connection(conn, connection_state(S_HTTP_ERROR));
1576 return;
1578 if (h == 304) {
1579 mem_free(head);
1580 http_end_request(conn, connection_state(S_OK), 1);
1581 return;
1583 if (h == 204) {
1584 mem_free(head);
1585 http_end_request(conn, connection_state(S_HTTP_204), 0);
1586 return;
1588 if (h == 200 && connection_is_https_proxy(conn) && !conn->socket->ssl) {
1589 #ifdef CONFIG_SSL
1590 mem_free(head);
1591 socket->need_ssl = 1;
1592 complete_connect_socket(socket, uri, http_send_header);
1593 #else
1594 abort_connection(conn, connection_state(S_SSL_ERROR));
1595 #endif
1596 return;
1599 conn->cached = get_cache_entry(conn->uri);
1600 if (!conn->cached) {
1601 mem_free(head);
1602 abort_connection(conn, connection_state(S_OUT_OF_MEM));
1603 return;
1605 conn->cached->cgi = conn->cgi;
1606 mem_free_set(&conn->cached->head, head);
1608 if (!get_opt_bool("document.cache.ignore_cache_control")) {
1609 struct cache_entry *cached = conn->cached;
1611 /* I am not entirely sure in what order we should process these
1612 * headers and if we should still process Cache-Control max-age
1613 * if we already set max age to date mentioned in Expires.
1614 * --jonas */
1615 /* Ensure that when ever cached->max_age is set, cached->expired
1616 * is also set, so the cache management knows max_age contains a
1617 * valid time. If on the other hand no caching is requested
1618 * cached->expire should be set to zero. */
1619 if ((d = parse_header(cached->head, "Expires", NULL))) {
1620 /* Convert date to seconds. */
1621 time_t expires = parse_date(&d, NULL, 0, 1);
1623 mem_free(d);
1625 if (expires && cached->cache_mode != CACHE_MODE_NEVER) {
1626 timeval_from_seconds(&cached->max_age, expires);
1627 cached->expire = 1;
1631 if ((d = parse_header(cached->head, "Pragma", NULL))) {
1632 if (strstr(d, "no-cache")) {
1633 cached->cache_mode = CACHE_MODE_NEVER;
1634 cached->expire = 0;
1636 mem_free(d);
1639 if (cached->cache_mode != CACHE_MODE_NEVER
1640 && (d = parse_header(cached->head, "Cache-Control", NULL))) {
1641 if (strstr(d, "no-cache") || strstr(d, "must-revalidate")) {
1642 cached->cache_mode = CACHE_MODE_NEVER;
1643 cached->expire = 0;
1645 } else {
1646 unsigned char *pos = strstr(d, "max-age=");
1648 assert(cached->cache_mode != CACHE_MODE_NEVER);
1650 if (pos) {
1651 /* Grab the number of seconds. */
1652 timeval_T max_age;
1654 timeval_from_seconds(&max_age, atol(pos + 8));
1655 timeval_now(&cached->max_age);
1656 timeval_add_interval(&cached->max_age, &max_age);
1658 cached->expire = 1;
1662 mem_free(d);
1666 /* XXX: Is there some reason why NOT to follow the Location header
1667 * for any status? If the server didn't mean it, it wouldn't send
1668 * it, after all...? --pasky */
1669 if (h == 201 || h == 301 || h == 302 || h == 303 || h == 307) {
1670 d = parse_header(conn->cached->head, "Location", NULL);
1671 if (d) {
1672 int use_get_method = (h == 303);
1674 /* A note from RFC 2616 section 10.3.3:
1675 * RFC 1945 and RFC 2068 specify that the client is not
1676 * allowed to change the method on the redirected
1677 * request. However, most existing user agent
1678 * implementations treat 302 as if it were a 303
1679 * response, performing a GET on the Location
1680 * field-value regardless of the original request
1681 * method. */
1682 /* So POST must not be redirected to GET, but some
1683 * BUGGY message boards rely on it :-( */
1684 if (h == 302
1685 && get_opt_bool("protocol.http.bugs.broken_302_redirect"))
1686 use_get_method = 1;
1688 redirect_cache(conn->cached, d, use_get_method, -1);
1689 mem_free(d);
1693 if (h == 401) {
1694 if (check_http_authentication(conn, uri,
1695 conn->cached->head, "WWW-Authenticate")) {
1696 retry_connection(conn, connection_state(S_RESTART));
1697 return;
1701 if (h == 407) {
1702 unsigned char *str;
1704 d = parse_header(conn->cached->head, "Proxy-Authenticate", &str);
1705 while (d) {
1706 if (!c_strncasecmp(d, "Basic", 5)) {
1707 unsigned char *realm = get_header_param(d, "realm");
1709 if (realm) {
1710 mem_free_set(&proxy_auth.realm, realm);
1711 proxy_auth.digest = 0;
1712 mem_free(d);
1713 break;
1716 } else if (!c_strncasecmp(d, "Digest", 6)) {
1717 unsigned char *realm = get_header_param(d, "realm");
1718 unsigned char *nonce = get_header_param(d, "nonce");
1719 unsigned char *opaque = get_header_param(d, "opaque");
1721 mem_free_set(&proxy_auth.realm, realm);
1722 mem_free_set(&proxy_auth.nonce, nonce);
1723 mem_free_set(&proxy_auth.opaque, opaque);
1724 proxy_auth.digest = 1;
1726 mem_free(d);
1727 break;
1730 mem_free(d);
1731 d = parse_header(str, "Proxy-Authenticate", &str);
1735 kill_buffer_data(rb, a);
1736 http->close = 0;
1737 http->length = -1;
1738 http->recv_version = version;
1740 if ((d = parse_header(conn->cached->head, "Connection", NULL))
1741 || (d = parse_header(conn->cached->head, "Proxy-Connection", NULL))) {
1742 if (!c_strcasecmp(d, "close")) http->close = 1;
1743 mem_free(d);
1744 } else if (PRE_HTTP_1_1(version)) {
1745 http->close = 1;
1748 cf = conn->from;
1749 conn->from = 0;
1750 d = parse_header(conn->cached->head, "Content-Range", NULL);
1751 if (d) {
1752 if (strlen(d) > 6) {
1753 d[5] = 0;
1754 if (isdigit(d[6]) && !c_strcasecmp(d, "bytes")) {
1755 int f;
1757 errno = 0;
1758 f = strtol(d + 6, NULL, 10);
1760 if (!errno && f >= 0) conn->from = f;
1763 mem_free(d);
1765 if (cf && !conn->from && !conn->unrestartable) conn->unrestartable = 1;
1766 if ((conn->progress->start <= 0 && conn->from > cf) || conn->from < 0) {
1767 /* We don't want this if conn->progress.start because then conn->from will
1768 * be probably value of conn->progress.start, while cf is 0. */
1769 abort_connection(conn, connection_state(S_HTTP_ERROR));
1770 return;
1773 #if 0
1775 struct status *s;
1776 foreach (s, conn->downloads) {
1777 fprintf(stderr, "conn %p status %p pri %d st %d er %d :: ce %s",
1778 conn, s, s->pri, s->state, s->prev_error,
1779 s->cached ? s->cached->url : (unsigned char *) "N-U-L-L");
1782 #endif
1784 if (conn->progress->start >= 0) {
1785 /* Update to the real value which we've got from Content-Range. */
1786 conn->progress->seek = conn->from;
1788 conn->progress->start = conn->from;
1790 d = parse_header(conn->cached->head, "Content-Length", NULL);
1791 if (d) {
1792 unsigned char *ep;
1793 int l;
1795 errno = 0;
1796 l = strtol(d, (char **) &ep, 10);
1798 if (!errno && !*ep && l >= 0) {
1799 if (!http->close || POST_HTTP_1_0(version))
1800 http->length = l;
1801 conn->est_length = conn->from + l;
1803 mem_free(d);
1806 if (!conn->unrestartable) {
1807 d = parse_header(conn->cached->head, "Accept-Ranges", NULL);
1809 if (d) {
1810 if (!c_strcasecmp(d, "none"))
1811 conn->unrestartable = 1;
1812 mem_free(d);
1813 } else {
1814 if (!conn->from)
1815 conn->unrestartable = 1;
1819 d = parse_header(conn->cached->head, "Transfer-Encoding", NULL);
1820 if (d) {
1821 if (!c_strcasecmp(d, "chunked")) {
1822 http->length = LEN_CHUNKED;
1823 http->chunk_remaining = CHUNK_SIZE;
1825 mem_free(d);
1827 if (!http->close && http->length == -1) http->close = 1;
1829 d = parse_header(conn->cached->head, "Last-Modified", NULL);
1830 if (d) {
1831 if (conn->cached->last_modified && c_strcasecmp(conn->cached->last_modified, d)) {
1832 delete_entry_content(conn->cached);
1833 if (conn->from) {
1834 conn->from = 0;
1835 mem_free(d);
1836 retry_connection(conn, connection_state(S_MODIFIED));
1837 return;
1840 if (!conn->cached->last_modified) conn->cached->last_modified = d;
1841 else mem_free(d);
1843 if (!conn->cached->last_modified) {
1844 d = parse_header(conn->cached->head, "Date", NULL);
1845 if (d) conn->cached->last_modified = d;
1848 /* FIXME: Parse only if HTTP/1.1 or later? --Zas */
1849 d = parse_header(conn->cached->head, "ETag", NULL);
1850 if (d) {
1851 if (conn->cached->etag) {
1852 unsigned char *old_tag = conn->cached->etag;
1853 unsigned char *new_tag = d;
1855 /* http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19 */
1857 if (new_tag[0] == 'W' && new_tag[1] == '/')
1858 new_tag += 2;
1860 if (old_tag[0] == 'W' && old_tag[1] == '/')
1861 old_tag += 2;
1863 if (strcmp(new_tag, old_tag)) {
1864 delete_entry_content(conn->cached);
1865 if (conn->from) {
1866 conn->from = 0;
1867 mem_free(d);
1868 retry_connection(conn, connection_state(S_MODIFIED));
1869 return;
1874 if (!conn->cached->etag)
1875 conn->cached->etag = d;
1876 else
1877 mem_free(d);
1880 d = parse_header(conn->cached->head, "Content-Encoding", NULL);
1881 if (d) {
1882 unsigned char *extension = get_extension_from_uri(uri);
1883 enum stream_encoding file_encoding;
1885 file_encoding = extension ? guess_encoding(extension) : ENCODING_NONE;
1886 mem_free_if(extension);
1888 /* If the content is encoded, we want to preserve the encoding
1889 * if it is implied by the extension, so that saving the URI
1890 * will leave the saved file with the correct encoding. */
1891 #ifdef CONFIG_GZIP
1892 if (file_encoding != ENCODING_GZIP
1893 && (!c_strcasecmp(d, "gzip") || !c_strcasecmp(d, "x-gzip")))
1894 conn->content_encoding = ENCODING_GZIP;
1895 if (!c_strcasecmp(d, "deflate") || !c_strcasecmp(d, "x-deflate"))
1896 conn->content_encoding = ENCODING_DEFLATE;
1897 #endif
1899 #ifdef CONFIG_BZIP2
1900 if (file_encoding != ENCODING_BZIP2
1901 && (!c_strcasecmp(d, "bzip2") || !c_strcasecmp(d, "x-bzip2")))
1902 conn->content_encoding = ENCODING_BZIP2;
1903 #endif
1905 #ifdef CONFIG_LZMA
1906 if (file_encoding != ENCODING_LZMA
1907 && (!c_strcasecmp(d, "lzma") || !c_strcasecmp(d, "x-lzma")))
1908 conn->content_encoding = ENCODING_LZMA;
1909 #endif
1910 mem_free(d);
1913 if (conn->content_encoding != ENCODING_NONE) {
1914 mem_free_if(conn->cached->encoding_info);
1915 conn->cached->encoding_info = stracpy(get_encoding_name(conn->content_encoding));
1918 if (http->length == -1 || http->close)
1919 socket->state = SOCKET_END_ONCLOSE;
1921 read_http_data(socket, rb);