bug 1068: Decompress data when the socket is closed.
[elinks.git] / src / protocol / http / http.c
blob650b19f3542ed677572b62dc97d604c63e05436f
1 /* Internal "http" protocol implementation */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <errno.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #ifdef HAVE_LIMITS_H
12 #include <limits.h>
13 #endif
15 #include "elinks.h"
17 #include "cache/cache.h"
18 #include "config/options.h"
19 #include "cookies/cookies.h"
20 #include "intl/charsets.h"
21 #include "intl/gettext/libintl.h"
22 #include "main/module.h"
23 #include "network/connection.h"
24 #include "network/progress.h"
25 #include "network/socket.h"
26 #include "osdep/ascii.h"
27 #include "osdep/osdep.h"
28 #include "osdep/sysname.h"
29 #include "protocol/auth/auth.h"
30 #include "protocol/auth/digest.h"
31 #include "protocol/date.h"
32 #include "protocol/header.h"
33 #include "protocol/http/blacklist.h"
34 #include "protocol/http/codes.h"
35 #include "protocol/http/http.h"
36 #include "protocol/uri.h"
37 #include "session/session.h"
38 #include "terminal/terminal.h"
39 #include "util/base64.h"
40 #include "util/conv.h"
41 #include "util/memory.h"
42 #include "util/string.h"
44 #ifdef CONFIG_GSSAPI
45 #include "http_negotiate.h"
46 #endif
48 /* These macros concern the struct http_version defined in the http.h */
49 #define HTTP_0_9(x) ((x).major == 0 && (x).minor == 9)
50 #define HTTP_1_0(x) ((x).major == 1 && (x).minor == 0)
51 #define HTTP_1_1(x) ((x).major == 1 && (x).minor == 1)
52 #define PRE_HTTP_1_0(x) ((x).major < 1)
53 #define PRE_HTTP_1_1(x) (PRE_HTTP_1_0(x) || HTTP_1_0(x))
54 #define POST_HTTP_1_0(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 0))
55 #define POST_HTTP_1_1(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 1))
58 #define LEN_CHUNKED -2 /* == we get data in unknown number of chunks */
59 #define LEN_FINISHED 0
61 /* Either bytes coming in this chunk yet or "parser state". */
62 #define CHUNK_DATA_END -3
63 #define CHUNK_ZERO_SIZE -2
64 #define CHUNK_SIZE -1
66 static struct auth_entry proxy_auth;
68 static unsigned char *accept_charset = NULL;
/* Option table handed to the module system through http_protocol_module.
 * It declares the "protocol.http" tree (server bug workarounds, proxy
 * settings, referer policy, Accept-Language, compression, TRACE debugging
 * and the User-Agent template) followed by the "protocol.https" proxy
 * tree. The list is terminated by NULL_OPTION_INFO. */
71 static struct option_info http_options[] = {
72 INIT_OPT_TREE("protocol", N_("HTTP"),
73 "http", 0,
74 N_("HTTP-specific options.")),
77 INIT_OPT_TREE("protocol.http", N_("Server bug workarounds"),
78 "bugs", 0,
79 N_("Server-side HTTP bugs workarounds.")),
81 INIT_OPT_BOOL("protocol.http.bugs", N_("Do not send Accept-Charset"),
82 "accept_charset", 0, 1,
83 N_("The Accept-Charset header is quite long and sending it can trigger\n"
84 "bugs in some rarely found servers.")),
86 INIT_OPT_BOOL("protocol.http.bugs", N_("Allow blacklisting"),
87 "allow_blacklist", 0, 1,
88 N_("Allow blacklisting of buggy servers.")),
90 INIT_OPT_BOOL("protocol.http.bugs", N_("Broken 302 redirects"),
91 "broken_302_redirect", 0, 1,
92 N_("Broken 302 redirect (violates RFC but compatible with Netscape).\n"
93 "This is a problem for a lot of web discussion boards and the like.\n"
94 "If they will do strange things to you, try to play with this.")),
96 INIT_OPT_BOOL("protocol.http.bugs", N_("No keepalive after POST requests"),
97 "post_no_keepalive", 0, 0,
98 N_("Disable keepalive connection after POST request.")),
100 INIT_OPT_BOOL("protocol.http.bugs", N_("Use HTTP/1.0"),
101 "http10", 0, 0,
102 N_("Use HTTP/1.0 protocol instead of HTTP/1.1.")),
104 INIT_OPT_TREE("protocol.http", N_("Proxy configuration"),
105 "proxy", 0,
106 N_("HTTP proxy configuration.")),
108 INIT_OPT_STRING("protocol.http.proxy", N_("Host and port-number"),
109 "host", 0, "",
110 N_("Host and port-number (host:port) of the HTTP proxy, or blank.\n"
111 "If it's blank, HTTP_PROXY environment variable is checked as well.")),
113 INIT_OPT_STRING("protocol.http.proxy", N_("Username"),
114 "user", 0, "",
115 N_("Proxy authentication username.")),
117 INIT_OPT_STRING("protocol.http.proxy", N_("Password"),
118 "passwd", 0, "",
119 N_("Proxy authentication password.")),
122 INIT_OPT_TREE("protocol.http", N_("Referer sending"),
123 "referer", 0,
124 N_("HTTP referer sending options. HTTP referer is a special header\n"
125 "sent in the HTTP requests, which is supposed to contain the previous\n"
126 "page visited by the browser. This way, the server can know what link\n"
127 "did you follow when accessing that page. However, this behaviour\n"
128 "can unfortunately considerably affect privacy and can lead even to a\n"
129 "security problem on some badly designed web pages.")),
131 INIT_OPT_INT("protocol.http.referer", N_("Policy"),
132 "policy", 0,
133 REFERER_NONE, REFERER_TRUE, REFERER_TRUE,
134 N_("Mode of sending HTTP referer:\n"
135 "0 is send no referer\n"
136 "1 is send current URL as referer\n"
137 "2 is send fixed fake referer\n"
138 "3 is send previous URL as referer (correct, but insecure)")),
140 INIT_OPT_STRING("protocol.http.referer", N_("Fake referer URL"),
141 "fake", 0, "",
142 N_("Fake referer to be sent when policy is 2.")),
145 INIT_OPT_STRING("protocol.http", N_("Send Accept-Language header"),
146 "accept_language", 0, "",
147 N_("Send Accept-Language header.")),
149 INIT_OPT_BOOL("protocol.http", N_("Use UI language as Accept-Language"),
150 "accept_ui_language", 0, 1,
151 N_("Request localised versions of documents from web-servers (using the\n"
152 "Accept-Language header) using the language you have configured for\n"
153 "ELinks' user-interface (this also affects navigator.language ECMAScript\n"
154 "value available to scripts). Note that some see this as a potential\n"
155 "security risk because it tells web-masters and the FBI sniffers about\n"
156 "your language preference.")),
158 /* After the compression support has been tested enough,
159 * we might wrap this option in #if CFG_DEBUG. */
160 INIT_OPT_BOOL("protocol.http", N_("Enable on-the-fly compression"),
161 "compression", 0, 1,
162 N_("If enabled, the capability to receive compressed content (gzip and/or\n"
163 "bzip2) is announced to the server, which usually sends the reply\n"
164 "compressed, thus saving some bandwidth at slight CPU expense.\n"
165 "\n"
166 "If ELinks displays a incomplete page or garbage, try disabling this\n"
167 "option. If that helps, there may be a bug in the decompression part\n"
168 "of ELinks. Please report such bugs.\n"
169 "\n"
170 "If ELinks has been compiled without compression support, this option\n"
171 "has no effect. To check the supported features, see Help -> About.")),
173 INIT_OPT_BOOL("protocol.http", N_("Activate HTTP TRACE debugging"),
174 "trace", 0, 0,
175 N_("If active, all HTTP requests are sent with TRACE as their method\n"
176 "rather than GET or POST. This is useful for debugging of both ELinks\n"
177 "and various server-side scripts --- the server only returns the client's\n"
178 "request back to the client verbatim. Note that this type of request may\n"
179 "not be enabled on all servers.")),
181 /* OSNews.com is supposed to be relying on the textmode token, at least. */
182 INIT_OPT_STRING("protocol.http", N_("User-agent identification"),
183 "user_agent", 0, "ELinks/%v (textmode; %s; %t-%b)",
184 N_("Change the User Agent ID. That means identification string, which\n"
185 "is sent to HTTP server when a document is requested. The 'textmode'\n"
186 "token in the first field is our silent attempt to establish this as\n"
187 "a standard for new textmode user agents, so that the webmasters can\n"
188 "have just a single uniform test for these if they are e.g. pushing\n"
189 "some lite version to them automagically.\n"
190 "Use \" \" if you don't want any User-Agent header to be sent at all.\n"
191 "%v in the string means ELinks version,\n"
192 "%s in the string means system identification,\n"
193 "%t in the string means size of the terminal,\n"
194 "%b in the string means number of bars displayed by ELinks.")),
197 INIT_OPT_TREE("protocol", N_("HTTPS"),
198 "https", 0,
199 N_("HTTPS-specific options.")),
201 INIT_OPT_TREE("protocol.https", N_("Proxy configuration"),
202 "proxy", 0,
203 N_("HTTPS proxy configuration.")),
205 INIT_OPT_STRING("protocol.https.proxy", N_("Host and port-number"),
206 "host", 0, "",
207 N_("Host and port-number (host:port) of the HTTPS CONNECT proxy, or blank.\n"
208 "If it's blank, HTTPS_PROXY environment variable is checked as well.")),
209 NULL_OPTION_INFO,
212 static void done_http();
/* Module descriptor for the HTTP protocol. It publishes http_options as the
 * module's option table and names done_http() as the "done" callback; the
 * hooks, submodules, data and init slots are all NULL. */
214 struct module http_protocol_module = struct_module(
215 /* name: */ N_("HTTP"),
216 /* options: */ http_options,
217 /* hooks: */ NULL,
218 /* submodules: */ NULL,
219 /* data: */ NULL,
220 /* init: */ NULL,
221 /* done: */ done_http
225 static void
226 done_http(void)
228 mem_free_if(proxy_auth.realm);
229 mem_free_if(proxy_auth.nonce);
230 mem_free_if(proxy_auth.opaque);
232 free_blacklist();
234 if (accept_charset)
235 mem_free(accept_charset);
238 static void
239 init_accept_charset(void)
241 struct string ac;
242 unsigned char *cs;
243 int i;
245 if (!init_string(&ac)) return;
247 for (i = 0; (cs = get_cp_mime_name(i)); i++) {
248 if (ac.length) {
249 add_to_string(&ac, ", ");
250 } else {
251 add_to_string(&ac, "Accept-Charset: ");
253 add_to_string(&ac, cs);
256 if (ac.length) {
257 add_crlf_to_string(&ac);
260 accept_charset = squeezastring(&ac);
262 done_string(&ac);
266 unsigned char *
267 subst_user_agent(unsigned char *fmt, unsigned char *version,
268 unsigned char *sysname, unsigned char *termsize)
270 struct string agent;
272 if (!init_string(&agent)) return NULL;
274 while (*fmt) {
275 int p;
277 for (p = 0; fmt[p] && fmt[p] != '%'; p++);
279 add_bytes_to_string(&agent, fmt, p);
280 fmt += p;
282 if (*fmt != '%') continue;
284 fmt++;
285 switch (*fmt) {
286 case 'b':
287 if (!list_empty(sessions)) {
288 unsigned char bs[4] = "";
289 int blen = 0;
290 struct session *ses = sessions.prev;
291 int bars = ses->status.show_status_bar
292 + ses->status.show_tabs_bar
293 + ses->status.show_title_bar;
295 ulongcat(bs, &blen, bars, 2, 0);
296 add_to_string(&agent, bs);
298 break;
299 case 'v':
300 add_to_string(&agent, version);
301 break;
302 case 's':
303 add_to_string(&agent, sysname);
304 break;
305 case 't':
306 if (termsize)
307 add_to_string(&agent, termsize);
308 break;
309 default:
310 add_bytes_to_string(&agent, fmt - 1, 2);
311 break;
313 if (*fmt) fmt++;
316 return agent.source;
/* Append the @components of @uri to @header, sanitised for use in a request
 * line or header value: every space, tab, CR or LF becomes "%20" and every
 * backslash becomes '/'.
 *
 * This is certainly not the right place to do the substitution, but the
 * behaviour is at least improved compared to what we had before. We should
 * probably encode all URLs as early as possible, and possibly decode them
 * back in protocol backends. --pasky
 *
 * Nothing is appended when get_uri_string() returns NULL. */
static void
add_url_to_http_string(struct string *header, struct uri *uri, int components)
{
	unsigned char *url = get_uri_string(uri, components);
	unsigned char *pos = url;

	if (!url) return;

	while (*pos) {
		/* Length of the run that can be copied unchanged. */
		int plain = strcspn(pos, " \t\r\n\\");
		unsigned char special = pos[plain];

		add_bytes_to_string(header, pos, plain);

		if (!special) break;

		if (special == '\\') {
			add_char_to_string(header, '/');
		} else {
			add_to_string(header, "%20");
		}

		/* Skip the run and the character just replaced. */
		pos += plain + 1;
	}

	mem_free(url);
}
/* Parse the decimal digits in [@start, @end) backwards, from @end - 1 down
 * to @start, and store the resulting number in *@value.
 *
 * Returns 0 on success. Returns -1, leaving *@value untouched, when
 *  - the range is empty or inverted (@end <= @start),
 *  - the range holds more than 9 digits (the largest count that can never
 *    overflow the int accumulator or the power-of-ten multiplier), or
 *  - any character in the range is not a digit. */
static int
revstr2num(unsigned char *start, unsigned char *end, int *value)
{
	int q = 1, val = 0;

	/* An empty range would make the loop below read before @start. */
	if (end <= start) return -1;

	/* Ten or more digits would overflow signed int arithmetic. */
	if (end - start > 9) return -1;

	do {
		--end;
		if (!isdigit(*end)) return -1; /* NaN */
		val += (*end - '0') * q;
		q *= 10;
	} while (end > start);

	*value = val;
	return 0;
}
370 /* This function extracts code, major and minor version from string
371 * "\s*HTTP/\d+.\d+\s+\d\d\d..."
372 * It returns a negative value on error, 0 on success.
374 static int
375 get_http_code(struct read_buffer *rb, int *code, struct http_version *version)
377 unsigned char *head = rb->data;
378 unsigned char *start;
380 *code = 0;
381 version->major = 0;
382 version->minor = 0;
384 /* Ignore spaces. */
385 while (*head == ' ') head++;
387 /* HTTP/ */
388 if (c_toupper(*head) != 'H' || c_toupper(*++head) != 'T' ||
389 c_toupper(*++head) != 'T' || c_toupper(*++head) != 'P'
390 || *++head != '/')
391 return -1;
393 /* Version */
394 start = ++head;
395 /* Find next '.' */
396 while (*head && *head != '.') head++;
397 /* Sanity check. */
398 if (!*head || !(head - start)
399 || (head - start) > 4
400 || !isdigit(*(head + 1)))
401 return -2;
403 /* Extract major version number. */
404 if (revstr2num(start, head, &version->major)) return -3; /* NaN */
406 start = head + 1;
408 /* Find next ' '. */
409 while (*head && *head != ' ') head++;
410 /* Sanity check. */
411 if (!*head || !(head - start) || (head - start) > 4) return -4;
413 /* Extract minor version number. */
414 if (revstr2num(start, head, &version->minor)) return -5; /* NaN */
416 /* Ignore spaces. */
417 while (*head == ' ') head++;
419 /* Sanity check for code. */
420 if (head[0] < '1' || head[0] > '9' ||
421 !isdigit(head[1]) ||
422 !isdigit(head[2]))
423 return -6; /* Invalid code. */
425 /* Extract code. */
426 *code = (head[0] - '0') * 100 + (head[1] - '0') * 10 + head[2] - '0';
428 return 0;
431 static int
432 check_http_server_bugs(struct uri *uri, struct http_connection_info *http,
433 unsigned char *head)
435 unsigned char *server;
436 const unsigned char *const *s;
437 static const unsigned char *const buggy_servers[] = {
438 "mod_czech/3.1.0",
439 "Purveyor",
440 "Netscape-Enterprise",
441 NULL
444 if (!get_opt_bool("protocol.http.bugs.allow_blacklist", NULL)
445 || HTTP_1_0(http->sent_version))
446 return 0;
448 server = parse_header(head, "Server", NULL);
449 if (!server)
450 return 0;
452 for (s = buggy_servers; *s; s++) {
453 if (strstr(server, *s)) {
454 add_blacklist_entry(uri, SERVER_BLACKLIST_HTTP10);
455 break;
459 mem_free(server);
460 return (*s != NULL);
463 static void
464 http_end_request(struct connection *conn, struct connection_state state,
465 int notrunc)
467 struct http_connection_info *http;
469 shutdown_connection_stream(conn);
471 /* shutdown_connection_stream() should not change conn->info,
472 * but in case it does, read conn->info only after the call. */
473 http = conn->info;
474 if (http)
475 done_http_post(&http->post);
477 if (http && !http->close
478 && (!conn->socket->ssl) /* We won't keep alive ssl connections */
479 && (!get_opt_bool("protocol.http.bugs.post_no_keepalive", NULL)
480 || !conn->uri->post)) {
481 if (is_in_state(state, S_OK) && conn->cached)
482 normalize_cache_entry(conn->cached, !notrunc ? conn->from : -1);
483 set_connection_state(conn, state);
484 add_keepalive_connection(conn, HTTP_KEEPALIVE_TIMEOUT, NULL);
485 } else {
486 abort_connection(conn, state);
490 static void http_send_header(struct socket *);
492 void
493 http_protocol_handler(struct connection *conn)
495 /* setcstate(conn, S_CONN); */
497 if (!has_keepalive_connection(conn)) {
498 make_connection(conn->socket, conn->uri, http_send_header,
499 conn->cache_mode >= CACHE_MODE_FORCE_RELOAD);
500 } else {
501 http_send_header(conn->socket);
/* Entry point for proxied connections: handled exactly like a plain HTTP
 * connection, so this simply forwards to http_protocol_handler(). */
void
proxy_protocol_handler(struct connection *conn)
{
	http_protocol_handler(conn);
}
511 #define IS_PROXY_URI(x) ((x)->protocol == PROTOCOL_PROXY)
513 #define connection_is_https_proxy(conn) \
514 (IS_PROXY_URI((conn)->uri) && (conn)->proxied_uri->protocol == PROTOCOL_HTTPS)
516 /** connection.done points to this function if connection.info points
517 * to a struct http_connection_info. */
518 static void
519 done_http_connection(struct connection *conn)
521 struct http_connection_info *http = conn->info;
523 done_http_post(&http->post);
524 mem_free(http);
525 conn->info = NULL;
526 conn->done = NULL;
529 struct http_connection_info *
530 init_http_connection_info(struct connection *conn, int major, int minor, int close)
532 struct http_connection_info *http;
534 http = mem_calloc(1, sizeof(*http));
535 if (!http) {
536 http_end_request(conn, connection_state(S_OUT_OF_MEM), 0);
537 return NULL;
540 http->sent_version.major = major;
541 http->sent_version.minor = minor;
542 http->close = close;
544 init_http_post(&http->post);
546 /* The CGI code uses this too and blacklisting expects a host name. */
547 if (conn->proxied_uri->protocol != PROTOCOL_FILE)
548 http->bl_flags = get_blacklist_flags(conn->proxied_uri);
550 if (http->bl_flags & SERVER_BLACKLIST_HTTP10
551 || get_opt_bool("protocol.http.bugs.http10", NULL)) {
552 http->sent_version.major = 1;
553 http->sent_version.minor = 0;
556 /* If called from HTTPS proxy connection the connection info might have
557 * already been allocated. */
558 if (conn->done) {
559 conn->done(conn);
560 conn->done = NULL;
562 mem_free_set(&conn->info, http);
563 conn->done = done_http_connection;
565 return http;
/* Append an "Accept-Encoding: ...\r\n" line to @header listing the
 * decoders compiled in (CONFIG_BZIP2, CONFIG_GZIP, CONFIG_LZMA), separated
 * by ", ". When none of them is compiled in, nothing is appended. */
static void
accept_encoding_header(struct string *header)
{
#if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) || defined(CONFIG_LZMA)
	/* Non-zero once an encoding has been written, so the next one
	 * knows it needs a separator in front of it. */
	int need_separator = 0;

	add_to_string(header, "Accept-Encoding: ");

#ifdef CONFIG_BZIP2
	add_to_string(header, "bzip2");
	need_separator = 1;
#endif

#ifdef CONFIG_GZIP
	if (need_separator) {
		add_to_string(header, ", ");
	}
	add_to_string(header, "deflate, gzip");
	need_separator = 1;
#endif

#ifdef CONFIG_LZMA
	if (need_separator) {
		add_to_string(header, ", ");
	}
	add_to_string(header, "lzma");
#endif

	add_crlf_to_string(header);
#endif
}
595 #define POST_BUFFER_SIZE 16384
596 #define BIG_READ 655360
598 static void
599 send_more_post_data(struct socket *socket)
601 struct connection *conn = socket->conn;
602 struct http_connection_info *http = conn->info;
603 unsigned char buffer[POST_BUFFER_SIZE];
604 int got;
605 struct connection_state error;
607 got = read_http_post(&http->post, buffer, POST_BUFFER_SIZE, &error);
608 if (got < 0) {
609 http_end_request(conn, error, 0);
610 } else if (got > 0) {
611 write_to_socket(socket, buffer, got, connection_state(S_TRANS),
612 send_more_post_data);
613 } else { /* got == 0, meaning end of data */
614 /* Can't use request_from_socket() because there's no
615 * more data to write. */
616 struct read_buffer *rb = alloc_read_buffer(socket);
618 socket->state = SOCKET_END_ONCLOSE;
619 if (rb)
620 read_from_socket(socket, rb, connection_state(S_SENT),
621 http_got_header);
622 else
623 http_end_request(conn, connection_state(S_OUT_OF_MEM),
/* Build the HTTP request header for conn->proxied_uri and hand it to the
 * socket layer.
 *
 * The request ends early with S_BAD_URL when the URI has no host and with
 * S_OUT_OF_MEM when the header string cannot be initialised; it returns
 * silently when init_http_connection_info() fails (that function ends the
 * request itself).
 *
 * The method is TRACE (when "protocol.http.trace" is set), CONNECT (first,
 * unencrypted step of an HTTPS proxy connection), POST (when uri->post is
 * set) or GET. For CONNECT only a subset of headers is produced: Referer,
 * conditional-GET headers, Range, Authorization, POST data and cookies are
 * all guarded by !use_connect.
 *
 * With POST data the header is written first and send_more_post_data()
 * streams the body; otherwise request_from_socket() sends the header and
 * waits for the response with http_got_header() as the callback. */
630 static void
631 http_send_header(struct socket *socket)
633 struct connection *conn = socket->conn;
634 struct http_connection_info *http;
635 int trace = get_opt_bool("protocol.http.trace", NULL);
636 struct string header;
637 unsigned char *post_data = NULL;
638 struct auth_entry *entry = NULL;
639 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
640 unsigned char *optstr;
641 int use_connect, talking_to_proxy;
643 /* Sanity check for a host */
644 if (!uri || !uri->host || !*uri->host || !uri->hostlen) {
645 http_end_request(conn, connection_state(S_BAD_URL), 0);
646 return;
649 http = init_http_connection_info(conn, 1, 1, 0);
650 if (!http) return;
652 if (!init_string(&header)) {
653 http_end_request(conn, connection_state(S_OUT_OF_MEM), 0);
654 return;
657 if (!conn->cached) conn->cached = find_in_cache(uri);
/* Both flags are only true while the socket is not (yet) SSL. */
659 talking_to_proxy = IS_PROXY_URI(conn->uri) && !conn->socket->ssl;
660 use_connect = connection_is_https_proxy(conn) && !conn->socket->ssl;
662 if (trace) {
663 add_to_string(&header, "TRACE ");
664 } else if (use_connect) {
665 add_to_string(&header, "CONNECT ");
666 /* In CONNECT requests, we send only a subset of the
667 * headers to the proxy. See the "CONNECT:" comments
668 * below. After the CONNECT request succeeds, we
669 * negotiate TLS with the real server and make a new
670 * HTTP request that includes all the headers. */
671 } else if (uri->post) {
672 add_to_string(&header, "POST ");
673 conn->unrestartable = 1;
674 } else {
675 add_to_string(&header, "GET ");
/* When not speaking to a proxy in the clear, the target starts with '/'. */
678 if (!talking_to_proxy) {
679 add_char_to_string(&header, '/');
682 if (use_connect) {
683 /* Add port if it was specified or the default port */
684 add_uri_to_string(&header, uri, URI_HTTP_CONNECT);
685 } else {
686 if (connection_is_https_proxy(conn) && conn->socket->ssl) {
687 add_url_to_http_string(&header, uri, URI_DATA);
689 } else if (talking_to_proxy) {
690 add_url_to_http_string(&header, uri, URI_PROXY);
692 } else {
693 add_url_to_http_string(&header, conn->uri, URI_DATA);
697 add_to_string(&header, " HTTP/");
698 add_long_to_string(&header, http->sent_version.major);
699 add_char_to_string(&header, '.');
700 add_long_to_string(&header, http->sent_version.minor);
701 add_crlf_to_string(&header);
703 /* CONNECT: Sending a Host header seems pointless as the same
704 * information is already in the CONNECT line. It's harmless
705 * though and Mozilla does it too. */
706 add_to_string(&header, "Host: ");
707 add_uri_to_string(&header, uri, URI_HTTP_HOST);
708 add_crlf_to_string(&header);
710 /* CONNECT: Proxy-Authorization is intended to be seen by the proxy. */
711 if (talking_to_proxy) {
712 unsigned char *user = get_opt_str("protocol.http.proxy.user", NULL);
713 unsigned char *passwd = get_opt_str("protocol.http.proxy.passwd", NULL);
715 if (proxy_auth.digest) {
716 unsigned char *response;
717 int userlen = int_min(strlen(user), AUTH_USER_MAXLEN - 1);
718 int passwordlen = int_min(strlen(passwd), AUTH_PASSWORD_MAXLEN - 1);
720 if (userlen)
721 memcpy(proxy_auth.user, user, userlen);
722 proxy_auth.user[userlen] = '\0';
723 if (passwordlen)
724 memcpy(proxy_auth.password, passwd, passwordlen);
725 proxy_auth.password[passwordlen] = '\0';
727 /* FIXME: @uri is the proxied URI. Maybe the passed URI
728 * should be the proxy URI aka conn->uri. --jonas */
729 response = get_http_auth_digest_response(&proxy_auth, uri);
730 if (response) {
731 add_to_string(&header, "Proxy-Authorization: Digest ");
732 add_to_string(&header, response);
733 add_crlf_to_string(&header);
735 mem_free(response);
738 } else {
739 if (user[0]) {
740 unsigned char *proxy_data;
742 proxy_data = straconcat(user, ":", passwd, (unsigned char *) NULL);
743 if (proxy_data) {
744 unsigned char *proxy_64 = base64_encode(proxy_data);
746 if (proxy_64) {
747 add_to_string(&header, "Proxy-Authorization: Basic ");
748 add_to_string(&header, proxy_64);
749 add_crlf_to_string(&header);
750 mem_free(proxy_64);
752 mem_free(proxy_data);
758 /* CONNECT: User-Agent does not reveal anything about the
759 * resource we're fetching, and it may help the proxy return
760 * better error messages. */
761 optstr = get_opt_str("protocol.http.user_agent", NULL);
762 if (*optstr && strcmp(optstr, " ")) {
763 unsigned char *ustr, ts[64] = "";
764 /* TODO: Somehow get the terminal in which the
765 * document will actually be displayed. */
766 struct terminal *term = get_default_terminal();
768 add_to_string(&header, "User-Agent: ");
770 if (term) {
771 unsigned int tslen = 0;
773 ulongcat(ts, &tslen, term->width, 3, 0);
774 ts[tslen++] = 'x';
775 ulongcat(ts, &tslen, term->height, 3, 0);
777 ustr = subst_user_agent(optstr, VERSION_STRING, system_name,
778 ts);
780 if (ustr) {
781 add_to_string(&header, ustr);
782 mem_free(ustr);
785 add_crlf_to_string(&header);
788 /* CONNECT: Referer probably is a secret page in the HTTPS
789 * server, so don't reveal it to the proxy. */
790 if (!use_connect) {
791 switch (get_opt_int("protocol.http.referer.policy", NULL)) {
792 case REFERER_NONE:
793 /* oh well */
794 break;
796 case REFERER_FAKE:
797 optstr = get_opt_str("protocol.http.referer.fake", NULL);
798 if (!optstr[0]) break;
799 add_to_string(&header, "Referer: ");
800 add_to_string(&header, optstr);
801 add_crlf_to_string(&header);
802 break;
804 case REFERER_TRUE:
805 if (!conn->referrer) break;
806 add_to_string(&header, "Referer: ");
807 add_url_to_http_string(&header, conn->referrer, URI_HTTP_REFERRER);
808 add_crlf_to_string(&header);
809 break;
811 case REFERER_SAME_URL:
812 add_to_string(&header, "Referer: ");
813 add_url_to_http_string(&header, uri, URI_HTTP_REFERRER);
814 add_crlf_to_string(&header);
815 break;
819 /* CONNECT: Do send all Accept* headers to the CONNECT proxy,
820 * because they do not reveal anything about the resource
821 * we're going to request via TLS, and they may affect the
822 * error message if the CONNECT request fails.
824 * If ELinks is ever changed to vary its Accept headers based
825 * on what it intends to do with the returned resource, e.g.
826 * sending "Accept: text/css" when it wants an external
827 * stylesheet, then it should do that only in the inner GET
828 * and not in the outer CONNECT. */
829 add_to_string(&header, "Accept: */*");
830 add_crlf_to_string(&header);
832 if (get_opt_bool("protocol.http.compression", NULL))
833 accept_encoding_header(&header);
/* The Accept-Charset line is built once and cached in accept_charset. */
835 if (!accept_charset) {
836 init_accept_charset();
839 if (!(http->bl_flags & SERVER_BLACKLIST_NO_CHARSET)
840 && !get_opt_bool("protocol.http.bugs.accept_charset", NULL)
841 && accept_charset) {
842 add_to_string(&header, accept_charset);
845 optstr = get_opt_str("protocol.http.accept_language", NULL);
846 if (optstr[0]) {
847 add_to_string(&header, "Accept-Language: ");
848 add_to_string(&header, optstr);
849 add_crlf_to_string(&header);
851 #ifdef CONFIG_NLS
852 else if (get_opt_bool("protocol.http.accept_ui_language", NULL)) {
853 unsigned char *code = language_to_iso639(current_language);
855 if (code) {
856 add_to_string(&header, "Accept-Language: ");
857 add_to_string(&header, code);
858 add_crlf_to_string(&header);
861 #endif
863 /* CONNECT: Proxy-Connection is intended to be seen by the
864 * proxy. If the CONNECT request succeeds, then the proxy
865 * will forward the remainder of the TCP connection to the
866 * origin server, and Proxy-Connection does not matter; but
867 * if the request fails, then Proxy-Connection may matter. */
868 /* FIXME: What about post-HTTP/1.1?? --Zas */
869 if (HTTP_1_1(http->sent_version)) {
870 if (!IS_PROXY_URI(conn->uri)) {
871 add_to_string(&header, "Connection: ");
872 } else {
873 add_to_string(&header, "Proxy-Connection: ");
876 if (!uri->post || !get_opt_bool("protocol.http.bugs.post_no_keepalive", NULL)) {
877 add_to_string(&header, "Keep-Alive");
878 } else {
879 add_to_string(&header, "close");
881 add_crlf_to_string(&header);
884 /* CONNECT: Do not tell the proxy anything we have cached
885 * about the resource. */
886 if (!use_connect && conn->cached) {
887 if (!conn->cached->incomplete && conn->cached->head
888 && conn->cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED) {
889 if (conn->cached->last_modified) {
890 add_to_string(&header, "If-Modified-Since: ");
891 add_to_string(&header, conn->cached->last_modified);
892 add_crlf_to_string(&header);
894 if (conn->cached->etag) {
895 add_to_string(&header, "If-None-Match: ");
896 add_to_string(&header, conn->cached->etag);
897 add_crlf_to_string(&header);
902 /* CONNECT: Let's send cache control headers to the proxy too;
903 * they may affect DNS caching. */
904 if (conn->cache_mode >= CACHE_MODE_FORCE_RELOAD) {
905 add_to_string(&header, "Pragma: no-cache");
906 add_crlf_to_string(&header);
907 add_to_string(&header, "Cache-Control: no-cache");
908 add_crlf_to_string(&header);
911 /* CONNECT: Do not reveal byte ranges to the proxy. It can't
912 * do anything good with that information anyway. */
913 if (!use_connect && (conn->from || conn->progress->start > 0)) {
914 /* conn->from takes precedence. conn->progress.start is set only the first
915 * time, then conn->from gets updated and in case of any retries
916 * etc we have everything interesting in conn->from already. */
917 add_to_string(&header, "Range: bytes=");
918 add_long_to_string(&header, conn->from ? conn->from : conn->progress->start);
919 add_char_to_string(&header, '-');
920 add_crlf_to_string(&header);
923 /* CONNECT: The Authorization header is for the origin server only. */
924 if (!use_connect) {
925 #ifdef CONFIG_GSSAPI
926 if (http_negotiate_output(uri, &header) != 0)
927 #endif
928 entry = find_auth(uri);
931 if (entry) {
932 if (entry->digest) {
933 unsigned char *response;
935 response = get_http_auth_digest_response(entry, uri);
936 if (response) {
937 add_to_string(&header, "Authorization: Digest ");
938 add_to_string(&header, response);
939 add_crlf_to_string(&header);
941 mem_free(response);
944 } else {
945 /* RFC2617 section 2 [Basic Authentication Scheme]
947 * To receive authorization, the client sends the userid
948 * and password, separated by a single colon (":")
949 * character, within a base64 [7] encoded string in the
950 * credentials. */
951 unsigned char *id;
953 /* Create base64 encoded string. */
954 id = straconcat(entry->user, ":", entry->password,
955 (unsigned char *) NULL);
956 if (id) {
957 unsigned char *base64 = base64_encode(id);
959 mem_free_set(&id, base64);
962 if (id) {
963 add_to_string(&header, "Authorization: Basic ");
964 add_to_string(&header, id);
965 add_crlf_to_string(&header);
966 mem_free(id);
971 /* CONNECT: Any POST data is for the origin server only. */
972 if (!use_connect && uri->post) {
973 /* We search for first '\n' in uri->post to get content type
974 * as set by get_form_uri(). This '\n' is dropped if any
975 * and replaced by correct '\r\n' termination here. */
976 unsigned char *postend = strchr(uri->post, '\n');
977 struct connection_state error;
979 if (postend) {
980 add_to_string(&header, "Content-Type: ");
981 add_bytes_to_string(&header, uri->post, postend - uri->post);
982 add_crlf_to_string(&header);
985 post_data = postend ? postend + 1 : uri->post;
986 if (!open_http_post(&http->post, post_data, &error)) {
987 http_end_request(conn, error, 0);
988 done_string(&header);
989 return;
991 add_format_to_string(&header, "Content-Length: "
992 "%" OFF_PRINT_FORMAT "\x0D\x0A",
993 (off_print_T)
994 http->post.total_upload_length);
997 #ifdef CONFIG_COOKIES
998 /* CONNECT: Cookies are for the origin server only. */
999 if (!use_connect) {
1000 struct string *cookies = send_cookies(uri);
1002 if (cookies) {
1003 add_to_string(&header, "Cookie: ");
1004 add_string_to_string(&header, cookies);
1005 add_crlf_to_string(&header);
1006 done_string(cookies);
1009 #endif
/* Blank line that terminates the header section. */
1011 add_crlf_to_string(&header);
1013 /* CONNECT: Any POST data is for the origin server only.
1014 * This was already checked above and post_data is NULL
1015 * in that case. Verified with an assertion below. */
1016 if (post_data) {
1017 assert(!use_connect); /* see comment above */
1019 socket->state = SOCKET_END_ONCLOSE;
1020 if (!conn->http_upload_progress && http->post.file_count)
1021 conn->http_upload_progress = init_progress(0);
1022 write_to_socket(socket, header.source, header.length,
1023 connection_state(S_TRANS),
1024 send_more_post_data);
1025 } else
1026 request_from_socket(socket, header.source, header.length,
1027 connection_state(S_SENT),
1028 SOCKET_END_ONCLOSE, http_got_header);
1029 done_string(&header);
1032 #undef POST_BUFFER_SIZE
1035 /* This function decompresses the data block given in @data (if it was
1036 * compressed), which is long @len bytes. The decompressed data block is given
1037 * back to the world as the return value and its length is stored into
1038 * @new_len. After this function returns, the caller will discard all the @len
1039 * input bytes, so this function must use all of them unless an error occurs.
1041 * In this function, value of either http->chunk_remaining or http->length is
1042 * being changed (it depends on if chunked mode is used or not).
1044 * Note that the function is still a little esoteric for me. Don't take it
1045 * lightly and don't mess with it without grave reason! If you dare to touch
1046 * this without testing the changes on slashdot, freshmeat and cvsweb
1047 * (including revision history), don't dare to send me any patches! ;) --pasky
1049 * This function gotta die. */
1050 static unsigned char *
1051 decompress_data(struct connection *conn, unsigned char *data, int len,
1052 int *new_len)
1054 struct http_connection_info *http = conn->info;
1055 enum { NORMAL, FINISHING } state = NORMAL;
1056 int did_read = 0;
1057 int *length_of_block;
1058 unsigned char *output = NULL;
1061 if (http->length == LEN_CHUNKED) {
1062 if (http->chunk_remaining == CHUNK_ZERO_SIZE)
1063 state = FINISHING;
1064 length_of_block = &http->chunk_remaining;
1065 } else {
1066 length_of_block = &http->length;
1067 if (!*length_of_block) {
1068 /* Going to finish this decoding bussiness. */
1069 state = FINISHING;
1073 if (conn->content_encoding == ENCODING_NONE) {
1074 *new_len = len;
1075 if (*length_of_block > 0) *length_of_block -= len;
1076 return data;
1079 *new_len = 0; /* new_len must be zero if we would ever return NULL */
1081 if (conn->stream_pipes[0] == -1
1082 && (c_pipe(conn->stream_pipes) < 0
1083 || set_nonblocking_fd(conn->stream_pipes[0]) < 0
1084 || set_nonblocking_fd(conn->stream_pipes[1]) < 0)) {
1085 return NULL;
1088 do {
1089 unsigned char *tmp;
1091 if (state == NORMAL) {
1092 /* ... we aren't finishing yet. */
1093 int written = safe_write(conn->stream_pipes[1], data, len);
1095 if (written >= 0) {
1096 data += written;
1097 len -= written;
1099 /* In non-keep-alive connections http->length == -1, so the test below */
1100 if (*length_of_block > 0)
1101 *length_of_block -= written;
1102 /* http->length is 0 at the end of block for all modes: keep-alive,
1103 * non-keep-alive and chunked */
1104 if (!http->length) {
1105 /* That's all, folks - let's finish this. */
1106 state = FINISHING;
1107 } else if (!len) {
1108 /* We've done for this round (but not done
1109 * completely). Thus we will get out with
1110 * what we have and leave what we wrote to
1111 * the next round - we have to do that since
1112 * we MUST NOT ever empty the pipe completely
1113 * - this would cause a disaster for
1114 * read_encoded(), which would simply not
1115 * work right then. */
1116 return output;
1121 if (!conn->stream) {
1122 conn->stream = open_encoded(conn->stream_pipes[0],
1123 conn->content_encoding);
1124 if (!conn->stream) return NULL;
1127 tmp = mem_realloc(output, *new_len + BIG_READ);
1128 if (!tmp) break;
1129 output = tmp;
1131 did_read = read_encoded(conn->stream, output + *new_len, BIG_READ);
1133 /* Do not break from the loop if did_read == 0. It
1134 * means no decoded data is available yet, but some may
1135 * become available later. This happens especially with
1136 * the bzip2 decoder, which needs an entire compressed
1137 * block as input before it generates any output. */
1138 if (did_read < 0) {
1139 state = FINISHING;
1140 break;
1142 *new_len += did_read;
1143 } while (len || (did_read == BIG_READ));
1145 if (state == FINISHING) shutdown_connection_stream(conn);
1146 return output;
1148 #undef BIG_READ
1150 static int
1151 is_line_in_buffer(struct read_buffer *rb)
1153 int l;
1155 for (l = 0; l < rb->length; l++) {
1156 unsigned char a0 = rb->data[l];
1158 if (a0 == ASCII_LF)
1159 return l + 1;
1160 if (a0 == ASCII_CR) {
1161 if (rb->data[l + 1] == ASCII_LF
1162 && l < rb->length - 1)
1163 return l + 2;
1164 if (l == rb->length - 1)
1165 return 0;
1167 if (a0 < ' ')
1168 return -1;
1170 return 0;
1173 static void read_http_data(struct socket *socket, struct read_buffer *rb);
1175 static void
1176 read_more_http_data(struct connection *conn, struct read_buffer *rb,
1177 int already_got_anything)
1179 struct connection_state state = already_got_anything
1180 ? connection_state(S_TRANS) : conn->state;
1182 read_from_socket(conn->socket, rb, state, read_http_data);
1185 static void
1186 read_http_data_done(struct connection *conn)
1188 struct http_connection_info *http = conn->info;
1190 /* There's no content but an error so just print
1191 * that instead of nothing. */
1192 if (!conn->from) {
1193 if (http->code >= 400) {
1194 http_error_document(conn, http->code);
1196 } else {
1197 /* This is not an error, thus fine. No need generate any
1198 * document, as this may be empty and it's not a problem.
1199 * In case of 3xx, we're probably just getting kicked to
1200 * another page anyway. And in case of 2xx, the document
1201 * may indeed be empty and thus the user should see it so. */
1205 http_end_request(conn, connection_state(S_OK), 0);
1208 /* Returns:
1209 * -1 on error
1210 * 0 if more to read
1211 * 1 if done
1213 static int
1214 read_chunked_http_data(struct connection *conn, struct read_buffer *rb)
1216 struct http_connection_info *http = conn->info;
1217 int total_data_len = 0;
1219 while (1) {
1220 /* Chunked. Good luck! */
1221 /* See RFC2616, section 3.6.1. Basically, it looks like:
1222 * 1234 ; a = b ; c = d\r\n
1223 * aklkjadslkfjalkfjlkajkljfdkljdsfkljdf*1234\r\n
1224 * 0\r\n
1225 * \r\n */
1226 if (http->chunk_remaining == CHUNK_DATA_END) {
1227 int l = is_line_in_buffer(rb);
1229 if (l) {
1230 if (l == -1) {
1231 /* Invalid character in buffer. */
1232 return -1;
1235 /* Remove everything to the EOLN. */
1236 kill_buffer_data(rb, l);
1237 if (l <= 2) {
1238 /* Empty line. */
1239 return 2;
1241 continue;
1244 } else if (http->chunk_remaining == CHUNK_SIZE) {
1245 int l = is_line_in_buffer(rb);
1247 if (l) {
1248 unsigned char *de;
1249 int n = 0;
1251 if (l != -1) {
1252 errno = 0;
1253 n = strtol(rb->data, (char **) &de, 16);
1254 if (errno || !*de) {
1255 return -1;
1259 if (l == -1 || de == rb->data) {
1260 return -1;
1263 /* Remove everything to the EOLN. */
1264 kill_buffer_data(rb, l);
1265 http->chunk_remaining = n;
1266 if (!http->chunk_remaining)
1267 http->chunk_remaining = CHUNK_ZERO_SIZE;
1268 continue;
1271 } else {
1272 unsigned char *data;
1273 int data_len;
1274 int zero = (http->chunk_remaining == CHUNK_ZERO_SIZE);
1275 int len = zero ? 0 : http->chunk_remaining;
1277 /* Maybe everything necessary didn't come yet.. */
1278 int_upper_bound(&len, rb->length);
1279 conn->received += len;
1281 data = decompress_data(conn, rb->data, len, &data_len);
1283 if (add_fragment(conn->cached, conn->from,
1284 data, data_len) == 1)
1285 conn->tries = 0;
1287 if (data && data != rb->data) mem_free(data);
1289 conn->from += data_len;
1290 total_data_len += data_len;
1292 kill_buffer_data(rb, len);
1294 if (zero) {
1295 /* Last chunk has zero length, so this is last
1296 * chunk, we finished decompression just now
1297 * and now we can happily finish reading this
1298 * stuff. */
1299 http->chunk_remaining = CHUNK_DATA_END;
1300 continue;
1303 if (!http->chunk_remaining && rb->length > 0) {
1304 /* Eat newline succeeding each chunk. */
1305 if (rb->data[0] == ASCII_LF) {
1306 kill_buffer_data(rb, 1);
1307 } else {
1308 if (rb->data[0] != ASCII_CR
1309 || (rb->length >= 2
1310 && rb->data[1] != ASCII_LF)) {
1311 return -1;
1313 if (rb->length < 2) break;
1314 kill_buffer_data(rb, 2);
1316 http->chunk_remaining = CHUNK_SIZE;
1317 continue;
1320 break;
1323 /* More to read. */
1324 return !!total_data_len;
1327 /* Returns 0 if more data, 1 if done. */
1328 static int
1329 read_normal_http_data(struct connection *conn, struct read_buffer *rb)
1331 struct http_connection_info *http = conn->info;
1332 unsigned char *data;
1333 int data_len;
1334 int len = rb->length;
1336 if (http->length >= 0 && http->length < len) {
1337 /* We won't read more than we have to go. */
1338 len = http->length;
1341 conn->received += len;
1343 data = decompress_data(conn, rb->data, len, &data_len);
1345 if (add_fragment(conn->cached, conn->from, data, data_len) == 1)
1346 conn->tries = 0;
1348 if (data && data != rb->data) mem_free(data);
1350 conn->from += data_len;
1352 kill_buffer_data(rb, len);
1354 if (!http->length && (conn->socket->state == SOCKET_RETRY_ONCLOSE
1355 || conn->socket->state == SOCKET_CLOSED)) {
1356 return 2;
1359 return !!data_len;
1362 static void
1363 read_http_data(struct socket *socket, struct read_buffer *rb)
1365 struct connection *conn = socket->conn;
1366 struct http_connection_info *http = conn->info;
1367 int ret;
1369 if (socket->state == SOCKET_CLOSED) {
1370 if (conn->content_encoding) {
1371 /* Flush decompression first. */
1372 http->length = 0;
1373 } else {
1374 read_http_data_done(conn);
1375 return;
1379 if (http->length != LEN_CHUNKED) {
1380 ret = read_normal_http_data(conn, rb);
1382 } else {
1383 ret = read_chunked_http_data(conn, rb);
1386 switch (ret) {
1387 case 0:
1388 read_more_http_data(conn, rb, 0);
1389 break;
1390 case 1:
1391 read_more_http_data(conn, rb, 1);
1392 break;
1393 case 2:
1394 read_http_data_done(conn);
1395 break;
1396 default:
1397 assertm(ret == -1, "Unexpected return value: %d", ret);
1398 abort_connection(conn, connection_state(S_HTTP_ERROR));
1402 /* Returns offset of the header end, zero if more data is needed, -1 when
1403 * incorrect data was received, -2 if this is HTTP/0.9 and no header is to
1404 * come. */
1405 static int
1406 get_header(struct read_buffer *rb)
1408 int i;
1410 /* XXX: We will have to do some guess about whether an HTTP header is
1411 * coming or not, in order to support HTTP/0.9 reply correctly. This
1412 * means a little code duplication with get_http_code(). --pasky */
1413 if (rb->length > 4 && c_strncasecmp(rb->data, "HTTP/", 5))
1414 return -2;
1416 for (i = 0; i < rb->length; i++) {
1417 unsigned char a0 = rb->data[i];
1418 unsigned char a1 = rb->data[i + 1];
1420 if (a0 == 0) {
1421 rb->data[i] = ' ';
1422 continue;
1424 if (a0 == ASCII_LF && a1 == ASCII_LF
1425 && i < rb->length - 1)
1426 return i + 2;
1427 if (a0 == ASCII_CR && i < rb->length - 3) {
1428 if (a1 == ASCII_CR) continue;
1429 if (a1 != ASCII_LF) return -1;
1430 if (rb->data[i + 2] == ASCII_CR) {
1431 if (rb->data[i + 3] != ASCII_LF) return -1;
1432 return i + 4;
1437 return 0;
/* Walk over all @header_field fields found in @header and act on the first
 * authentication challenge that can be handled: a Basic challenge carrying a
 * realm, a Digest challenge, or (with CONFIG_GSSAPI) one of the negotiate
 * schemes.
 *
 * Returns 1 if the connection needs to be retried (for negotiate-auth only),
 * 0 otherwise. */
static int
check_http_authentication(struct connection *conn, struct uri *uri,
		unsigned char *header, unsigned char *header_field)
{
	unsigned char *rest;
	unsigned char *value;
	int need_retry = 0;

	for (value = parse_header(header, header_field, &rest);
	     value;
	     value = parse_header(rest, header_field, &rest)) {
		int handled = 0;

		if (!c_strncasecmp(value, "Basic", 5)) {
			unsigned char *realm = get_header_param(value, "realm");

			/* A Basic challenge without a realm is skipped and
			 * the next field is examined. */
			if (realm) {
				add_auth_entry(uri, realm, NULL, NULL, 0);
				mem_free(realm);
				handled = 1;
			}
		} else if (!c_strncasecmp(value, "Digest", 6)) {
			unsigned char *realm = get_header_param(value, "realm");
			unsigned char *nonce = get_header_param(value, "nonce");
			unsigned char *opaque = get_header_param(value, "opaque");

			add_auth_entry(uri, realm, nonce, opaque, 1);

			mem_free_if(realm);
			mem_free_if(nonce);
			mem_free_if(opaque);
			handled = 1;
		}
#ifdef CONFIG_GSSAPI
		else if (!c_strncasecmp(value, HTTPNEG_GSS_STR, HTTPNEG_GSS_STRLEN)) {
			if (http_negotiate_input(conn, uri, HTTPNEG_GSS, rest) == 0)
				need_retry = 1;
			handled = 1;
		} else if (!c_strncasecmp(value, HTTPNEG_NEG_STR, HTTPNEG_NEG_STRLEN)) {
			if (http_negotiate_input(conn, uri, HTTPNEG_NEG, rest) == 0)
				need_retry = 1;
			handled = 1;
		}
#endif

		mem_free(value);
		if (handled)
			break;
	}

	return need_retry;
}
1493 void
1494 http_got_header(struct socket *socket, struct read_buffer *rb)
1496 struct connection *conn = socket->conn;
1497 struct http_connection_info *http = conn->info;
1498 unsigned char *head;
1499 #ifdef CONFIG_COOKIES
1500 unsigned char *cookie, *ch;
1501 #endif
1502 unsigned char *d;
1503 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
1504 struct http_version version = { 0, 9 };
1505 struct connection_state state = (!is_in_state(conn->state, S_PROC)
1506 ? connection_state(S_GETH)
1507 : connection_state(S_PROC));
1508 int a, h = 200;
1509 int cf;
1511 if (socket->state == SOCKET_CLOSED) {
1512 if (!conn->tries && uri->host) {
1513 if (http->bl_flags & SERVER_BLACKLIST_NO_CHARSET) {
1514 del_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1515 } else {
1516 add_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1517 conn->tries = -1;
1520 retry_connection(conn, connection_state(S_CANT_READ));
1521 return;
1523 socket->state = SOCKET_RETRY_ONCLOSE;
1525 again:
1526 a = get_header(rb);
1527 if (a == -1) {
1528 abort_connection(conn, connection_state(S_HTTP_ERROR));
1529 return;
1531 if (!a) {
1532 read_from_socket(conn->socket, rb, state, http_got_header);
1533 return;
1535 /* a == -2 from get_header means HTTP/0.9. In that case, skip
1536 * the get_http_code call; @h and @version have already been
1537 * initialized with the right values. */
1538 if (a == -2) a = 0;
1539 if ((a && get_http_code(rb, &h, &version))
1540 || h == 101) {
1541 abort_connection(conn, connection_state(S_HTTP_ERROR));
1542 return;
1545 /* When no header, HTTP/0.9 document. That's always text/html,
1546 * according to
1547 * http://www.w3.org/Protocols/HTTP/AsImplemented.html. */
1548 /* FIXME: This usage of fake protocol headers for setting up the
1549 * content type has been obsoleted by the @content_type member of
1550 * {struct cache_entry}. */
1551 head = (a ? memacpy(rb->data, a)
1552 : stracpy("\r\nContent-Type: text/html\r\n"));
1553 if (!head) {
1554 abort_connection(conn, connection_state(S_OUT_OF_MEM));
1555 return;
1558 if (check_http_server_bugs(uri, http, head)) {
1559 mem_free(head);
1560 retry_connection(conn, connection_state(S_RESTART));
1561 return;
1564 #ifdef CONFIG_CGI
1565 if (uri->protocol == PROTOCOL_FILE) {
1566 /* ``Status'' is not a standard HTTP header field although some
1567 * HTTP servers like www.php.net uses it for some reason. It should
1568 * only be used for CGI scripts so that it does not interfere
1569 * with status code depended handling for ``normal'' HTTP like
1570 * redirects. */
1571 d = parse_header(head, "Status", NULL);
1572 if (d) {
1573 int h2 = atoi(d);
1575 mem_free(d);
1576 if (h2 >= 100 && h2 < 600) h = h2;
1577 if (h == 101) {
1578 mem_free(head);
1579 abort_connection(conn, connection_state(S_HTTP_ERROR));
1580 return;
1584 #endif
1586 #ifdef CONFIG_COOKIES
1587 ch = head;
1588 while ((cookie = parse_header(ch, "Set-Cookie", &ch))) {
1589 set_cookie(uri, cookie);
1590 mem_free(cookie);
1592 #endif
1593 http->code = h;
1595 if (h == 100) {
1596 mem_free(head);
1597 state = connection_state(S_PROC);
1598 kill_buffer_data(rb, a);
1599 goto again;
1601 if (h < 200) {
1602 mem_free(head);
1603 abort_connection(conn, connection_state(S_HTTP_ERROR));
1604 return;
1606 if (h == 304) {
1607 mem_free(head);
1608 http_end_request(conn, connection_state(S_OK), 1);
1609 return;
1611 if (h == 204) {
1612 mem_free(head);
1613 http_end_request(conn, connection_state(S_HTTP_204), 0);
1614 return;
1616 if (h == 200 && connection_is_https_proxy(conn) && !conn->socket->ssl) {
1617 #ifdef CONFIG_SSL
1618 mem_free(head);
1619 socket->need_ssl = 1;
1620 complete_connect_socket(socket, uri, http_send_header);
1621 #else
1622 abort_connection(conn, connection_state(S_SSL_ERROR));
1623 #endif
1624 return;
1627 conn->cached = get_cache_entry(conn->uri);
1628 if (!conn->cached) {
1629 mem_free(head);
1630 abort_connection(conn, connection_state(S_OUT_OF_MEM));
1631 return;
1633 conn->cached->cgi = conn->cgi;
1634 mem_free_set(&conn->cached->head, head);
1636 if (!get_opt_bool("document.cache.ignore_cache_control", NULL)) {
1637 struct cache_entry *cached = conn->cached;
1639 /* I am not entirely sure in what order we should process these
1640 * headers and if we should still process Cache-Control max-age
1641 * if we already set max age to date mentioned in Expires.
1642 * --jonas */
1643 /* Ensure that when ever cached->max_age is set, cached->expired
1644 * is also set, so the cache management knows max_age contains a
1645 * valid time. If on the other hand no caching is requested
1646 * cached->expire should be set to zero. */
1647 if ((d = parse_header(cached->head, "Expires", NULL))) {
1648 /* Convert date to seconds. */
1649 time_t expires = parse_date(&d, NULL, 0, 1);
1651 mem_free(d);
1653 if (expires && cached->cache_mode != CACHE_MODE_NEVER) {
1654 timeval_from_seconds(&cached->max_age, expires);
1655 cached->expire = 1;
1659 if ((d = parse_header(cached->head, "Pragma", NULL))) {
1660 if (strstr(d, "no-cache")) {
1661 cached->cache_mode = CACHE_MODE_NEVER;
1662 cached->expire = 0;
1664 mem_free(d);
1667 if (cached->cache_mode != CACHE_MODE_NEVER
1668 && (d = parse_header(cached->head, "Cache-Control", NULL))) {
1669 if (strstr(d, "no-cache") || strstr(d, "must-revalidate")) {
1670 cached->cache_mode = CACHE_MODE_NEVER;
1671 cached->expire = 0;
1673 } else {
1674 unsigned char *pos = strstr(d, "max-age=");
1676 assert(cached->cache_mode != CACHE_MODE_NEVER);
1678 if (pos) {
1679 /* Grab the number of seconds. */
1680 timeval_T max_age;
1682 timeval_from_seconds(&max_age, atol(pos + 8));
1683 timeval_now(&cached->max_age);
1684 timeval_add_interval(&cached->max_age, &max_age);
1686 cached->expire = 1;
1690 mem_free(d);
1694 /* XXX: Is there some reason why NOT to follow the Location header
1695 * for any status? If the server didn't mean it, it wouldn't send
1696 * it, after all...? --pasky */
1697 if (h == 201 || h == 301 || h == 302 || h == 303 || h == 307) {
1698 d = parse_header(conn->cached->head, "Location", NULL);
1699 if (d) {
1700 int use_get_method = (h == 303);
1702 /* A note from RFC 2616 section 10.3.3:
1703 * RFC 1945 and RFC 2068 specify that the client is not
1704 * allowed to change the method on the redirected
1705 * request. However, most existing user agent
1706 * implementations treat 302 as if it were a 303
1707 * response, performing a GET on the Location
1708 * field-value regardless of the original request
1709 * method. */
1710 /* So POST must not be redirected to GET, but some
1711 * BUGGY message boards rely on it :-( */
1712 if (h == 302
1713 && get_opt_bool("protocol.http.bugs.broken_302_redirect", NULL))
1714 use_get_method = 1;
1716 redirect_cache(conn->cached, d, use_get_method, -1);
1717 mem_free(d);
1721 if (h == 401) {
1722 if (check_http_authentication(conn, uri,
1723 conn->cached->head, "WWW-Authenticate")) {
1724 retry_connection(conn, connection_state(S_RESTART));
1725 return;
1729 if (h == 407) {
1730 unsigned char *str;
1732 d = parse_header(conn->cached->head, "Proxy-Authenticate", &str);
1733 while (d) {
1734 if (!c_strncasecmp(d, "Basic", 5)) {
1735 unsigned char *realm = get_header_param(d, "realm");
1737 if (realm) {
1738 mem_free_set(&proxy_auth.realm, realm);
1739 proxy_auth.digest = 0;
1740 mem_free(d);
1741 break;
1744 } else if (!c_strncasecmp(d, "Digest", 6)) {
1745 unsigned char *realm = get_header_param(d, "realm");
1746 unsigned char *nonce = get_header_param(d, "nonce");
1747 unsigned char *opaque = get_header_param(d, "opaque");
1749 mem_free_set(&proxy_auth.realm, realm);
1750 mem_free_set(&proxy_auth.nonce, nonce);
1751 mem_free_set(&proxy_auth.opaque, opaque);
1752 proxy_auth.digest = 1;
1754 mem_free(d);
1755 break;
1758 mem_free(d);
1759 d = parse_header(str, "Proxy-Authenticate", &str);
1763 kill_buffer_data(rb, a);
1764 http->close = 0;
1765 http->length = -1;
1766 http->recv_version = version;
1768 if ((d = parse_header(conn->cached->head, "Connection", NULL))
1769 || (d = parse_header(conn->cached->head, "Proxy-Connection", NULL))) {
1770 if (!c_strcasecmp(d, "close")) http->close = 1;
1771 mem_free(d);
1772 } else if (PRE_HTTP_1_1(version)) {
1773 http->close = 1;
1776 cf = conn->from;
1777 conn->from = 0;
1778 d = parse_header(conn->cached->head, "Content-Range", NULL);
1779 if (d) {
1780 if (strlen(d) > 6) {
1781 d[5] = 0;
1782 if (isdigit(d[6]) && !c_strcasecmp(d, "bytes")) {
1783 int f;
1785 errno = 0;
1786 f = strtol(d + 6, NULL, 10);
1788 if (!errno && f >= 0) conn->from = f;
1791 mem_free(d);
1793 if (cf && !conn->from && !conn->unrestartable) conn->unrestartable = 1;
1794 if ((conn->progress->start <= 0 && conn->from > cf) || conn->from < 0) {
1795 /* We don't want this if conn->progress.start because then conn->from will
1796 * be probably value of conn->progress.start, while cf is 0. */
1797 abort_connection(conn, connection_state(S_HTTP_ERROR));
1798 return;
1801 #if 0
1803 struct status *s;
1804 foreach (s, conn->downloads) {
1805 fprintf(stderr, "conn %p status %p pri %d st %d er %d :: ce %s",
1806 conn, s, s->pri, s->state, s->prev_error,
1807 s->cached ? s->cached->url : (unsigned char *) "N-U-L-L");
1810 #endif
1812 if (conn->progress->start >= 0) {
1813 /* Update to the real value which we've got from Content-Range. */
1814 conn->progress->seek = conn->from;
1816 conn->progress->start = conn->from;
1818 d = parse_header(conn->cached->head, "Content-Length", NULL);
1819 if (d) {
1820 unsigned char *ep;
1821 int l;
1823 errno = 0;
1824 l = strtol(d, (char **) &ep, 10);
1826 if (!errno && !*ep && l >= 0) {
1827 if (!http->close || POST_HTTP_1_0(version))
1828 http->length = l;
1829 conn->est_length = conn->from + l;
1831 mem_free(d);
1834 if (!conn->unrestartable) {
1835 d = parse_header(conn->cached->head, "Accept-Ranges", NULL);
1837 if (d) {
1838 if (!c_strcasecmp(d, "none"))
1839 conn->unrestartable = 1;
1840 mem_free(d);
1841 } else {
1842 if (!conn->from)
1843 conn->unrestartable = 1;
1847 d = parse_header(conn->cached->head, "Transfer-Encoding", NULL);
1848 if (d) {
1849 if (!c_strcasecmp(d, "chunked")) {
1850 http->length = LEN_CHUNKED;
1851 http->chunk_remaining = CHUNK_SIZE;
1853 mem_free(d);
1855 if (!http->close && http->length == -1) http->close = 1;
1857 d = parse_header(conn->cached->head, "Last-Modified", NULL);
1858 if (d) {
1859 if (conn->cached->last_modified && c_strcasecmp(conn->cached->last_modified, d)) {
1860 delete_entry_content(conn->cached);
1861 if (conn->from) {
1862 conn->from = 0;
1863 mem_free(d);
1864 retry_connection(conn, connection_state(S_MODIFIED));
1865 return;
1868 if (!conn->cached->last_modified) conn->cached->last_modified = d;
1869 else mem_free(d);
1871 if (!conn->cached->last_modified) {
1872 d = parse_header(conn->cached->head, "Date", NULL);
1873 if (d) conn->cached->last_modified = d;
1876 /* FIXME: Parse only if HTTP/1.1 or later? --Zas */
1877 d = parse_header(conn->cached->head, "ETag", NULL);
1878 if (d) {
1879 if (conn->cached->etag) {
1880 unsigned char *old_tag = conn->cached->etag;
1881 unsigned char *new_tag = d;
1883 /* http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19 */
1885 if (new_tag[0] == 'W' && new_tag[1] == '/')
1886 new_tag += 2;
1888 if (old_tag[0] == 'W' && old_tag[1] == '/')
1889 old_tag += 2;
1891 if (strcmp(new_tag, old_tag)) {
1892 delete_entry_content(conn->cached);
1893 if (conn->from) {
1894 conn->from = 0;
1895 mem_free(d);
1896 retry_connection(conn, connection_state(S_MODIFIED));
1897 return;
1902 if (!conn->cached->etag)
1903 conn->cached->etag = d;
1904 else
1905 mem_free(d);
1908 d = parse_header(conn->cached->head, "Content-Encoding", NULL);
1909 if (d) {
1910 unsigned char *extension = get_extension_from_uri(uri);
1911 enum stream_encoding file_encoding;
1913 file_encoding = extension ? guess_encoding(extension) : ENCODING_NONE;
1914 mem_free_if(extension);
1916 /* If the content is encoded, we want to preserve the encoding
1917 * if it is implied by the extension, so that saving the URI
1918 * will leave the saved file with the correct encoding. */
1919 #ifdef CONFIG_GZIP
1920 if (file_encoding != ENCODING_GZIP
1921 && (!c_strcasecmp(d, "gzip") || !c_strcasecmp(d, "x-gzip")))
1922 conn->content_encoding = ENCODING_GZIP;
1923 if (!c_strcasecmp(d, "deflate") || !c_strcasecmp(d, "x-deflate"))
1924 conn->content_encoding = ENCODING_DEFLATE;
1925 #endif
1927 #ifdef CONFIG_BZIP2
1928 if (file_encoding != ENCODING_BZIP2
1929 && (!c_strcasecmp(d, "bzip2") || !c_strcasecmp(d, "x-bzip2")))
1930 conn->content_encoding = ENCODING_BZIP2;
1931 #endif
1933 #ifdef CONFIG_LZMA
1934 if (file_encoding != ENCODING_LZMA
1935 && (!c_strcasecmp(d, "lzma") || !c_strcasecmp(d, "x-lzma")))
1936 conn->content_encoding = ENCODING_LZMA;
1937 #endif
1938 mem_free(d);
1941 if (conn->content_encoding != ENCODING_NONE) {
1942 mem_free_if(conn->cached->encoding_info);
1943 conn->cached->encoding_info = stracpy(get_encoding_name(conn->content_encoding));
1946 if (http->length == -1 || http->close)
1947 socket->state = SOCKET_END_ONCLOSE;
1949 read_http_data(socket, rb);