Experimental brotli encoding support.
[elinks.git] / src / protocol / http / http.c
blob69c5347b9eb65f06b2088dad603e2480dd573019
1 /* Internal "http" protocol implementation */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <errno.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #ifdef HAVE_LIMITS_H
12 #include <limits.h>
13 #endif
15 #include "elinks.h"
17 #include "cache/cache.h"
18 #include "config/options.h"
19 #include "cookies/cookies.h"
20 #include "intl/charsets.h"
21 #include "intl/gettext/libintl.h"
22 #include "main/module.h"
23 #include "network/connection.h"
24 #include "network/progress.h"
25 #include "network/socket.h"
26 #include "osdep/ascii.h"
27 #include "osdep/osdep.h"
28 #include "osdep/sysname.h"
29 #include "protocol/auth/auth.h"
30 #include "protocol/auth/digest.h"
31 #include "protocol/date.h"
32 #include "protocol/header.h"
33 #include "protocol/http/blacklist.h"
34 #include "protocol/http/codes.h"
35 #include "protocol/http/http.h"
36 #include "protocol/uri.h"
37 #include "session/session.h"
38 #include "terminal/terminal.h"
39 #include "util/base64.h"
40 #include "util/conv.h"
41 #include "util/memory.h"
42 #include "util/string.h"
44 #ifdef CONFIG_GSSAPI
45 #include "http_negotiate.h"
46 #endif
48 /* These macros concern the struct http_version defined in the http.h */
/* Each predicate takes the struct itself (not a pointer), since it accesses
 * (x).major / (x).minor, and may evaluate its argument more than once, so
 * do not pass expressions with side effects. */
49 #define HTTP_0_9(x) ((x).major == 0 && (x).minor == 9)
50 #define HTTP_1_0(x) ((x).major == 1 && (x).minor == 0)
51 #define HTTP_1_1(x) ((x).major == 1 && (x).minor == 1)
/* Strictly older than 1.0 (any 0.x version). */
52 #define PRE_HTTP_1_0(x) ((x).major < 1)
/* Strictly older than 1.1: any 0.x version, or exactly 1.0. */
53 #define PRE_HTTP_1_1(x) (PRE_HTTP_1_0(x) || HTTP_1_0(x))
/* Strictly newer than 1.0, respectively 1.1. */
54 #define POST_HTTP_1_0(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 0))
55 #define POST_HTTP_1_1(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 1))
/* Sentinel values for a body-length field; the field itself is not visible
 * in this part of the file -- NOTE(review): confirm against its users. */
58 #define LEN_CHUNKED -2 /* == we get data in unknown number of chunks */
59 #define LEN_FINISHED 0
61 /* Either bytes coming in this chunk yet or "parser state". */
/* These negative values are compared against http->chunk_remaining by the
 * chunked-transfer reader. */
62 #define CHUNK_DATA_END -3
63 #define CHUNK_ZERO_SIZE -2
64 #define CHUNK_SIZE -1
66 static struct auth_entry proxy_auth;
68 static unsigned char *accept_charset = NULL;
71 static union option_info http_options[] = {
72 INIT_OPT_TREE("protocol", N_("HTTP"),
73 "http", 0,
74 N_("HTTP-specific options.")),
77 INIT_OPT_TREE("protocol.http", N_("Server bug workarounds"),
78 "bugs", 0,
79 N_("Server-side HTTP bugs workarounds.")),
81 INIT_OPT_BOOL("protocol.http.bugs", N_("Do not send Accept-Charset"),
82 "accept_charset", 0, 1,
83 N_("The Accept-Charset header is quite long and sending it "
84 "can trigger bugs in some rarely found servers.")),
86 INIT_OPT_BOOL("protocol.http.bugs", N_("Allow blacklisting"),
87 "allow_blacklist", 0, 1,
88 N_("Allow blacklisting of buggy servers.")),
90 INIT_OPT_BOOL("protocol.http.bugs", N_("Broken 302 redirects"),
91 "broken_302_redirect", 0, 1,
92 N_("Broken 302 redirect (violates RFC but compatible with "
93 "Netscape). This is a problem for a lot of web discussion "
94 "boards and the like. If they will do strange things to you, "
95 "try to play with this.")),
97 INIT_OPT_BOOL("protocol.http.bugs", N_("No keepalive after POST requests"),
98 "post_no_keepalive", 0, 0,
99 N_("Disable keepalive connection after POST request.")),
101 INIT_OPT_BOOL("protocol.http.bugs", N_("Use HTTP/1.0"),
102 "http10", 0, 0,
103 N_("Use HTTP/1.0 protocol instead of HTTP/1.1.")),
105 INIT_OPT_TREE("protocol.http", N_("Proxy configuration"),
106 "proxy", 0,
107 N_("HTTP proxy configuration.")),
109 INIT_OPT_STRING("protocol.http.proxy", N_("Host and port-number"),
110 "host", 0, "",
111 N_("Host and port-number (host:port) of the HTTP proxy, "
112 "or blank. If it's blank, HTTP_PROXY environment variable "
113 "is checked as well.")),
115 INIT_OPT_STRING("protocol.http.proxy", N_("Username"),
116 "user", 0, "",
117 N_("Proxy authentication username.")),
119 INIT_OPT_STRING("protocol.http.proxy", N_("Password"),
120 "passwd", 0, "",
121 N_("Proxy authentication password.")),
124 INIT_OPT_TREE("protocol.http", N_("Referer sending"),
125 "referer", 0,
126 N_("HTTP referer sending options. HTTP referer is a special "
127 "header sent in the HTTP requests, which is supposed to "
128 "contain the previous page visited by the browser."
129 "This way, the server can know what link did you follow "
130 "when accessing that page. However, this behaviour can "
131 "unfortunately considerably affect privacy and can lead even "
132 "to a security problem on some badly designed web pages.")),
134 INIT_OPT_INT("protocol.http.referer", N_("Policy"),
135 "policy", 0,
136 REFERER_NONE, REFERER_TRUE, REFERER_TRUE,
137 N_("Mode of sending HTTP referer:\n"
138 "0 is send no referer\n"
139 "1 is send current URL as referer\n"
140 "2 is send fixed fake referer\n"
141 "3 is send previous URL as referer (correct, but insecure)")),
143 INIT_OPT_STRING("protocol.http.referer", N_("Fake referer URL"),
144 "fake", 0, "",
145 N_("Fake referer to be sent when policy is 2.")),
148 INIT_OPT_STRING("protocol.http", N_("Send Accept-Language header"),
149 "accept_language", 0, "",
150 N_("Send Accept-Language header.")),
152 INIT_OPT_BOOL("protocol.http", N_("Use UI language as Accept-Language"),
153 "accept_ui_language", 0, 1,
154 N_("Request localised versions of documents from web-servers "
155 "(using the Accept-Language header) using the language "
156 "you have configured for ELinks' user-interface (this also "
157 "affects navigator.language ECMAScript value available to "
158 "scripts). Note that some see this as a potential security "
159 "risk because it tells web-masters and the FBI sniffers "
160 "about your language preference.")),
162 /* http://www.eweek.com/c/a/Desktops-and-Notebooks/Intel-Psion-End-Dispute-Concerning-Netbook-Trademark-288875/
163 * responds with "Transfer-Encoding: chunked" and
164 * "Content-Encoding: gzip" but does not compress the first chunk
165 * and the last chunk, causing ELinks to display garbage.
166 * (If User-Agent includes "Gecko" (case sensitive), then
167 * that server correctly compresses the whole stream.)
168 * ELinks should instead report the decompression error (bug 1017)
169 * or perhaps even blacklist the server for compression and retry.
170 * Until that has been implemented, disable compression by default. */
171 INIT_OPT_BOOL("protocol.http", N_("Enable on-the-fly compression"),
172 "compression", 0, 0,
173 N_("If enabled, the capability to receive compressed content "
174 "(gzip and/or bzip2) is announced to the server, which "
175 "usually sends the reply compressed, thus saving some "
176 "bandwidth at slight CPU expense.\n"
177 "\n"
178 "If ELinks displays a incomplete page or garbage, try "
179 "disabling this option. If that helps, there may be a bug in "
180 "the decompression part of ELinks. Please report such bugs.\n"
181 "\n"
182 "If ELinks has been compiled without compression support, "
183 "this option has no effect. To check the supported features, "
184 "see Help -> About.")),
186 INIT_OPT_BOOL("protocol.http", N_("Activate HTTP TRACE debugging"),
187 "trace", 0, 0,
188 N_("If active, all HTTP requests are sent with TRACE as "
189 "their method rather than GET or POST. This is useful for "
190 "debugging of both ELinks and various server-side scripts "
191 "--- the server only returns the client's request back to "
192 "the client verbatim. Note that this type of request may "
193 "not be enabled on all servers.")),
195 /* OSNews.com is supposed to be relying on the textmode token, at least. */
196 INIT_OPT_STRING("protocol.http", N_("User-agent identification"),
197 "user_agent", 0, "ELinks/%v (textmode; %s; %t-%b)",
198 N_("Change the User Agent ID. That means identification "
199 "string, which is sent to HTTP server when a document is "
200 "requested. The 'textmode' token in the first field is our "
201 "silent attempt to establish this as a standard for new "
202 "textmode user agents, so that the webmasters can have "
203 "just a single uniform test for these if they are e.g. "
204 "pushing some lite version to them automagically.\n"
205 "\n"
206 "Use \" \" if you don't want any User-Agent header to be sent "
207 "at all. URI rewriting rules may still include parameters "
208 "that reveal you are using ELinks.\n"
209 "\n"
210 "%v in the string means ELinks version,\n"
211 "%s in the string means system identification,\n"
212 "%t in the string means size of the terminal,\n"
213 "%b in the string means number of bars displayed by ELinks.")),
216 INIT_OPT_TREE("protocol", N_("HTTPS"),
217 "https", 0,
218 N_("HTTPS-specific options.")),
220 INIT_OPT_TREE("protocol.https", N_("Proxy configuration"),
221 "proxy", 0,
222 N_("HTTPS proxy configuration.")),
224 INIT_OPT_STRING("protocol.https.proxy", N_("Host and port-number"),
225 "host", 0, "",
226 N_("Host and port-number (host:port) of the HTTPS CONNECT "
227 "proxy, or blank. If it's blank, HTTPS_PROXY environment "
228 "variable is checked as well.")),
229 NULL_OPTION_INFO,
232 static void done_http();
/* Module descriptor for the HTTP protocol backend.  It registers the
 * http_options table and done_http() as the shutdown hook; there is no
 * init hook, no event hooks, no submodules and no private data. */
234 struct module http_protocol_module = struct_module(
235 /* name: */ N_("HTTP"),
236 /* options: */ http_options,
237 /* hooks: */ NULL,
238 /* submodules: */ NULL,
239 /* data: */ NULL,
240 /* init: */ NULL,
241 /* done: */ done_http
245 static void
246 done_http(void)
248 mem_free_if(proxy_auth.realm);
249 mem_free_if(proxy_auth.nonce);
250 mem_free_if(proxy_auth.opaque);
252 free_blacklist();
254 if (accept_charset)
255 mem_free(accept_charset);
258 static void
259 init_accept_charset(void)
261 struct string ac;
262 unsigned char *cs;
263 int i;
265 if (!init_string(&ac)) return;
267 for (i = 0; (cs = get_cp_mime_name(i)); i++) {
268 if (ac.length) {
269 add_to_string(&ac, ", ");
270 } else {
271 add_to_string(&ac, "Accept-Charset: ");
273 add_to_string(&ac, cs);
276 if (ac.length) {
277 add_crlf_to_string(&ac);
280 accept_charset = squeezastring(&ac);
282 done_string(&ac);
286 unsigned char *
287 subst_user_agent(unsigned char *fmt, unsigned char *version,
288 unsigned char *sysname, unsigned char *termsize)
290 struct string agent;
292 if (!init_string(&agent)) return NULL;
294 while (*fmt) {
295 int p;
297 for (p = 0; fmt[p] && fmt[p] != '%'; p++);
299 add_bytes_to_string(&agent, fmt, p);
300 fmt += p;
302 if (*fmt != '%') continue;
304 fmt++;
305 switch (*fmt) {
306 case 'b':
307 if (!list_empty(sessions)) {
308 unsigned char bs[4] = "";
309 int blen = 0;
310 struct session *ses = sessions.prev;
311 int bars = ses->status.show_status_bar
312 + ses->status.show_tabs_bar
313 + ses->status.show_title_bar;
315 ulongcat(bs, &blen, bars, 2, 0);
316 add_to_string(&agent, bs);
318 break;
319 case 'v':
320 add_to_string(&agent, version);
321 break;
322 case 's':
323 add_to_string(&agent, sysname);
324 break;
325 case 't':
326 if (termsize)
327 add_to_string(&agent, termsize);
328 break;
329 default:
330 add_bytes_to_string(&agent, fmt - 1, 2);
331 break;
333 if (*fmt) fmt++;
336 return agent.source;
/* Append the @components of @uri to @header in a form suitable for an
 * HTTP request: every space, tab, CR or LF becomes "%20" and every
 * backslash becomes '/'.  Nothing is appended when the URI string cannot
 * be obtained.
 *
 * As the original author noted, this is not really the right place for
 * such escaping; URLs should rather be encoded as early as possible and
 * decoded again by the protocol backends where needed. --pasky */
static void
add_url_to_http_string(struct string *header, struct uri *uri, int components)
{
	unsigned char *url = get_uri_string(uri, components);
	unsigned char *cursor;

	if (!url) return;

	for (cursor = url; *cursor; cursor++) {
		/* Length of the run that needs no substitution. */
		int plain = strcspn(cursor, " \t\r\n\\");

		add_bytes_to_string(header, cursor, plain);
		cursor += plain;

		if (!*cursor) break;

		if (*cursor == '\\')
			add_char_to_string(header, '/');
		else
			add_to_string(header, "%20");
	}

	mem_free(url);
}
/* Parse the decimal digits in [@start, @end), scanning from @end - 1 back
 * to @start, and store the resulting number in *@value.
 *
 * Returns 0 on success.  Returns -1, leaving *@value untouched, when
 *  - the range is empty (@end <= @start); the do/while loop would
 *    otherwise read the byte in front of @start,
 *  - a non-digit byte is found, or
 *  - there are more than 9 digits, which could overflow the signed
 *    accumulator (assumes int is at least 32 bits wide). */
static int
revstr2num(unsigned char *start, unsigned char *end, int *value)
{
	int q = 1, val = 0;
	int digits = 0;

	if (end <= start) return -1; /* Nothing to parse. */

	do {
		--end;
		if (!isdigit(*end)) return -1; /* NaN */
		if (++digits > 9) return -1; /* Would overflow. */
		val += (*end - '0') * q;
		/* Only scale while another digit may still follow, so that
		 * q itself never overflows. */
		if (digits < 9) q *= 10;
	} while (end > start);

	*value = val;
	return 0;
}
390 /* This function extracts code, major and minor version from string
391 * "\s*HTTP/\d+.\d+\s+\d\d\d..."
392 * It returns a negative value on error, 0 on success.
394 static int
395 get_http_code(struct read_buffer *rb, int *code, struct http_version *version)
397 unsigned char *head = rb->data;
398 unsigned char *start;
400 *code = 0;
401 version->major = 0;
402 version->minor = 0;
404 /* Ignore spaces. */
405 while (*head == ' ') head++;
407 /* HTTP/ */
408 if (c_toupper(*head) != 'H' || c_toupper(*++head) != 'T' ||
409 c_toupper(*++head) != 'T' || c_toupper(*++head) != 'P'
410 || *++head != '/')
411 return -1;
413 /* Version */
414 start = ++head;
415 /* Find next '.' */
416 while (*head && *head != '.') head++;
417 /* Sanity check. */
418 if (!*head || !(head - start)
419 || (head - start) > 4
420 || !isdigit(*(head + 1)))
421 return -2;
423 /* Extract major version number. */
424 if (revstr2num(start, head, &version->major)) return -3; /* NaN */
426 start = head + 1;
428 /* Find next ' '. */
429 while (*head && *head != ' ') head++;
430 /* Sanity check. */
431 if (!*head || !(head - start) || (head - start) > 4) return -4;
433 /* Extract minor version number. */
434 if (revstr2num(start, head, &version->minor)) return -5; /* NaN */
436 /* Ignore spaces. */
437 while (*head == ' ') head++;
439 /* Sanity check for code. */
440 if (head[0] < '1' || head[0] > '9' ||
441 !isdigit(head[1]) ||
442 !isdigit(head[2]))
443 return -6; /* Invalid code. */
445 /* Extract code. */
446 *code = (head[0] - '0') * 100 + (head[1] - '0') * 10 + head[2] - '0';
448 return 0;
451 static int
452 check_http_server_bugs(struct uri *uri, struct http_connection_info *http,
453 unsigned char *head)
455 unsigned char *server;
456 const unsigned char *const *s;
457 static const unsigned char *const buggy_servers[] = {
458 "mod_czech/3.1.0",
459 "Purveyor",
460 "Netscape-Enterprise",
461 NULL
464 if (!get_opt_bool("protocol.http.bugs.allow_blacklist", NULL)
465 || HTTP_1_0(http->sent_version))
466 return 0;
468 server = parse_header(head, "Server", NULL);
469 if (!server)
470 return 0;
472 for (s = buggy_servers; *s; s++) {
473 if (strstr(server, *s)) {
474 add_blacklist_entry(uri, SERVER_BLACKLIST_HTTP10);
475 break;
479 mem_free(server);
480 return (*s != NULL);
483 static void
484 http_end_request(struct connection *conn, struct connection_state state,
485 int notrunc)
487 struct http_connection_info *http;
489 shutdown_connection_stream(conn);
491 /* shutdown_connection_stream() should not change conn->info,
492 * but in case it does, read conn->info only after the call. */
493 http = conn->info;
494 if (http)
495 done_http_post(&http->post);
497 if (http && !http->close
498 && (!conn->socket->ssl) /* We won't keep alive ssl connections */
499 && (!get_opt_bool("protocol.http.bugs.post_no_keepalive", NULL)
500 || !conn->uri->post)) {
501 if (is_in_state(state, S_OK) && conn->cached)
502 normalize_cache_entry(conn->cached, !notrunc ? conn->from : -1);
503 set_connection_state(conn, state);
504 add_keepalive_connection(conn, HTTP_KEEPALIVE_TIMEOUT, NULL);
505 } else {
506 abort_connection(conn, state);
510 static void http_send_header(struct socket *);
512 void
513 http_protocol_handler(struct connection *conn)
515 /* setcstate(conn, S_CONN); */
517 if (!has_keepalive_connection(conn)) {
518 make_connection(conn->socket, conn->uri, http_send_header,
519 conn->cache_mode >= CACHE_MODE_FORCE_RELOAD);
520 } else {
521 http_send_header(conn->socket);
/* Entry point for proxy connections.  It simply delegates to
 * http_protocol_handler(). */
void
proxy_protocol_handler(struct connection *conn)
{
	http_protocol_handler(conn);
}
/* True when the URI pointed to by x uses the proxy protocol. */
531 #define IS_PROXY_URI(x) ((x)->protocol == PROTOCOL_PROXY)
/* True when conn goes through a proxy (conn->uri is a proxy URI) and the
 * proxied URI is HTTPS.  Evaluates conn more than once. */
533 #define connection_is_https_proxy(conn) \
534 (IS_PROXY_URI((conn)->uri) && (conn)->proxied_uri->protocol == PROTOCOL_HTTPS)
536 /** connection.done points to this function if connection.info points
537 * to a struct http_connection_info. */
538 static void
539 done_http_connection(struct connection *conn)
541 struct http_connection_info *http = conn->info;
543 done_http_post(&http->post);
544 mem_free(http);
545 conn->info = NULL;
546 conn->done = NULL;
549 struct http_connection_info *
550 init_http_connection_info(struct connection *conn, int major, int minor, int close)
552 struct http_connection_info *http;
554 http = mem_calloc(1, sizeof(*http));
555 if (!http) {
556 http_end_request(conn, connection_state(S_OUT_OF_MEM), 0);
557 return NULL;
560 http->sent_version.major = major;
561 http->sent_version.minor = minor;
562 http->close = close;
564 init_http_post(&http->post);
566 /* The CGI code uses this too and blacklisting expects a host name. */
567 if (conn->proxied_uri->protocol != PROTOCOL_FILE)
568 http->bl_flags = get_blacklist_flags(conn->proxied_uri);
570 if (http->bl_flags & SERVER_BLACKLIST_HTTP10
571 || get_opt_bool("protocol.http.bugs.http10", NULL)) {
572 http->sent_version.major = 1;
573 http->sent_version.minor = 0;
576 /* If called from HTTPS proxy connection the connection info might have
577 * already been allocated. */
578 if (conn->done) {
579 conn->done(conn);
580 conn->done = NULL;
582 mem_free_set(&conn->info, http);
583 conn->done = done_http_connection;
585 return http;
/* Append an "Accept-Encoding: ...\r\n" line to @header, listing the
 * content encodings compiled into this build in the fixed order
 * br, bzip2, "gzip, deflate", lzma.  Nothing is appended when none of
 * them is configured. */
static void
accept_encoding_header(struct string *header)
{
#if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) || defined(CONFIG_LZMA) || defined(CONFIG_BROTLI)
	static const unsigned char *const encodings[] = {
#ifdef CONFIG_BROTLI
		"br",
#endif
#ifdef CONFIG_BZIP2
		"bzip2",
#endif
#ifdef CONFIG_GZIP
		"gzip, deflate",
#endif
#ifdef CONFIG_LZMA
		"lzma",
#endif
		NULL
	};
	int i;

	add_to_string(header, "Accept-Encoding: ");

	for (i = 0; encodings[i]; i++) {
		/* Separator in front of every entry but the first. */
		if (i) add_to_string(header, ", ");
		add_to_string(header, encodings[i]);
	}

	add_crlf_to_string(header);
#endif
}
621 #define POST_BUFFER_SIZE 16384
623 static void
624 send_more_post_data(struct socket *socket)
626 struct connection *conn = socket->conn;
627 struct http_connection_info *http = conn->info;
628 unsigned char buffer[POST_BUFFER_SIZE];
629 int got;
630 struct connection_state error;
632 got = read_http_post(&http->post, buffer, POST_BUFFER_SIZE, &error);
633 if (got < 0) {
634 http_end_request(conn, error, 0);
635 } else if (got > 0) {
636 write_to_socket(socket, buffer, got, connection_state(S_TRANS),
637 send_more_post_data);
638 } else { /* got == 0, meaning end of data */
639 /* Can't use request_from_socket() because there's no
640 * more data to write. */
641 struct read_buffer *rb = alloc_read_buffer(socket);
643 socket->state = SOCKET_END_ONCLOSE;
644 if (rb)
645 read_from_socket(socket, rb, connection_state(S_SENT),
646 http_got_header);
647 else
648 http_end_request(conn, connection_state(S_OUT_OF_MEM),
/* Build the complete HTTP request head for socket->conn and start sending
 * it.
 *
 * The request line uses TRACE, CONNECT, POST or GET.  It is followed by
 * Host, optional proxy authorization, User-Agent, Referer, the Accept*
 * headers, connection keep-alive, cache validators and cache control, a
 * Range header, origin authorization, POST content headers and cookies.
 * Headers that concern only the origin server are left out of a CONNECT
 * request; see the "CONNECT:" comments throughout.
 *
 * The request ends early (through http_end_request) on a missing host,
 * on allocation failure, or when the POST data cannot be opened.
 *
 * With POST data, only the head is written here and send_more_post_data()
 * continues with the body.  Otherwise the head is sent through
 * request_from_socket() and the reply is handled by http_got_header(). */
655 static void
656 http_send_header(struct socket *socket)
658 struct connection *conn = socket->conn;
659 struct http_connection_info *http;
660 int trace = get_opt_bool("protocol.http.trace", NULL);
661 struct string header;
662 unsigned char *post_data = NULL;
663 struct auth_entry *entry = NULL;
664 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
665 unsigned char *optstr;
666 int use_connect, talking_to_proxy;
668 /* Sanity check for a host */
669 if (!uri || !uri->host || !*uri->host || !uri->hostlen) {
670 http_end_request(conn, connection_state(S_BAD_URL), 0);
671 return;
674 http = init_http_connection_info(conn, 1, 1, 0);
675 if (!http) return;
677 if (!init_string(&header)) {
678 http_end_request(conn, connection_state(S_OUT_OF_MEM), 0);
679 return;
682 if (!conn->cached) conn->cached = find_in_cache(uri);
/* talking_to_proxy: conn->uri is a proxy URI and the socket has no SSL.
 * use_connect: as above, and the proxied URI is HTTPS, so a CONNECT
 * request is sent to the proxy. */
684 talking_to_proxy = IS_PROXY_URI(conn->uri) && !conn->socket->ssl;
685 use_connect = connection_is_https_proxy(conn) && !conn->socket->ssl;
687 if (trace) {
688 add_to_string(&header, "TRACE ");
689 } else if (use_connect) {
690 add_to_string(&header, "CONNECT ");
691 /* In CONNECT requests, we send only a subset of the
692 * headers to the proxy. See the "CONNECT:" comments
693 * below. After the CONNECT request succeeds, we
694 * negotiate TLS with the real server and make a new
695 * HTTP request that includes all the headers. */
696 } else if (uri->post) {
697 add_to_string(&header, "POST ");
698 conn->unrestartable = 1;
699 } else {
700 add_to_string(&header, "GET ");
703 if (!talking_to_proxy) {
704 add_char_to_string(&header, '/');
707 if (use_connect) {
708 /* Add port if it was specified or the default port */
709 add_uri_to_string(&header, uri, URI_HTTP_CONNECT);
710 } else {
711 if (connection_is_https_proxy(conn) && conn->socket->ssl) {
712 add_url_to_http_string(&header, uri, URI_DATA);
714 } else if (talking_to_proxy) {
715 add_url_to_http_string(&header, uri, URI_PROXY);
717 } else {
718 add_url_to_http_string(&header, conn->uri, URI_DATA);
722 add_to_string(&header, " HTTP/");
723 add_long_to_string(&header, http->sent_version.major);
724 add_char_to_string(&header, '.');
725 add_long_to_string(&header, http->sent_version.minor);
726 add_crlf_to_string(&header);
728 /* CONNECT: Sending a Host header seems pointless as the same
729 * information is already in the CONNECT line. It's harmless
730 * though and Mozilla does it too. */
731 add_to_string(&header, "Host: ");
732 add_uri_to_string(&header, uri, URI_HTTP_HOST);
733 add_crlf_to_string(&header);
735 /* CONNECT: Proxy-Authorization is intended to be seen by the proxy. */
736 if (talking_to_proxy) {
737 unsigned char *user = get_opt_str("protocol.http.proxy.user", NULL);
738 unsigned char *passwd = get_opt_str("protocol.http.proxy.passwd", NULL);
740 if (proxy_auth.digest) {
741 unsigned char *response;
742 int userlen = int_min(strlen(user), AUTH_USER_MAXLEN - 1);
743 int passwordlen = int_min(strlen(passwd), AUTH_PASSWORD_MAXLEN - 1);
745 if (userlen)
746 memcpy(proxy_auth.user, user, userlen);
747 proxy_auth.user[userlen] = '\0';
748 if (passwordlen)
749 memcpy(proxy_auth.password, passwd, passwordlen);
750 proxy_auth.password[passwordlen] = '\0';
752 /* FIXME: @uri is the proxied URI. Maybe the passed URI
753 * should be the proxy URI aka conn->uri. --jonas */
754 response = get_http_auth_digest_response(&proxy_auth, uri);
755 if (response) {
756 add_to_string(&header, "Proxy-Authorization: Digest ");
757 add_to_string(&header, response);
758 add_crlf_to_string(&header);
760 mem_free(response);
763 } else {
764 if (user[0]) {
765 unsigned char *proxy_data;
767 proxy_data = straconcat(user, ":", passwd, (unsigned char *) NULL);
768 if (proxy_data) {
769 unsigned char *proxy_64 = base64_encode(proxy_data);
771 if (proxy_64) {
772 add_to_string(&header, "Proxy-Authorization: Basic ");
773 add_to_string(&header, proxy_64);
774 add_crlf_to_string(&header);
775 mem_free(proxy_64);
777 mem_free(proxy_data);
783 /* CONNECT: User-Agent does not reveal anything about the
784 * resource we're fetching, and it may help the proxy return
785 * better error messages. */
786 optstr = get_opt_str("protocol.http.user_agent", NULL);
/* An empty option value or a single space suppresses the header. */
787 if (*optstr && strcmp(optstr, " ")) {
788 unsigned char *ustr, ts[64] = "";
789 /* TODO: Somehow get the terminal in which the
790 * document will actually be displayed. */
791 struct terminal *term = get_default_terminal();
793 add_to_string(&header, "User-Agent: ");
795 if (term) {
796 unsigned int tslen = 0;
798 ulongcat(ts, &tslen, term->width, 3, 0);
799 ts[tslen++] = 'x';
800 ulongcat(ts, &tslen, term->height, 3, 0);
802 ustr = subst_user_agent(optstr, VERSION_STRING, system_name,
803 ts);
805 if (ustr) {
806 add_to_string(&header, ustr);
807 mem_free(ustr);
810 add_crlf_to_string(&header);
813 /* CONNECT: Referer probably is a secret page in the HTTPS
814 * server, so don't reveal it to the proxy. */
815 if (!use_connect) {
816 switch (get_opt_int("protocol.http.referer.policy", NULL)) {
817 case REFERER_NONE:
818 /* oh well */
819 break;
821 case REFERER_FAKE:
822 optstr = get_opt_str("protocol.http.referer.fake", NULL);
823 if (!optstr[0]) break;
824 add_to_string(&header, "Referer: ");
825 add_to_string(&header, optstr);
826 add_crlf_to_string(&header);
827 break;
829 case REFERER_TRUE:
830 if (!conn->referrer) break;
831 add_to_string(&header, "Referer: ");
832 add_url_to_http_string(&header, conn->referrer, URI_HTTP_REFERRER);
833 add_crlf_to_string(&header);
834 break;
836 case REFERER_SAME_URL:
837 add_to_string(&header, "Referer: ");
838 add_url_to_http_string(&header, uri, URI_HTTP_REFERRER);
839 add_crlf_to_string(&header);
840 break;
844 /* CONNECT: Do send all Accept* headers to the CONNECT proxy,
845 * because they do not reveal anything about the resource
846 * we're going to request via TLS, and they may affect the
847 * error message if the CONNECT request fails.
849 * If ELinks is ever changed to vary its Accept headers based
850 * on what it intends to do with the returned resource, e.g.
851 * sending "Accept: text/css" when it wants an external
852 * stylesheet, then it should do that only in the inner GET
853 * and not in the outer CONNECT. */
854 add_to_string(&header, "Accept: */*");
855 add_crlf_to_string(&header);
857 if (get_opt_bool("protocol.http.compression", NULL))
858 accept_encoding_header(&header);
/* The Accept-Charset line is built once and cached in a file-scope
 * variable. */
860 if (!accept_charset) {
861 init_accept_charset();
864 if (!(http->bl_flags & SERVER_BLACKLIST_NO_CHARSET)
865 && !get_opt_bool("protocol.http.bugs.accept_charset", NULL)
866 && accept_charset) {
867 add_to_string(&header, accept_charset);
870 optstr = get_opt_str("protocol.http.accept_language", NULL);
871 if (optstr[0]) {
872 add_to_string(&header, "Accept-Language: ");
873 add_to_string(&header, optstr);
874 add_crlf_to_string(&header);
876 #ifdef CONFIG_NLS
877 else if (get_opt_bool("protocol.http.accept_ui_language", NULL)) {
878 unsigned char *code = language_to_iso639(current_language);
880 if (code) {
881 add_to_string(&header, "Accept-Language: ");
882 add_to_string(&header, code);
883 add_crlf_to_string(&header);
886 #endif
888 /* CONNECT: Proxy-Connection is intended to be seen by the
889 * proxy. If the CONNECT request succeeds, then the proxy
890 * will forward the remainder of the TCP connection to the
891 * origin server, and Proxy-Connection does not matter; but
892 * if the request fails, then Proxy-Connection may matter. */
893 /* FIXME: What about post-HTTP/1.1?? --Zas */
894 if (HTTP_1_1(http->sent_version)) {
895 if (!IS_PROXY_URI(conn->uri)) {
896 add_to_string(&header, "Connection: ");
897 } else {
898 add_to_string(&header, "Proxy-Connection: ");
901 if (!uri->post || !get_opt_bool("protocol.http.bugs.post_no_keepalive", NULL)) {
902 add_to_string(&header, "Keep-Alive");
903 } else {
904 add_to_string(&header, "close");
906 add_crlf_to_string(&header);
909 /* CONNECT: Do not tell the proxy anything we have cached
910 * about the resource. */
911 if (!use_connect && conn->cached) {
912 if (!conn->cached->incomplete && conn->cached->head
913 && conn->cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED) {
914 if (conn->cached->last_modified) {
915 add_to_string(&header, "If-Modified-Since: ");
916 add_to_string(&header, conn->cached->last_modified);
917 add_crlf_to_string(&header);
919 if (conn->cached->etag) {
920 add_to_string(&header, "If-None-Match: ");
921 add_to_string(&header, conn->cached->etag);
922 add_crlf_to_string(&header);
927 /* CONNECT: Let's send cache control headers to the proxy too;
928 * they may affect DNS caching. */
929 if (conn->cache_mode >= CACHE_MODE_FORCE_RELOAD) {
930 add_to_string(&header, "Pragma: no-cache");
931 add_crlf_to_string(&header);
932 add_to_string(&header, "Cache-Control: no-cache");
933 add_crlf_to_string(&header);
936 /* CONNECT: Do not reveal byte ranges to the proxy. It can't
937 * do anything good with that information anyway. */
938 if (!use_connect && (conn->from || conn->progress->start > 0)) {
939 /* conn->from takes precedence. conn->progress.start is set only the first
940 * time, then conn->from gets updated and in case of any retries
941 * etc we have everything interesting in conn->from already. */
942 add_to_string(&header, "Range: bytes=");
943 add_long_to_string(&header, conn->from ? conn->from : conn->progress->start);
944 add_char_to_string(&header, '-');
945 add_crlf_to_string(&header);
948 /* CONNECT: The Authorization header is for the origin server only. */
949 if (!use_connect) {
950 #ifdef CONFIG_GSSAPI
951 if (http_negotiate_output(uri, &header) != 0)
952 #endif
953 entry = find_auth(uri);
956 if (entry) {
957 if (entry->digest) {
958 unsigned char *response;
960 response = get_http_auth_digest_response(entry, uri);
961 if (response) {
962 add_to_string(&header, "Authorization: Digest ");
963 add_to_string(&header, response);
964 add_crlf_to_string(&header);
966 mem_free(response);
969 } else {
970 /* RFC2617 section 2 [Basic Authentication Scheme]
972 * To receive authorization, the client sends the userid
973 * and password, separated by a single colon (":")
974 * character, within a base64 [7] encoded string in the
975 * credentials. */
976 unsigned char *id;
978 /* Create base64 encoded string. */
979 id = straconcat(entry->user, ":", entry->password,
980 (unsigned char *) NULL);
981 if (id) {
982 unsigned char *base64 = base64_encode(id);
984 mem_free_set(&id, base64);
987 if (id) {
988 add_to_string(&header, "Authorization: Basic ");
989 add_to_string(&header, id);
990 add_crlf_to_string(&header);
991 mem_free(id);
996 /* CONNECT: Any POST data is for the origin server only. */
997 if (!use_connect && uri->post) {
998 /* We search for first '\n' in uri->post to get content type
999 * as set by get_form_uri(). This '\n' is dropped if any
1000 * and replaced by correct '\r\n' termination here. */
1001 unsigned char *postend = strchr(uri->post, '\n');
1002 struct connection_state error;
1004 if (postend) {
1005 add_to_string(&header, "Content-Type: ");
1006 add_bytes_to_string(&header, uri->post, postend - uri->post);
1007 add_crlf_to_string(&header);
1010 post_data = postend ? postend + 1 : uri->post;
1011 if (!open_http_post(&http->post, post_data, &error)) {
1012 http_end_request(conn, error, 0);
1013 done_string(&header);
1014 return;
1016 add_format_to_string(&header, "Content-Length: "
1017 "%" OFF_PRINT_FORMAT "\x0D\x0A",
1018 (off_print_T)
1019 http->post.total_upload_length);
1022 #ifdef CONFIG_COOKIES
1023 /* CONNECT: Cookies are for the origin server only. */
1024 if (!use_connect) {
1025 struct string *cookies = send_cookies(uri);
1027 if (cookies) {
1028 add_to_string(&header, "Cookie: ");
1029 add_string_to_string(&header, cookies);
1030 add_crlf_to_string(&header);
1031 done_string(cookies);
1034 #endif
/* The empty line that terminates the request head. */
1036 add_crlf_to_string(&header);
1038 /* CONNECT: Any POST data is for the origin server only.
1039 * This was already checked above and post_data is NULL
1040 * in that case. Verified with an assertion below. */
1041 if (post_data) {
1042 assert(!use_connect); /* see comment above */
1044 socket->state = SOCKET_END_ONCLOSE;
1045 if (!conn->http_upload_progress && http->post.file_count)
1046 conn->http_upload_progress = init_progress(0);
1047 write_to_socket(socket, header.source, header.length,
1048 connection_state(S_TRANS),
1049 send_more_post_data);
1050 } else
1051 request_from_socket(socket, header.source, header.length,
1052 connection_state(S_SENT),
1053 SOCKET_END_ONCLOSE, http_got_header);
/* NOTE(review): the header buffer is released right after it is handed to
 * the socket layer, so that layer presumably copies it -- confirm. */
1054 done_string(&header);
1057 #undef POST_BUFFER_SIZE
1060 static unsigned char *
1061 decompress_data(struct connection *conn, unsigned char *data, int len,
1062 int *new_len)
1064 *new_len = 0; /* new_len must be zero if we would ever return NULL */
1066 if (!conn->stream) {
1067 conn->stream = open_encoded(-1, conn->content_encoding);
1068 if (!conn->stream) return NULL;
1071 return decode_encoded_buffer(conn->stream, conn->content_encoding, data, len, new_len);
1074 static int
1075 is_line_in_buffer(struct read_buffer *rb)
1077 int l;
1079 for (l = 0; l < rb->length; l++) {
1080 unsigned char a0 = rb->data[l];
1082 if (a0 == ASCII_LF)
1083 return l + 1;
1084 if (a0 == ASCII_CR) {
1085 if (rb->data[l + 1] == ASCII_LF
1086 && l < rb->length - 1)
1087 return l + 2;
1088 if (l == rb->length - 1)
1089 return 0;
1091 if (a0 < ' ')
1092 return -1;
1094 return 0;
1097 static void read_http_data(struct socket *socket, struct read_buffer *rb);
1099 static void
1100 read_more_http_data(struct connection *conn, struct read_buffer *rb,
1101 int already_got_anything)
1103 struct connection_state state = already_got_anything
1104 ? connection_state(S_TRANS) : conn->state;
1106 read_from_socket(conn->socket, rb, state, read_http_data);
1109 static void
1110 read_http_data_done(struct connection *conn)
1112 struct http_connection_info *http = conn->info;
1114 /* There's no content but an error so just print
1115 * that instead of nothing. */
1116 if (!conn->from) {
1117 if (http->code >= 400) {
1118 http_error_document(conn, http->code);
1120 } else {
1121 /* This is not an error, thus fine. No need generate any
1122 * document, as this may be empty and it's not a problem.
1123 * In case of 3xx, we're probably just getting kicked to
1124 * another page anyway. And in case of 2xx, the document
1125 * may indeed be empty and thus the user should see it so. */
1129 http_end_request(conn, connection_state(S_OK), 0);
1132 /* Returns:
1133 * -1 on error
1134 * 0 if more to read
1135 * 1 if done
1137 static int
1138 read_chunked_http_data(struct connection *conn, struct read_buffer *rb)
1140 struct http_connection_info *http = conn->info;
1141 int total_data_len = 0;
1143 while (1) {
1144 /* Chunked. Good luck! */
1145 /* See RFC2616, section 3.6.1. Basically, it looks like:
1146 * 1234 ; a = b ; c = d\r\n
1147 * aklkjadslkfjalkfjlkajkljfdkljdsfkljdf*1234\r\n
1148 * 0\r\n
1149 * \r\n */
1150 if (http->chunk_remaining == CHUNK_DATA_END) {
1151 int l = is_line_in_buffer(rb);
1153 if (l) {
1154 if (l == -1) {
1155 /* Invalid character in buffer. */
1156 return -1;
1159 /* Remove everything to the EOLN. */
1160 kill_buffer_data(rb, l);
1161 if (l <= 2) {
1162 /* Empty line. */
1163 return 2;
1165 continue;
1168 } else if (http->chunk_remaining == CHUNK_SIZE) {
1169 int l = is_line_in_buffer(rb);
1171 if (l) {
1172 unsigned char *de;
1173 int n = 0;
1175 if (l != -1) {
1176 errno = 0;
1177 n = strtol(rb->data, (char **) &de, 16);
1178 if (errno || !*de) {
1179 return -1;
1183 if (l == -1 || de == rb->data) {
1184 return -1;
1187 /* Remove everything to the EOLN. */
1188 kill_buffer_data(rb, l);
1189 http->chunk_remaining = n;
1190 if (!http->chunk_remaining)
1191 http->chunk_remaining = CHUNK_ZERO_SIZE;
1192 continue;
1195 } else {
1196 int data_len;
1197 int zero = (http->chunk_remaining == CHUNK_ZERO_SIZE);
1198 int len = zero ? 0 : http->chunk_remaining;
1200 /* Maybe everything necessary didn't come yet.. */
1201 int_upper_bound(&len, rb->length);
1202 conn->received += len;
1204 if (http->chunk_remaining > 0) http->chunk_remaining -= len;
1205 if (conn->content_encoding == ENCODING_NONE) {
1206 data_len = len;
1207 if (add_fragment(conn->cached, conn->from, rb->data, len) == 1)
1208 conn->tries = 0;
1209 } else {
1210 unsigned char *data = decompress_data(conn, rb->data, len, &data_len);
1212 if (add_fragment(conn->cached, conn->from, data, data_len) == 1)
1213 conn->tries = 0;
1215 mem_free_if(data);
1216 if (zero || !http->length) shutdown_connection_stream(conn);
1219 conn->from += data_len;
1220 total_data_len += data_len;
1222 kill_buffer_data(rb, len);
1224 if (zero) {
1225 /* Last chunk has zero length, so this is last
1226 * chunk, we finished decompression just now
1227 * and now we can happily finish reading this
1228 * stuff. */
1229 http->chunk_remaining = CHUNK_DATA_END;
1230 continue;
1233 if (!http->chunk_remaining && rb->length > 0) {
1234 /* Eat newline succeeding each chunk. */
1235 if (rb->data[0] == ASCII_LF) {
1236 kill_buffer_data(rb, 1);
1237 } else {
1238 if (rb->data[0] != ASCII_CR
1239 || (rb->length >= 2
1240 && rb->data[1] != ASCII_LF)) {
1241 return -1;
1243 if (rb->length < 2) break;
1244 kill_buffer_data(rb, 2);
1246 http->chunk_remaining = CHUNK_SIZE;
1247 continue;
1250 break;
1253 /* More to read. */
1254 return !!total_data_len;
1257 /* Returns 0 if more data, 1 if done. */
1258 static int
1259 read_normal_http_data(struct connection *conn, struct read_buffer *rb)
1261 struct http_connection_info *http = conn->info;
1262 int data_len;
1263 int len = rb->length;
1265 if (http->length >= 0 && http->length < len) {
1266 /* We won't read more than we have to go. */
1267 len = http->length;
1270 conn->received += len;
1271 if (http->length > 0) http->length -= len;
1273 if (conn->content_encoding == ENCODING_NONE) {
1274 data_len = len;
1275 if (add_fragment(conn->cached, conn->from, rb->data, data_len) == 1)
1276 conn->tries = 0;
1277 } else {
1278 unsigned char *data = decompress_data(conn, rb->data, len, &data_len);
1280 if (add_fragment(conn->cached, conn->from, data, data_len) == 1)
1281 conn->tries = 0;
1283 mem_free_if(data);
1284 if (!http->length) shutdown_connection_stream(conn);
1287 conn->from += data_len;
1289 kill_buffer_data(rb, len);
1291 if (!http->length && (conn->socket->state == SOCKET_RETRY_ONCLOSE
1292 || conn->socket->state == SOCKET_CLOSED)) {
1293 return 2;
1296 return !!data_len;
1299 static void
1300 read_http_data(struct socket *socket, struct read_buffer *rb)
1302 struct connection *conn = socket->conn;
1303 struct http_connection_info *http = conn->info;
1304 int ret;
1306 if (socket->state == SOCKET_CLOSED) {
1307 if (conn->content_encoding) {
1308 /* Flush decompression first. */
1309 http->length = 0;
1310 } else {
1311 read_http_data_done(conn);
1312 return;
1316 if (http->length != LEN_CHUNKED) {
1317 ret = read_normal_http_data(conn, rb);
1319 } else {
1320 ret = read_chunked_http_data(conn, rb);
1323 switch (ret) {
1324 case 0:
1325 read_more_http_data(conn, rb, 0);
1326 break;
1327 case 1:
1328 read_more_http_data(conn, rb, 1);
1329 break;
1330 case 2:
1331 read_http_data_done(conn);
1332 break;
1333 default:
1334 assertm(ret == -1, "Unexpected return value: %d", ret);
1335 abort_connection(conn, connection_state(S_HTTP_ERROR));
1339 /* Returns offset of the header end, zero if more data is needed, -1 when
1340 * incorrect data was received, -2 if this is HTTP/0.9 and no header is to
1341 * come. */
1342 static int
1343 get_header(struct read_buffer *rb)
1345 int i;
1347 /* XXX: We will have to do some guess about whether an HTTP header is
1348 * coming or not, in order to support HTTP/0.9 reply correctly. This
1349 * means a little code duplication with get_http_code(). --pasky */
1350 if (rb->length > 4 && c_strncasecmp(rb->data, "HTTP/", 5))
1351 return -2;
1353 for (i = 0; i < rb->length; i++) {
1354 unsigned char a0 = rb->data[i];
1355 unsigned char a1 = rb->data[i + 1];
1357 if (a0 == 0) {
1358 rb->data[i] = ' ';
1359 continue;
1361 if (a0 == ASCII_LF && a1 == ASCII_LF
1362 && i < rb->length - 1)
1363 return i + 2;
1364 if (a0 == ASCII_CR && i < rb->length - 3) {
1365 if (a1 == ASCII_CR) continue;
1366 if (a1 != ASCII_LF) return -1;
1367 if (rb->data[i + 2] == ASCII_CR) {
1368 if (rb->data[i + 3] != ASCII_LF) return -1;
1369 return i + 4;
1374 return 0;
/* Walk all @header_field challenges found in @header and register the first
 * usable one (Basic with a realm, Digest, or - with CONFIG_GSSAPI - one of
 * the negotiate schemes).
 *
 * Returns 1 if we need retry the connection (for negotiate-auth only),
 * 0 otherwise. */
static int
check_http_authentication(struct connection *conn, struct uri *uri,
		unsigned char *header, unsigned char *header_field)
{
	unsigned char *str, *d;
	int ret = 0;

	for (d = parse_header(header, header_field, &str);
	     d;
	     d = parse_header(str, header_field, &str)) {
		/* Set once a challenge was accepted; ends the scan. */
		int handled = 0;

		if (!c_strncasecmp(d, "Basic", 5)) {
			unsigned char *realm = get_header_param(d, "realm");

			/* A Basic challenge without realm is skipped. */
			if (realm) {
				add_auth_entry(uri, realm, NULL, NULL, 0);
				mem_free(realm);
				handled = 1;
			}

		} else if (!c_strncasecmp(d, "Digest", 6)) {
			unsigned char *realm = get_header_param(d, "realm");
			unsigned char *nonce = get_header_param(d, "nonce");
			unsigned char *opaque = get_header_param(d, "opaque");

			add_auth_entry(uri, realm, nonce, opaque, 1);

			mem_free_if(realm);
			mem_free_if(nonce);
			mem_free_if(opaque);
			handled = 1;
		}
#ifdef CONFIG_GSSAPI
		else if (!c_strncasecmp(d, HTTPNEG_GSS_STR, HTTPNEG_GSS_STRLEN)) {
			if (http_negotiate_input(conn, uri, HTTPNEG_GSS, str) == 0)
				ret = 1;
			handled = 1;
		}
		else if (!c_strncasecmp(d, HTTPNEG_NEG_STR, HTTPNEG_NEG_STRLEN)) {
			if (http_negotiate_input(conn, uri, HTTPNEG_NEG, str) == 0)
				ret = 1;
			handled = 1;
		}
#endif

		mem_free(d);
		if (handled) break;
	}

	return ret;
}
1430 void
1431 http_got_header(struct socket *socket, struct read_buffer *rb)
1433 struct connection *conn = socket->conn;
1434 struct http_connection_info *http = conn->info;
1435 unsigned char *head;
1436 #ifdef CONFIG_COOKIES
1437 unsigned char *cookie, *ch;
1438 #endif
1439 unsigned char *d;
1440 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
1441 struct http_version version = { 0, 9 };
1442 struct connection_state state = (!is_in_state(conn->state, S_PROC)
1443 ? connection_state(S_GETH)
1444 : connection_state(S_PROC));
1445 int a, h = 200;
1446 int cf;
1448 if (socket->state == SOCKET_CLOSED) {
1449 if (!conn->tries && uri->host) {
1450 if (http->bl_flags & SERVER_BLACKLIST_NO_CHARSET) {
1451 del_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1452 } else {
1453 add_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1454 conn->tries = -1;
1457 retry_connection(conn, connection_state(S_CANT_READ));
1458 return;
1460 socket->state = SOCKET_RETRY_ONCLOSE;
1462 again:
1463 a = get_header(rb);
1464 if (a == -1) {
1465 abort_connection(conn, connection_state(S_HTTP_ERROR));
1466 return;
1468 if (!a) {
1469 read_from_socket(conn->socket, rb, state, http_got_header);
1470 return;
1472 /* a == -2 from get_header means HTTP/0.9. In that case, skip
1473 * the get_http_code call; @h and @version have already been
1474 * initialized with the right values. */
1475 if (a == -2) a = 0;
1476 if ((a && get_http_code(rb, &h, &version))
1477 || h == 101) {
1478 abort_connection(conn, connection_state(S_HTTP_ERROR));
1479 return;
1482 /* When no header, HTTP/0.9 document. That's always text/html,
1483 * according to
1484 * http://www.w3.org/Protocols/HTTP/AsImplemented.html. */
1485 /* FIXME: This usage of fake protocol headers for setting up the
1486 * content type has been obsoleted by the @content_type member of
1487 * {struct cache_entry}. */
1488 head = (a ? memacpy(rb->data, a)
1489 : stracpy("\r\nContent-Type: text/html\r\n"));
1490 if (!head) {
1491 abort_connection(conn, connection_state(S_OUT_OF_MEM));
1492 return;
1495 if (check_http_server_bugs(uri, http, head)) {
1496 mem_free(head);
1497 retry_connection(conn, connection_state(S_RESTART));
1498 return;
1501 #ifdef CONFIG_CGI
1502 if (uri->protocol == PROTOCOL_FILE) {
1503 /* ``Status'' is not a standard HTTP header field although some
1504 * HTTP servers like www.php.net uses it for some reason. It should
1505 * only be used for CGI scripts so that it does not interfere
1506 * with status code depended handling for ``normal'' HTTP like
1507 * redirects. */
1508 d = parse_header(head, "Status", NULL);
1509 if (d) {
1510 int h2 = atoi(d);
1512 mem_free(d);
1513 if (h2 >= 100 && h2 < 600) h = h2;
1514 if (h == 101) {
1515 mem_free(head);
1516 abort_connection(conn, connection_state(S_HTTP_ERROR));
1517 return;
1521 #endif
1523 #ifdef CONFIG_COOKIES
1524 ch = head;
1525 while ((cookie = parse_header(ch, "Set-Cookie", &ch))) {
1526 set_cookie(uri, cookie);
1527 mem_free(cookie);
1529 #endif
1530 http->code = h;
1532 if (h == 100) {
1533 mem_free(head);
1534 state = connection_state(S_PROC);
1535 kill_buffer_data(rb, a);
1536 goto again;
1538 if (h < 200) {
1539 mem_free(head);
1540 abort_connection(conn, connection_state(S_HTTP_ERROR));
1541 return;
1543 if (h == 304) {
1544 mem_free(head);
1545 http_end_request(conn, connection_state(S_OK), 1);
1546 return;
1548 if (h == 204) {
1549 mem_free(head);
1550 http_end_request(conn, connection_state(S_HTTP_204), 0);
1551 return;
1553 if (h == 200 && connection_is_https_proxy(conn) && !conn->socket->ssl) {
1554 #ifdef CONFIG_SSL
1555 mem_free(head);
1556 socket->need_ssl = 1;
1557 complete_connect_socket(socket, uri, http_send_header);
1558 #else
1559 abort_connection(conn, connection_state(S_SSL_ERROR));
1560 #endif
1561 return;
1564 conn->cached = get_cache_entry(conn->uri);
1565 if (!conn->cached) {
1566 mem_free(head);
1567 abort_connection(conn, connection_state(S_OUT_OF_MEM));
1568 return;
1570 conn->cached->cgi = conn->cgi;
1571 mem_free_set(&conn->cached->head, head);
1573 if (!get_opt_bool("document.cache.ignore_cache_control", NULL)) {
1574 struct cache_entry *cached = conn->cached;
1576 /* I am not entirely sure in what order we should process these
1577 * headers and if we should still process Cache-Control max-age
1578 * if we already set max age to date mentioned in Expires.
1579 * --jonas */
1580 /* Ensure that when ever cached->max_age is set, cached->expired
1581 * is also set, so the cache management knows max_age contains a
1582 * valid time. If on the other hand no caching is requested
1583 * cached->expire should be set to zero. */
1584 if ((d = parse_header(cached->head, "Expires", NULL))) {
1585 /* Convert date to seconds. */
1586 time_t expires = parse_date(&d, NULL, 0, 1);
1588 mem_free(d);
1590 if (expires && cached->cache_mode != CACHE_MODE_NEVER) {
1591 timeval_from_seconds(&cached->max_age, expires);
1592 cached->expire = 1;
1596 if ((d = parse_header(cached->head, "Pragma", NULL))) {
1597 if (strstr(d, "no-cache")) {
1598 cached->cache_mode = CACHE_MODE_NEVER;
1599 cached->expire = 0;
1601 mem_free(d);
1604 if (cached->cache_mode != CACHE_MODE_NEVER
1605 && (d = parse_header(cached->head, "Cache-Control", NULL))) {
1606 if (strstr(d, "no-cache") || strstr(d, "must-revalidate")) {
1607 cached->cache_mode = CACHE_MODE_NEVER;
1608 cached->expire = 0;
1610 } else {
1611 unsigned char *pos = strstr(d, "max-age=");
1613 assert(cached->cache_mode != CACHE_MODE_NEVER);
1615 if (pos) {
1616 /* Grab the number of seconds. */
1617 timeval_T max_age;
1619 timeval_from_seconds(&max_age, atol(pos + 8));
1620 timeval_now(&cached->max_age);
1621 timeval_add_interval(&cached->max_age, &max_age);
1623 cached->expire = 1;
1627 mem_free(d);
1631 /* XXX: Is there some reason why NOT to follow the Location header
1632 * for any status? If the server didn't mean it, it wouldn't send
1633 * it, after all...? --pasky */
1634 if (h == 201 || h == 301 || h == 302 || h == 303 || h == 307) {
1635 d = parse_header(conn->cached->head, "Location", NULL);
1636 if (d) {
1637 int use_get_method = (h == 303);
1639 /* A note from RFC 2616 section 10.3.3:
1640 * RFC 1945 and RFC 2068 specify that the client is not
1641 * allowed to change the method on the redirected
1642 * request. However, most existing user agent
1643 * implementations treat 302 as if it were a 303
1644 * response, performing a GET on the Location
1645 * field-value regardless of the original request
1646 * method. */
1647 /* So POST must not be redirected to GET, but some
1648 * BUGGY message boards rely on it :-( */
1649 if (h == 302
1650 && get_opt_bool("protocol.http.bugs.broken_302_redirect", NULL))
1651 use_get_method = 1;
1653 redirect_cache(conn->cached, d, use_get_method, -1);
1654 mem_free(d);
1658 if (h == 401) {
1659 if (check_http_authentication(conn, uri,
1660 conn->cached->head, "WWW-Authenticate")) {
1661 retry_connection(conn, connection_state(S_RESTART));
1662 return;
1666 if (h == 407) {
1667 unsigned char *str;
1668 int restart = 0;
1670 d = parse_header(conn->cached->head, "Proxy-Authenticate", &str);
1671 while (d) {
1672 if (!c_strncasecmp(d, "Basic", 5)) {
1673 unsigned char *realm = get_header_param(d, "realm");
1675 if (realm) {
1676 mem_free_set(&proxy_auth.realm, realm);
1677 proxy_auth.digest = 0;
1678 mem_free(d);
1679 break;
1682 } else if (!c_strncasecmp(d, "Digest", 6)) {
1683 unsigned char *realm = get_header_param(d, "realm");
1684 unsigned char *nonce = get_header_param(d, "nonce");
1685 unsigned char *opaque = get_header_param(d, "opaque");
1686 unsigned char *stale = get_header_param(d, "stale");
1688 if (stale) {
1689 if (strcasecmp(stale, "true")) restart = 1;
1690 else restart = 0;
1691 mem_free(stale);
1693 mem_free_set(&proxy_auth.realm, realm);
1694 mem_free_set(&proxy_auth.nonce, nonce);
1695 mem_free_set(&proxy_auth.opaque, opaque);
1696 if (proxy_auth.digest == 0) restart = 1;
1697 proxy_auth.digest = 1;
1699 mem_free(d);
1700 break;
1703 mem_free(d);
1704 d = parse_header(str, "Proxy-Authenticate", &str);
1706 if (restart) {
1707 retry_connection(conn, connection_state(S_RESTART));
1708 return;
1712 kill_buffer_data(rb, a);
1713 http->close = 0;
1714 http->length = -1;
1715 http->recv_version = version;
1717 if ((d = parse_header(conn->cached->head, "Connection", NULL))
1718 || (d = parse_header(conn->cached->head, "Proxy-Connection", NULL))) {
1719 if (!c_strcasecmp(d, "close")) http->close = 1;
1720 mem_free(d);
1721 } else if (PRE_HTTP_1_1(version)) {
1722 http->close = 1;
1725 cf = conn->from;
1726 conn->from = 0;
1727 d = parse_header(conn->cached->head, "Content-Range", NULL);
1728 if (d) {
1729 if (strlen(d) > 6) {
1730 d[5] = 0;
1731 if (isdigit(d[6]) && !c_strcasecmp(d, "bytes")) {
1732 int f;
1734 errno = 0;
1735 f = strtol(d + 6, NULL, 10);
1737 if (!errno && f >= 0) conn->from = f;
1740 mem_free(d);
1742 if (cf && !conn->from && !conn->unrestartable) conn->unrestartable = 1;
1743 if ((conn->progress->start <= 0 && conn->from > cf) || conn->from < 0) {
1744 /* We don't want this if conn->progress.start because then conn->from will
1745 * be probably value of conn->progress.start, while cf is 0. */
1746 abort_connection(conn, connection_state(S_HTTP_ERROR));
1747 return;
1750 #if 0
1752 struct status *s;
1753 foreach (s, conn->downloads) {
1754 fprintf(stderr, "conn %p status %p pri %d st %d er %d :: ce %s",
1755 conn, s, s->pri, s->state, s->prev_error,
1756 s->cached ? s->cached->url : (unsigned char *) "N-U-L-L");
1759 #endif
1761 if (conn->progress->start >= 0) {
1762 /* Update to the real value which we've got from Content-Range. */
1763 conn->progress->seek = conn->from;
1765 conn->progress->start = conn->from;
1767 d = parse_header(conn->cached->head, "Content-Length", NULL);
1768 if (d) {
1769 unsigned char *ep;
1770 long long l;
1772 errno = 0;
1773 l = strtoll(d, (char **) &ep, 10);
1775 if (!errno && !*ep && l >= 0) {
1776 if (!http->close || POST_HTTP_1_0(version))
1777 http->length = l;
1778 conn->est_length = conn->from + l;
1780 mem_free(d);
1783 if (!conn->unrestartable) {
1784 d = parse_header(conn->cached->head, "Accept-Ranges", NULL);
1786 if (d) {
1787 if (!c_strcasecmp(d, "none"))
1788 conn->unrestartable = 1;
1789 mem_free(d);
1790 } else {
1791 if (!conn->from)
1792 conn->unrestartable = 1;
1796 d = parse_header(conn->cached->head, "Transfer-Encoding", NULL);
1797 if (d) {
1798 if (!c_strcasecmp(d, "chunked")) {
1799 http->length = LEN_CHUNKED;
1800 http->chunk_remaining = CHUNK_SIZE;
1802 mem_free(d);
1804 if (!http->close && http->length == -1) http->close = 1;
1806 d = parse_header(conn->cached->head, "Last-Modified", NULL);
1807 if (d) {
1808 if (conn->cached->last_modified && c_strcasecmp(conn->cached->last_modified, d)) {
1809 delete_entry_content(conn->cached);
1810 if (conn->from) {
1811 conn->from = 0;
1812 mem_free(d);
1813 retry_connection(conn, connection_state(S_MODIFIED));
1814 return;
1817 if (!conn->cached->last_modified) conn->cached->last_modified = d;
1818 else mem_free(d);
1820 if (!conn->cached->last_modified) {
1821 d = parse_header(conn->cached->head, "Date", NULL);
1822 if (d) conn->cached->last_modified = d;
1825 /* FIXME: Parse only if HTTP/1.1 or later? --Zas */
1826 d = parse_header(conn->cached->head, "ETag", NULL);
1827 if (d) {
1828 if (conn->cached->etag) {
1829 unsigned char *old_tag = conn->cached->etag;
1830 unsigned char *new_tag = d;
1832 /* http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19 */
1834 if (new_tag[0] == 'W' && new_tag[1] == '/')
1835 new_tag += 2;
1837 if (old_tag[0] == 'W' && old_tag[1] == '/')
1838 old_tag += 2;
1840 if (strcmp(new_tag, old_tag)) {
1841 delete_entry_content(conn->cached);
1842 if (conn->from) {
1843 conn->from = 0;
1844 mem_free(d);
1845 retry_connection(conn, connection_state(S_MODIFIED));
1846 return;
1851 if (!conn->cached->etag)
1852 conn->cached->etag = d;
1853 else
1854 mem_free(d);
1857 d = parse_header(conn->cached->head, "Content-Encoding", NULL);
1858 if (d) {
1859 #if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) || defined(CONFIG_LZMA) || defined(CONFIG_BROTLI)
1860 unsigned char *extension = get_extension_from_uri(uri);
1861 enum stream_encoding file_encoding;
1863 file_encoding = extension ? guess_encoding(extension) : ENCODING_NONE;
1864 mem_free_if(extension);
1865 #endif
1866 /* If the content is encoded, we want to preserve the encoding
1867 * if it is implied by the extension, so that saving the URI
1868 * will leave the saved file with the correct encoding. */
1869 #ifdef CONFIG_GZIP
1870 if (file_encoding != ENCODING_GZIP
1871 && (!c_strcasecmp(d, "gzip") || !c_strcasecmp(d, "x-gzip")))
1872 conn->content_encoding = ENCODING_GZIP;
1873 if (!c_strcasecmp(d, "deflate") || !c_strcasecmp(d, "x-deflate"))
1874 conn->content_encoding = ENCODING_DEFLATE;
1875 #endif
1877 #ifdef CONFIG_BROTLI
1878 if (file_encoding != ENCODING_BROTLI
1879 && (!c_strcasecmp(d, "br")))
1880 conn->content_encoding = ENCODING_BROTLI;
1881 #endif
1883 #ifdef CONFIG_BZIP2
1884 if (file_encoding != ENCODING_BZIP2
1885 && (!c_strcasecmp(d, "bzip2") || !c_strcasecmp(d, "x-bzip2")))
1886 conn->content_encoding = ENCODING_BZIP2;
1887 #endif
1889 #ifdef CONFIG_LZMA
1890 if (file_encoding != ENCODING_LZMA
1891 && (!c_strcasecmp(d, "lzma") || !c_strcasecmp(d, "x-lzma")))
1892 conn->content_encoding = ENCODING_LZMA;
1893 #endif
1894 mem_free(d);
1897 if (conn->content_encoding != ENCODING_NONE) {
1898 mem_free_if(conn->cached->encoding_info);
1899 conn->cached->encoding_info = stracpy(get_encoding_name(conn->content_encoding));
1902 if (http->length == -1 || http->close)
1903 socket->state = SOCKET_END_ONCLOSE;
1905 read_http_data(socket, rb);