bug 764: Initialize the right member of union option_value
[elinks.git] / src / protocol / http / http.c
blob18cc641a3e5870e0db43a79eea354c8a9452d2b8
1 /* Internal "http" protocol implementation */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <errno.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #ifdef HAVE_UNISTD_H
12 #include <unistd.h>
13 #endif
14 #ifdef HAVE_FCNTL_H
15 #include <fcntl.h> /* OS/2 needs this after sys/types.h */
16 #endif
17 #ifdef HAVE_LIMITS_H
18 #include <limits.h>
19 #endif
21 #include "elinks.h"
23 #include "cache/cache.h"
24 #include "config/options.h"
25 #include "cookies/cookies.h"
26 #include "intl/charsets.h"
27 #include "intl/gettext/libintl.h"
28 #include "main/module.h"
29 #include "network/connection.h"
30 #include "network/progress.h"
31 #include "network/socket.h"
32 #include "osdep/ascii.h"
33 #include "osdep/osdep.h"
34 #include "osdep/sysname.h"
35 #include "protocol/auth/auth.h"
36 #include "protocol/auth/digest.h"
37 #include "protocol/date.h"
38 #include "protocol/header.h"
39 #include "protocol/http/blacklist.h"
40 #include "protocol/http/codes.h"
41 #include "protocol/http/http.h"
42 #include "protocol/uri.h"
43 #include "session/session.h"
44 #include "terminal/terminal.h"
45 #include "util/base64.h"
46 #include "util/conv.h"
47 #include "util/memory.h"
48 #include "util/string.h"
50 #ifdef CONFIG_GSSAPI
51 #include "http_negotiate.h"
52 #endif
/* An HTTP protocol version, split into its major and minor numbers. */
struct http_version {
	int major;
	int minor;
};

/* Version predicates. Each one takes a struct http_version object (not a
 * pointer) and may evaluate its argument more than once, so do not pass
 * expressions with side effects. */

/* Exact version matches. */
#define HTTP_0_9(x)		((x).major == 0 && (x).minor == 9)
#define HTTP_1_0(x)		((x).major == 1 && (x).minor == 0)
#define HTTP_1_1(x)		((x).major == 1 && (x).minor == 1)

/* Strictly older than 1.0, respectively 1.1. */
#define PRE_HTTP_1_0(x)		((x).major < 1)
#define PRE_HTTP_1_1(x)		(PRE_HTTP_1_0(x) || HTTP_1_0(x))

/* Strictly newer than 1.0, respectively 1.1. */
#define POST_HTTP_1_0(x)	((x).major > 1 || ((x).major == 1 && (x).minor > 0))
#define POST_HTTP_1_1(x)	((x).major > 1 || ((x).major == 1 && (x).minor > 1))
68 struct http_connection_info {
69 enum blacklist_flags bl_flags;
70 struct http_version recv_version;
71 struct http_version sent_version;
73 int close;
75 #define LEN_CHUNKED -2 /* == we get data in unknown number of chunks */
76 #define LEN_FINISHED 0
77 int length;
79 /* Either bytes coming in this chunk yet or "parser state". */
80 #define CHUNK_DATA_END -3
81 #define CHUNK_ZERO_SIZE -2
82 #define CHUNK_SIZE -1
83 int chunk_remaining;
85 int code;
89 static struct auth_entry proxy_auth;
91 static unsigned char *accept_charset = NULL;
94 static union option_info http_options[] = {
95 INIT_OPT_TREE("protocol", N_("HTTP"),
96 "http", 0,
97 N_("HTTP-specific options.")),
100 INIT_OPT_TREE("protocol.http", N_("Server bug workarounds"),
101 "bugs", 0,
102 N_("Server-side HTTP bugs workarounds.")),
104 INIT_OPT_BOOL("protocol.http.bugs", N_("Do not send Accept-Charset"),
105 "accept_charset", 0, 1,
106 N_("The Accept-Charset header is quite long and sending it "
107 "can trigger bugs in some rarely found servers.")),
109 INIT_OPT_BOOL("protocol.http.bugs", N_("Allow blacklisting"),
110 "allow_blacklist", 0, 1,
111 N_("Allow blacklisting of buggy servers.")),
113 INIT_OPT_BOOL("protocol.http.bugs", N_("Broken 302 redirects"),
114 "broken_302_redirect", 0, 1,
115 N_("Broken 302 redirect (violates RFC but compatible with "
116 "Netscape). This is a problem for a lot of web discussion "
117 "boards and the like. If they will do strange things to you, "
118 "try to play with this.")),
120 INIT_OPT_BOOL("protocol.http.bugs", N_("No keepalive after POST requests"),
121 "post_no_keepalive", 0, 0,
122 N_("Disable keepalive connection after POST request.")),
124 INIT_OPT_BOOL("protocol.http.bugs", N_("Use HTTP/1.0"),
125 "http10", 0, 0,
126 N_("Use HTTP/1.0 protocol instead of HTTP/1.1.")),
128 INIT_OPT_TREE("protocol.http", N_("Proxy configuration"),
129 "proxy", 0,
130 N_("HTTP proxy configuration.")),
132 INIT_OPT_STRING("protocol.http.proxy", N_("Host and port-number"),
133 "host", 0, "",
134 N_("Host and port-number (host:port) of the HTTP proxy, "
135 "or blank. If it's blank, HTTP_PROXY environment variable "
136 "is checked as well.")),
138 INIT_OPT_STRING("protocol.http.proxy", N_("Username"),
139 "user", 0, "",
140 N_("Proxy authentication username.")),
142 INIT_OPT_STRING("protocol.http.proxy", N_("Password"),
143 "passwd", 0, "",
144 N_("Proxy authentication password.")),
147 INIT_OPT_TREE("protocol.http", N_("Referer sending"),
148 "referer", 0,
149 N_("HTTP referer sending options. HTTP referer is a special "
150 "header sent in the HTTP requests, which is supposed to "
151 "contain the previous page visited by the browser."
152 "This way, the server can know what link did you follow "
153 "when accessing that page. However, this behaviour can "
154 "unfortunately considerably affect privacy and can lead even "
155 "to a security problem on some badly designed web pages.")),
157 INIT_OPT_INT("protocol.http.referer", N_("Policy"),
158 "policy", 0,
159 REFERER_NONE, REFERER_TRUE, REFERER_TRUE,
160 N_("Mode of sending HTTP referer:\n"
161 "0 is send no referer\n"
162 "1 is send current URL as referer\n"
163 "2 is send fixed fake referer\n"
164 "3 is send previous URL as referer (correct, but insecure)")),
166 INIT_OPT_STRING("protocol.http.referer", N_("Fake referer URL"),
167 "fake", 0, "",
168 N_("Fake referer to be sent when policy is 2.")),
171 INIT_OPT_STRING("protocol.http", N_("Send Accept-Language header"),
172 "accept_language", 0, "",
173 N_("Send Accept-Language header.")),
175 INIT_OPT_BOOL("protocol.http", N_("Use UI language as Accept-Language"),
176 "accept_ui_language", 0, 1,
177 N_("Request localised versions of documents from web-servers "
178 "(using the Accept-Language header) using the language "
179 "you have configured for ELinks' user-interface (this also "
180 "affects navigator.language ECMAScript value available to "
181 "scripts). Note that some see this as a potential security "
182 "risk because it tells web-masters and the FBI sniffers "
183 "about your language preference.")),
185 /* http://www.eweek.com/c/a/Desktops-and-Notebooks/Intel-Psion-End-Dispute-Concerning-Netbook-Trademark-288875/
186 * responds with "Transfer-Encoding: chunked" and
187 * "Content-Encoding: gzip" but does not compress the first chunk
188 * and the last chunk, causing ELinks to display garbage.
189 * (If User-Agent includes "Gecko" (case sensitive), then
190 * that server correctly compresses the whole stream.)
191 * ELinks should instead report the decompression error (bug 1017)
192 * or perhaps even blacklist the server for compression and retry.
193 * Until that has been implemented, disable compression by default. */
194 INIT_OPT_BOOL("protocol.http", N_("Enable on-the-fly compression"),
195 "compression", 0, 0,
196 N_("If enabled, the capability to receive compressed content "
197 "(gzip and/or bzip2) is announced to the server, which "
198 "usually sends the reply compressed, thus saving some "
199 "bandwidth at slight CPU expense.\n"
200 "\n"
201 "If ELinks displays a incomplete page or garbage, try "
202 "disabling this option. If that helps, there may be a bug in "
203 "the decompression part of ELinks. Please report such bugs.\n"
204 "\n"
205 "If ELinks has been compiled without compression support, "
206 "this option has no effect. To check the supported features, "
207 "see Help -> About.")),
209 INIT_OPT_BOOL("protocol.http", N_("Activate HTTP TRACE debugging"),
210 "trace", 0, 0,
211 N_("If active, all HTTP requests are sent with TRACE as "
212 "their method rather than GET or POST. This is useful for "
213 "debugging of both ELinks and various server-side scripts "
214 "--- the server only returns the client's request back to "
215 "the client verbatim. Note that this type of request may "
216 "not be enabled on all servers.")),
218 /* OSNews.com is supposed to be relying on the textmode token, at least. */
219 INIT_OPT_STRING("protocol.http", N_("User-agent identification"),
220 "user_agent", 0, "ELinks/%v (textmode; %s; %t-%b)",
221 N_("Change the User Agent ID. That means identification "
222 "string, which is sent to HTTP server when a document is "
223 "requested. The 'textmode' token in the first field is our "
224 "silent attempt to establish this as a standard for new "
225 "textmode user agents, so that the webmasters can have "
226 "just a single uniform test for these if they are e.g. "
227 "pushing some lite version to them automagically.\n"
228 "\n"
229 "Use \" \" if you don't want any User-Agent header to be sent "
230 "at all.\n"
231 "\n"
232 "%v in the string means ELinks version,\n"
233 "%s in the string means system identification,\n"
234 "%t in the string means size of the terminal,\n"
235 "%b in the string means number of bars displayed by ELinks.")),
238 INIT_OPT_TREE("protocol", N_("HTTPS"),
239 "https", 0,
240 N_("HTTPS-specific options.")),
242 INIT_OPT_TREE("protocol.https", N_("Proxy configuration"),
243 "proxy", 0,
244 N_("HTTPS proxy configuration.")),
246 INIT_OPT_STRING("protocol.https.proxy", N_("Host and port-number"),
247 "host", 0, "",
248 N_("Host and port-number (host:port) of the HTTPS CONNECT "
249 "proxy, or blank. If it's blank, HTTPS_PROXY environment "
250 "variable is checked as well.")),
251 NULL_OPTION_INFO,
254 static void done_http();
256 struct module http_protocol_module = struct_module(
257 /* name: */ N_("HTTP"),
258 /* options: */ http_options,
259 /* hooks: */ NULL,
260 /* submodules: */ NULL,
261 /* data: */ NULL,
262 /* init: */ NULL,
263 /* done: */ done_http
267 static void
268 done_http(void)
270 mem_free_if(proxy_auth.realm);
271 mem_free_if(proxy_auth.nonce);
272 mem_free_if(proxy_auth.opaque);
274 free_blacklist();
276 if (accept_charset)
277 mem_free(accept_charset);
280 static void
281 init_accept_charset(void)
283 struct string ac;
284 unsigned char *cs;
285 int i;
287 if (!init_string(&ac)) return;
289 for (i = 0; (cs = get_cp_mime_name(i)); i++) {
290 if (ac.length) {
291 add_to_string(&ac, ", ");
292 } else {
293 add_to_string(&ac, "Accept-Charset: ");
295 add_to_string(&ac, cs);
298 if (ac.length) {
299 add_crlf_to_string(&ac);
302 accept_charset = squeezastring(&ac);
304 done_string(&ac);
308 unsigned char *
309 subst_user_agent(unsigned char *fmt, unsigned char *version,
310 unsigned char *sysname, unsigned char *termsize)
312 struct string agent;
314 if (!init_string(&agent)) return NULL;
316 while (*fmt) {
317 int p;
319 for (p = 0; fmt[p] && fmt[p] != '%'; p++);
321 add_bytes_to_string(&agent, fmt, p);
322 fmt += p;
324 if (*fmt != '%') continue;
326 fmt++;
327 switch (*fmt) {
328 case 'b':
329 if (!list_empty(sessions)) {
330 unsigned char bs[4] = "";
331 int blen = 0;
332 struct session *ses = sessions.prev;
333 int bars = ses->status.show_status_bar
334 + ses->status.show_tabs_bar
335 + ses->status.show_title_bar;
337 ulongcat(bs, &blen, bars, 2, 0);
338 add_to_string(&agent, bs);
340 break;
341 case 'v':
342 add_to_string(&agent, version);
343 break;
344 case 's':
345 add_to_string(&agent, sysname);
346 break;
347 case 't':
348 if (termsize)
349 add_to_string(&agent, termsize);
350 break;
351 default:
352 add_bytes_to_string(&agent, fmt - 1, 2);
353 break;
355 if (*fmt) fmt++;
358 return agent.source;
/* Appends the @components of @uri to @header in a form that is safe for a
 * request line: every space, tab, CR and LF becomes "%20" and every
 * backslash becomes '/'. Nothing is appended if the URI string cannot be
 * built.
 *
 * This is certainly not the right place to do the escaping; URLs should
 * probably be encoded as early as possible and decoded again in the
 * protocol backends. --pasky */
static void
add_url_to_http_string(struct string *header, struct uri *uri, int components)
{
	unsigned char *url = get_uri_string(uri, components);
	unsigned char *cursor;

	if (!url)
		return;

	for (cursor = url; *cursor; ) {
		/* Length of the run that needs no substitution. */
		int run = strcspn(cursor, " \t\r\n\\");

		add_bytes_to_string(header, cursor, run);

		if (cursor[run] == '\0')
			break;

		if (cursor[run] == '\\') {
			add_char_to_string(header, '/');
		} else {
			add_to_string(header, "%20");
		}

		/* Step over the run and the character just replaced. */
		cursor += run + 1;
	}

	mem_free(url);
}
/* Converts the decimal digits in [@start, @end) to an integer and stores it
 * in *@value.
 *
 * Returns 0 on success. Returns -1 and leaves *@value untouched when the
 * range is empty or reversed (@end <= @start), when it contains a
 * non-digit, or when the number does not fit in an int. */
static int
revstr2num(unsigned char *start, unsigned char *end, int *value)
{
	unsigned char *pos;
	int val = 0;

	/* An empty range holds no number; without this check the first
	 * byte examined would lie before @start. */
	if (end <= start) return -1;

	for (pos = start; pos < end; pos++) {
		int digit;

		if (!isdigit(*pos)) return -1; /* NaN */
		digit = *pos - '0';

		/* Refuse values that would overflow int instead of
		 * running into undefined behaviour. */
		if (val > (INT_MAX - digit) / 10) return -1;

		val = val * 10 + digit;
	}

	*value = val;
	return 0;
}
412 /* This function extracts code, major and minor version from string
413 * "\s*HTTP/\d+.\d+\s+\d\d\d..."
414 * It returns a negative value on error, 0 on success.
416 static int
417 get_http_code(struct read_buffer *rb, int *code, struct http_version *version)
419 unsigned char *head = rb->data;
420 unsigned char *start;
422 *code = 0;
423 version->major = 0;
424 version->minor = 0;
426 /* Ignore spaces. */
427 while (*head == ' ') head++;
429 /* HTTP/ */
430 if (c_toupper(*head) != 'H' || c_toupper(*++head) != 'T' ||
431 c_toupper(*++head) != 'T' || c_toupper(*++head) != 'P'
432 || *++head != '/')
433 return -1;
435 /* Version */
436 start = ++head;
437 /* Find next '.' */
438 while (*head && *head != '.') head++;
439 /* Sanity check. */
440 if (!*head || !(head - start)
441 || (head - start) > 4
442 || !isdigit(*(head + 1)))
443 return -2;
445 /* Extract major version number. */
446 if (revstr2num(start, head, &version->major)) return -3; /* NaN */
448 start = head + 1;
450 /* Find next ' '. */
451 while (*head && *head != ' ') head++;
452 /* Sanity check. */
453 if (!*head || !(head - start) || (head - start) > 4) return -4;
455 /* Extract minor version number. */
456 if (revstr2num(start, head, &version->minor)) return -5; /* NaN */
458 /* Ignore spaces. */
459 while (*head == ' ') head++;
461 /* Sanity check for code. */
462 if (head[0] < '1' || head[0] > '9' ||
463 !isdigit(head[1]) ||
464 !isdigit(head[2]))
465 return -6; /* Invalid code. */
467 /* Extract code. */
468 *code = (head[0] - '0') * 100 + (head[1] - '0') * 10 + head[2] - '0';
470 return 0;
473 static int
474 check_http_server_bugs(struct uri *uri, struct http_connection_info *http,
475 unsigned char *head)
477 unsigned char *server;
478 const unsigned char *const *s;
479 static const unsigned char *const buggy_servers[] = {
480 "mod_czech/3.1.0",
481 "Purveyor",
482 "Netscape-Enterprise",
483 NULL
486 if (!get_opt_bool("protocol.http.bugs.allow_blacklist")
487 || HTTP_1_0(http->sent_version))
488 return 0;
490 server = parse_header(head, "Server", NULL);
491 if (!server)
492 return 0;
494 for (s = buggy_servers; *s; s++) {
495 if (strstr(server, *s)) {
496 add_blacklist_entry(uri, SERVER_BLACKLIST_HTTP10);
497 break;
501 mem_free(server);
502 return (*s != NULL);
505 static void
506 http_end_request(struct connection *conn, struct connection_state state,
507 int notrunc)
509 shutdown_connection_stream(conn);
511 if (conn->info && !((struct http_connection_info *) conn->info)->close
512 && (!conn->socket->ssl) /* We won't keep alive ssl connections */
513 && (!get_opt_bool("protocol.http.bugs.post_no_keepalive")
514 || !conn->uri->post)) {
515 if (is_in_state(state, S_OK) && conn->cached)
516 normalize_cache_entry(conn->cached, !notrunc ? conn->from : -1);
517 set_connection_state(conn, state);
518 add_keepalive_connection(conn, HTTP_KEEPALIVE_TIMEOUT, NULL);
519 } else {
520 abort_connection(conn, state);
524 static void http_send_header(struct socket *);
526 void
527 http_protocol_handler(struct connection *conn)
529 /* setcstate(conn, S_CONN); */
531 if (!has_keepalive_connection(conn)) {
532 make_connection(conn->socket, conn->uri, http_send_header,
533 conn->cache_mode >= CACHE_MODE_FORCE_RELOAD);
534 } else {
535 http_send_header(conn->socket);
/* Entry point for proxy connections; they are handled exactly like plain
 * HTTP connections. */
void
proxy_protocol_handler(struct connection *conn)
{
	http_protocol_handler(conn);
}
/* True if the URI @x uses the proxy pseudo-protocol. */
#define IS_PROXY_URI(x) ((x)->protocol == PROTOCOL_PROXY)

/* True if @conn goes through a proxy and the URI being proxied is https.
 * Evaluates @conn twice. */
#define connection_is_https_proxy(conn) \
	(IS_PROXY_URI((conn)->uri) \
	 && (conn)->proxied_uri->protocol == PROTOCOL_HTTPS)
550 struct http_connection_info *
551 init_http_connection_info(struct connection *conn, int major, int minor, int close)
553 struct http_connection_info *http;
555 http = mem_calloc(1, sizeof(*http));
556 if (!http) {
557 http_end_request(conn, connection_state(S_OUT_OF_MEM), 0);
558 return NULL;
561 http->sent_version.major = major;
562 http->sent_version.minor = minor;
563 http->close = close;
565 /* The CGI code uses this too and blacklisting expects a host name. */
566 if (conn->proxied_uri->protocol != PROTOCOL_FILE)
567 http->bl_flags = get_blacklist_flags(conn->proxied_uri);
569 if (http->bl_flags & SERVER_BLACKLIST_HTTP10
570 || get_opt_bool("protocol.http.bugs.http10")) {
571 http->sent_version.major = 1;
572 http->sent_version.minor = 0;
575 /* If called from HTTPS proxy connection the connection info might have
576 * already been allocated. */
577 mem_free_set(&conn->info, http);
579 return http;
/* Appends an "Accept-Encoding" header line to @header that lists every
 * compression method compiled in, in the order bzip2, deflate/gzip, lzma.
 * Appends nothing when no compression support was compiled in. */
static void
accept_encoding_header(struct string *header)
{
#if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) || defined(CONFIG_LZMA)
	/* Set once a method has been written, so that later ones know to
	 * emit a separator first. */
	int need_separator = 0;

	add_to_string(header, "Accept-Encoding: ");

#if defined(CONFIG_BZIP2)
	add_to_string(header, "bzip2");
	need_separator = 1;
#endif

#if defined(CONFIG_GZIP)
	if (need_separator)
		add_to_string(header, ", ");
	add_to_string(header, "deflate, gzip");
	need_separator = 1;
#endif

#if defined(CONFIG_LZMA)
	if (need_separator)
		add_to_string(header, ", ");
	add_to_string(header, "lzma");
#endif

	add_crlf_to_string(header);
#endif
}
/* Builds the complete HTTP request for the connection attached to @socket
 * (request line, headers and, for POST, the hex-decoded body) and hands it
 * to request_from_socket(), passing http_got_header as the callback.
 *
 * The method is TRACE when the trace option is set, CONNECT when an https
 * URI is requested through a proxy before the socket uses SSL, POST when
 * the URI carries post data, and GET otherwise. Headers that concern only
 * the origin server are left out of CONNECT requests; see the "CONNECT:"
 * comments throughout.
 *
 * The request is ended through http_end_request() with S_BAD_URL when the
 * URI has no host and with S_OUT_OF_MEM when the header string cannot be
 * initialised. */
609 static void
610 http_send_header(struct socket *socket)
612 struct connection *conn = socket->conn;
613 struct http_connection_info *http;
614 int trace = get_opt_bool("protocol.http.trace");
615 struct string header;
616 unsigned char *post_data = NULL;
617 struct auth_entry *entry = NULL;
618 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
619 unsigned char *optstr;
620 int use_connect, talking_to_proxy;
622 /* Sanity check for a host */
623 if (!uri || !uri->host || !*uri->host || !uri->hostlen) {
624 http_end_request(conn, connection_state(S_BAD_URL), 0);
625 return;
/* Requests HTTP/1.1 with the close flag clear; the helper may lower the
 * version to 1.0. It ends the request itself when it returns NULL. */
628 http = init_http_connection_info(conn, 1, 1, 0);
629 if (!http) return;
631 if (!init_string(&header)) {
632 http_end_request(conn, connection_state(S_OUT_OF_MEM), 0);
633 return;
636 if (!conn->cached) conn->cached = find_in_cache(uri);
/* talking_to_proxy: conn->uri is a proxy URI and the socket is not using
 * SSL. use_connect: in addition the proxied URI is https, so a CONNECT
 * request is sent. */
638 talking_to_proxy = IS_PROXY_URI(conn->uri) && !conn->socket->ssl;
639 use_connect = connection_is_https_proxy(conn) && !conn->socket->ssl;
641 if (trace) {
642 add_to_string(&header, "TRACE ");
643 } else if (use_connect) {
644 add_to_string(&header, "CONNECT ");
645 /* In CONNECT requests, we send only a subset of the
646 * headers to the proxy. See the "CONNECT:" comments
647 * below. After the CONNECT request succeeds, we
648 * negotiate TLS with the real server and make a new
649 * HTTP request that includes all the headers. */
650 } else if (uri->post) {
651 add_to_string(&header, "POST ");
652 conn->unrestartable = 1;
653 } else {
654 add_to_string(&header, "GET ");
/* When not talking to a proxy the request target starts with '/';
 * presumably the URI_DATA component added below does not include that
 * leading slash -- TODO confirm against protocol/uri.h. */
657 if (!talking_to_proxy) {
658 add_char_to_string(&header, '/');
661 if (use_connect) {
662 /* Add port if it was specified or the default port */
663 add_uri_to_string(&header, uri, URI_HTTP_CONNECT);
664 } else {
665 if (connection_is_https_proxy(conn) && conn->socket->ssl) {
666 add_url_to_http_string(&header, uri, URI_DATA);
668 } else if (talking_to_proxy) {
669 add_url_to_http_string(&header, uri, URI_PROXY);
671 } else {
672 add_url_to_http_string(&header, conn->uri, URI_DATA);
676 add_to_string(&header, " HTTP/");
677 add_long_to_string(&header, http->sent_version.major);
678 add_char_to_string(&header, '.');
679 add_long_to_string(&header, http->sent_version.minor);
680 add_crlf_to_string(&header);
682 /* CONNECT: Sending a Host header seems pointless as the same
683 * information is already in the CONNECT line. It's harmless
684 * though and Mozilla does it too. */
685 add_to_string(&header, "Host: ");
686 add_uri_to_string(&header, uri, URI_HTTP_HOST);
687 add_crlf_to_string(&header);
689 /* CONNECT: Proxy-Authorization is intended to be seen by the proxy. */
690 if (talking_to_proxy) {
691 unsigned char *user = get_opt_str("protocol.http.proxy.user");
692 unsigned char *passwd = get_opt_str("protocol.http.proxy.passwd");
694 if (proxy_auth.digest) {
695 unsigned char *response;
696 int userlen = int_min(strlen(user), AUTH_USER_MAXLEN - 1);
697 int passwordlen = int_min(strlen(passwd), AUTH_PASSWORD_MAXLEN - 1);
699 if (userlen)
700 memcpy(proxy_auth.user, user, userlen);
701 proxy_auth.user[userlen] = '\0';
702 if (passwordlen)
703 memcpy(proxy_auth.password, passwd, passwordlen);
704 proxy_auth.password[passwordlen] = '\0';
706 /* FIXME: @uri is the proxied URI. Maybe the passed URI
707 * should be the proxy URI aka conn->uri. --jonas */
708 response = get_http_auth_digest_response(&proxy_auth, uri);
709 if (response) {
710 add_to_string(&header, "Proxy-Authorization: Digest ");
711 add_to_string(&header, response);
712 add_crlf_to_string(&header);
714 mem_free(response);
717 } else {
718 if (user[0]) {
719 unsigned char *proxy_data;
721 proxy_data = straconcat(user, ":", passwd, (unsigned char *) NULL);
722 if (proxy_data) {
723 unsigned char *proxy_64 = base64_encode(proxy_data);
725 if (proxy_64) {
726 add_to_string(&header, "Proxy-Authorization: Basic ");
727 add_to_string(&header, proxy_64);
728 add_crlf_to_string(&header);
729 mem_free(proxy_64);
731 mem_free(proxy_data);
737 /* CONNECT: User-Agent does not reveal anything about the
738 * resource we're fetching, and it may help the proxy return
739 * better error messages. */
740 optstr = get_opt_str("protocol.http.user_agent");
741 if (*optstr && strcmp(optstr, " ")) {
742 unsigned char *ustr, ts[64] = "";
744 add_to_string(&header, "User-Agent: ");
/* Format the size of the terminal at terminals.prev as WIDTHxHEIGHT for
 * the %t substitution; ts stays empty when there is no terminal. */
746 if (!list_empty(terminals)) {
747 unsigned int tslen = 0;
748 struct terminal *term = terminals.prev;
750 ulongcat(ts, &tslen, term->width, 3, 0);
751 ts[tslen++] = 'x';
752 ulongcat(ts, &tslen, term->height, 3, 0);
754 ustr = subst_user_agent(optstr, VERSION_STRING, system_name,
755 ts);
757 if (ustr) {
758 add_to_string(&header, ustr);
759 mem_free(ustr);
762 add_crlf_to_string(&header);
765 /* CONNECT: Referer probably is a secret page in the HTTPS
766 * server, so don't reveal it to the proxy. */
767 if (!use_connect) {
768 switch (get_opt_int("protocol.http.referer.policy")) {
769 case REFERER_NONE:
770 /* oh well */
771 break;
773 case REFERER_FAKE:
774 optstr = get_opt_str("protocol.http.referer.fake");
775 if (!optstr[0]) break;
776 add_to_string(&header, "Referer: ");
777 add_to_string(&header, optstr);
778 add_crlf_to_string(&header);
779 break;
781 case REFERER_TRUE:
782 if (!conn->referrer) break;
783 add_to_string(&header, "Referer: ");
784 add_url_to_http_string(&header, conn->referrer, URI_HTTP_REFERRER);
785 add_crlf_to_string(&header);
786 break;
788 case REFERER_SAME_URL:
789 add_to_string(&header, "Referer: ");
790 add_url_to_http_string(&header, uri, URI_HTTP_REFERRER);
791 add_crlf_to_string(&header);
792 break;
796 /* CONNECT: Do send all Accept* headers to the CONNECT proxy,
797 * because they do not reveal anything about the resource
798 * we're going to request via TLS, and they may affect the
799 * error message if the CONNECT request fails.
801 * If ELinks is ever changed to vary its Accept headers based
802 * on what it intends to do with the returned resource, e.g.
803 * sending "Accept: text/css" when it wants an external
804 * stylesheet, then it should do that only in the inner GET
805 * and not in the outer CONNECT. */
806 add_to_string(&header, "Accept: */*");
807 add_crlf_to_string(&header);
809 if (get_opt_bool("protocol.http.compression"))
810 accept_encoding_header(&header);
/* The Accept-Charset line is built once, on first use, and reused for
 * every later request. */
812 if (!accept_charset) {
813 init_accept_charset();
816 if (!(http->bl_flags & SERVER_BLACKLIST_NO_CHARSET)
817 && !get_opt_bool("protocol.http.bugs.accept_charset")
818 && accept_charset) {
819 add_to_string(&header, accept_charset);
822 optstr = get_opt_str("protocol.http.accept_language");
823 if (optstr[0]) {
824 add_to_string(&header, "Accept-Language: ");
825 add_to_string(&header, optstr);
826 add_crlf_to_string(&header);
828 #ifdef CONFIG_NLS
829 else if (get_opt_bool("protocol.http.accept_ui_language")) {
830 unsigned char *code = language_to_iso639(current_language);
832 if (code) {
833 add_to_string(&header, "Accept-Language: ");
834 add_to_string(&header, code);
835 add_crlf_to_string(&header);
838 #endif
840 /* CONNECT: Proxy-Connection is intended to be seen by the
841 * proxy. If the CONNECT request succeeds, then the proxy
842 * will forward the remainder of the TCP connection to the
843 * origin server, and Proxy-Connection does not matter; but
844 * if the request fails, then Proxy-Connection may matter. */
845 /* FIXME: What about post-HTTP/1.1?? --Zas */
846 if (HTTP_1_1(http->sent_version)) {
847 if (!IS_PROXY_URI(conn->uri)) {
848 add_to_string(&header, "Connection: ");
849 } else {
850 add_to_string(&header, "Proxy-Connection: ");
853 if (!uri->post || !get_opt_bool("protocol.http.bugs.post_no_keepalive")) {
854 add_to_string(&header, "Keep-Alive");
855 } else {
856 add_to_string(&header, "close");
858 add_crlf_to_string(&header);
861 /* CONNECT: Do not tell the proxy anything we have cached
862 * about the resource. */
863 if (!use_connect && conn->cached) {
864 if (!conn->cached->incomplete && conn->cached->head
865 && conn->cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED) {
866 if (conn->cached->last_modified) {
867 add_to_string(&header, "If-Modified-Since: ");
868 add_to_string(&header, conn->cached->last_modified);
869 add_crlf_to_string(&header);
871 if (conn->cached->etag) {
872 add_to_string(&header, "If-None-Match: ");
873 add_to_string(&header, conn->cached->etag);
874 add_crlf_to_string(&header);
879 /* CONNECT: Let's send cache control headers to the proxy too;
880 * they may affect DNS caching. */
881 if (conn->cache_mode >= CACHE_MODE_FORCE_RELOAD) {
882 add_to_string(&header, "Pragma: no-cache");
883 add_crlf_to_string(&header);
884 add_to_string(&header, "Cache-Control: no-cache");
885 add_crlf_to_string(&header);
888 /* CONNECT: Do not reveal byte ranges to the proxy. It can't
889 * do anything good with that information anyway. */
890 if (!use_connect && (conn->from || conn->progress->start > 0)) {
891 /* conn->from takes precedence. conn->progress.start is set only the first
892 * time, then conn->from gets updated and in case of any retries
893 * etc we have everything interesting in conn->from already. */
894 add_to_string(&header, "Range: bytes=");
895 add_long_to_string(&header, conn->from ? conn->from : conn->progress->start);
896 add_char_to_string(&header, '-');
897 add_crlf_to_string(&header);
900 /* CONNECT: The Authorization header is for the origin server only. */
901 if (!use_connect) {
902 #ifdef CONFIG_GSSAPI
903 if (http_negotiate_output(uri, &header) != 0)
904 #endif
905 entry = find_auth(uri);
908 if (entry) {
909 if (entry->digest) {
910 unsigned char *response;
912 response = get_http_auth_digest_response(entry, uri);
913 if (response) {
914 add_to_string(&header, "Authorization: Digest ");
915 add_to_string(&header, response);
916 add_crlf_to_string(&header);
918 mem_free(response);
921 } else {
922 /* RFC2617 section 2 [Basic Authentication Scheme]
924 * To receive authorization, the client sends the userid
925 * and password, separated by a single colon (":")
926 * character, within a base64 [7] encoded string in the
927 * credentials. */
928 unsigned char *id;
930 /* Create base64 encoded string. */
931 id = straconcat(entry->user, ":", entry->password,
932 (unsigned char *) NULL);
933 if (id) {
934 unsigned char *base64 = base64_encode(id);
936 mem_free_set(&id, base64);
939 if (id) {
940 add_to_string(&header, "Authorization: Basic ");
941 add_to_string(&header, id);
942 add_crlf_to_string(&header);
943 mem_free(id);
948 /* CONNECT: Any POST data is for the origin server only. */
949 if (!use_connect && uri->post) {
950 /* We search for first '\n' in uri->post to get content type
951 * as set by get_form_uri(). This '\n' is dropped if any
952 * and replaced by correct '\r\n' termination here. */
953 unsigned char *postend = strchr(uri->post, '\n');
955 if (postend) {
956 add_to_string(&header, "Content-Type: ");
957 add_bytes_to_string(&header, uri->post, postend - uri->post);
958 add_crlf_to_string(&header);
/* The body follows the content type line, or is the whole string when
 * there is none. It is decoded below from pairs of hex digits, so the
 * number of bytes sent is half its string length. */
961 post_data = postend ? postend + 1 : uri->post;
962 add_to_string(&header, "Content-Length: ");
963 add_long_to_string(&header, strlen(post_data) / 2);
964 add_crlf_to_string(&header);
967 #ifdef CONFIG_COOKIES
968 /* CONNECT: Cookies are for the origin server only. */
969 if (!use_connect) {
970 struct string *cookies = send_cookies(uri);
972 if (cookies) {
973 add_to_string(&header, "Cookie: ");
974 add_string_to_string(&header, cookies);
975 add_crlf_to_string(&header);
976 done_string(cookies);
979 #endif
/* Empty line that terminates the header section. */
981 add_crlf_to_string(&header);
983 /* CONNECT: Any POST data is for the origin server only.
984 * This was already checked above and post_data is NULL
985 * in that case. Verified with an assertion below. */
986 if (post_data) {
987 #define POST_BUFFER_SIZE 4096
988 unsigned char *post = post_data;
989 unsigned char buffer[POST_BUFFER_SIZE];
990 int n = 0;
992 assert(!use_connect); /* see comment above */
/* Decode two hex digits per byte into the stack buffer and flush it to
 * the request each time it fills up. A digit that fails the assertion is
 * treated as 0. */
994 while (post[0] && post[1]) {
995 int h1, h2;
997 h1 = unhx(post[0]);
998 assertm(h1 >= 0 && h1 < 16, "h1 in the POST buffer is %d (%d/%c)", h1, post[0], post[0]);
999 if_assert_failed h1 = 0;
1001 h2 = unhx(post[1]);
1002 assertm(h2 >= 0 && h2 < 16, "h2 in the POST buffer is %d (%d/%c)", h2, post[1], post[1]);
1003 if_assert_failed h2 = 0;
1005 buffer[n++] = (h1<<4) + h2;
1006 post += 2;
1007 if (n == POST_BUFFER_SIZE) {
1008 add_bytes_to_string(&header, buffer, n);
1009 n = 0;
/* Flush whatever is left in a partially filled buffer. */
1013 if (n)
1014 add_bytes_to_string(&header, buffer, n);
1015 #undef POST_BUFFER_SIZE
1018 request_from_socket(socket, header.source, header.length,
1019 connection_state(S_SENT),
1020 SOCKET_END_ONCLOSE, http_got_header);
1021 done_string(&header);
1025 /* This function decompresses the data block given in @data (if it was
1026 * compressed), which is long @len bytes. The decompressed data block is given
1027 * back to the world as the return value and its length is stored into
1028 * @new_len. After this function returns, the caller will discard all the @len
1029 * input bytes, so this function must use all of them unless an error occurs.
1031 * In this function, value of either http->chunk_remaining or http->length is
1032 * being changed (it depends on if chunked mode is used or not).
1034 * Note that the function is still a little esoteric for me. Don't take it
1035 * lightly and don't mess with it without grave reason! If you dare to touch
1036 * this without testing the changes on slashdot, freshmeat and cvsweb
1037 * (including revision history), don't dare to send me any patches! ;) --pasky
1039 * This function gotta die. */
/* Return value in detail: with ENCODING_NONE, @data itself is returned and
 * *@new_len is set to @len. Otherwise the result is a buffer grown with
 * mem_realloc(), which may be NULL with *@new_len == 0 when no output was
 * produced or when setting up the pipe or decoder failed. */
1040 static unsigned char *
1041 decompress_data(struct connection *conn, unsigned char *data, int len,
1042 int *new_len)
1044 struct http_connection_info *http = conn->info;
1045 enum { NORMAL, FINISHING } state = NORMAL;
1046 int did_read = 0;
1047 int *length_of_block;
1048 unsigned char *output = NULL;
/* Number of bytes requested from the decoder per read; the output buffer
 * is enlarged by this much on every loop iteration. */
1050 #define BIG_READ 655360
/* Select the counter that tracks how much of the current block is still
 * expected: the chunk counter in chunked mode, the body length otherwise. */
1052 if (http->length == LEN_CHUNKED) {
1053 if (http->chunk_remaining == CHUNK_ZERO_SIZE)
1054 state = FINISHING;
1055 length_of_block = &http->chunk_remaining;
1056 } else {
1057 length_of_block = &http->length;
1058 if (!*length_of_block) {
1059 /* Going to finish this decoding business. */
1060 state = FINISHING;
/* Unencoded content is passed straight through. */
1064 if (conn->content_encoding == ENCODING_NONE) {
1065 *new_len = len;
1066 if (*length_of_block > 0) *length_of_block -= len;
1067 return data;
1070 *new_len = 0; /* new_len must be zero if we would ever return NULL */
/* Create the pipe that feeds the decoder on first use and make both of
 * its ends non-blocking. */
1072 if (conn->stream_pipes[0] == -1
1073 && (c_pipe(conn->stream_pipes) < 0
1074 || set_nonblocking_fd(conn->stream_pipes[0]) < 0
1075 || set_nonblocking_fd(conn->stream_pipes[1]) < 0)) {
1076 return NULL;
1079 do {
1080 unsigned char *tmp;
1082 if (state == NORMAL) {
1083 /* ... we aren't finishing yet. */
1084 int written = safe_write(conn->stream_pipes[1], data, len);
1086 if (written >= 0) {
1087 data += written;
1088 len -= written;
1090 /* In non-keep-alive connections http->length == -1, so the test below */
1091 if (*length_of_block > 0)
1092 *length_of_block -= written;
1093 /* http->length is 0 at the end of block for all modes: keep-alive,
1094 * non-keep-alive and chunked */
1095 if (!http->length) {
1096 /* That's all, folks - let's finish this. */
1097 state = FINISHING;
1098 } else if (!len) {
1099 /* We've done for this round (but not done
1100 * completely). Thus we will get out with
1101 * what we have and leave what we wrote to
1102 * the next round - we have to do that since
1103 * we MUST NOT ever empty the pipe completely
1104 * - this would cause a disaster for
1105 * read_encoded(), which would simply not
1106 * work right then. */
1107 return output;
/* Open the decoder on the read end of the pipe the first time through. */
1112 if (!conn->stream) {
1113 conn->stream = open_encoded(conn->stream_pipes[0],
1114 conn->content_encoding);
1115 if (!conn->stream) return NULL;
/* Make room for one more read. If the buffer cannot be grown, stop and
 * return what has been decoded so far. */
1118 tmp = mem_realloc(output, *new_len + BIG_READ);
1119 if (!tmp) break;
1120 output = tmp;
1122 did_read = read_encoded(conn->stream, output + *new_len, BIG_READ);
1124 /* Do not break from the loop if did_read == 0. It
1125 * means no decoded data is available yet, but some may
1126 * become available later. This happens especially with
1127 * the bzip2 decoder, which needs an entire compressed
1128 * block as input before it generates any output. */
1129 if (did_read < 0) {
1130 state = FINISHING;
1131 break;
1133 *new_len += did_read;
1134 } while (len || (did_read == BIG_READ));
1136 if (state == FINISHING) shutdown_connection_stream(conn);
1137 return output;
1138 #undef BIG_READ
1141 static int
1142 is_line_in_buffer(struct read_buffer *rb)
1144 int l;
1146 for (l = 0; l < rb->length; l++) {
1147 unsigned char a0 = rb->data[l];
1149 if (a0 == ASCII_LF)
1150 return l + 1;
1151 if (a0 == ASCII_CR) {
1152 if (rb->data[l + 1] == ASCII_LF
1153 && l < rb->length - 1)
1154 return l + 2;
1155 if (l == rb->length - 1)
1156 return 0;
1158 if (a0 < ' ')
1159 return -1;
1161 return 0;
1164 static void read_http_data(struct socket *socket, struct read_buffer *rb);
1166 static void
1167 read_more_http_data(struct connection *conn, struct read_buffer *rb,
1168 int already_got_anything)
1170 struct connection_state state = already_got_anything
1171 ? connection_state(S_TRANS) : conn->state;
1173 read_from_socket(conn->socket, rb, state, read_http_data);
1176 static void
1177 read_http_data_done(struct connection *conn)
1179 struct http_connection_info *http = conn->info;
1181 /* There's no content but an error so just print
1182 * that instead of nothing. */
1183 if (!conn->from) {
1184 if (http->code >= 400) {
1185 http_error_document(conn, http->code);
1187 } else {
1188 /* This is not an error, thus fine. No need generate any
1189 * document, as this may be empty and it's not a problem.
1190 * In case of 3xx, we're probably just getting kicked to
1191 * another page anyway. And in case of 2xx, the document
1192 * may indeed be empty and thus the user should see it so. */
1196 http_end_request(conn, connection_state(S_OK), 0);
1199 /* Returns:
1200 * -1 on error
1201 * 0 if more to read
1202 * 1 if done
1204 static int
1205 read_chunked_http_data(struct connection *conn, struct read_buffer *rb)
1207 struct http_connection_info *http = conn->info;
1208 int total_data_len = 0;
1210 while (1) {
1211 /* Chunked. Good luck! */
1212 /* See RFC2616, section 3.6.1. Basically, it looks like:
1213 * 1234 ; a = b ; c = d\r\n
1214 * aklkjadslkfjalkfjlkajkljfdkljdsfkljdf*1234\r\n
1215 * 0\r\n
1216 * \r\n */
1217 if (http->chunk_remaining == CHUNK_DATA_END) {
1218 int l = is_line_in_buffer(rb);
1220 if (l) {
1221 if (l == -1) {
1222 /* Invalid character in buffer. */
1223 return -1;
1226 /* Remove everything to the EOLN. */
1227 kill_buffer_data(rb, l);
1228 if (l <= 2) {
1229 /* Empty line. */
1230 return 2;
1232 continue;
1235 } else if (http->chunk_remaining == CHUNK_SIZE) {
1236 int l = is_line_in_buffer(rb);
1238 if (l) {
1239 unsigned char *de;
1240 int n = 0;
1242 if (l != -1) {
1243 errno = 0;
1244 n = strtol(rb->data, (char **) &de, 16);
1245 if (errno || !*de) {
1246 return -1;
1250 if (l == -1 || de == rb->data) {
1251 return -1;
1254 /* Remove everything to the EOLN. */
1255 kill_buffer_data(rb, l);
1256 http->chunk_remaining = n;
1257 if (!http->chunk_remaining)
1258 http->chunk_remaining = CHUNK_ZERO_SIZE;
1259 continue;
1262 } else {
1263 unsigned char *data;
1264 int data_len;
1265 int zero = (http->chunk_remaining == CHUNK_ZERO_SIZE);
1266 int len = zero ? 0 : http->chunk_remaining;
1268 /* Maybe everything necessary didn't come yet.. */
1269 int_upper_bound(&len, rb->length);
1270 conn->received += len;
1272 data = decompress_data(conn, rb->data, len, &data_len);
1274 if (add_fragment(conn->cached, conn->from,
1275 data, data_len) == 1)
1276 conn->tries = 0;
1278 if (data && data != rb->data) mem_free(data);
1280 conn->from += data_len;
1281 total_data_len += data_len;
1283 kill_buffer_data(rb, len);
1285 if (zero) {
1286 /* Last chunk has zero length, so this is last
1287 * chunk, we finished decompression just now
1288 * and now we can happily finish reading this
1289 * stuff. */
1290 http->chunk_remaining = CHUNK_DATA_END;
1291 continue;
1294 if (!http->chunk_remaining && rb->length > 0) {
1295 /* Eat newline succeeding each chunk. */
1296 if (rb->data[0] == ASCII_LF) {
1297 kill_buffer_data(rb, 1);
1298 } else {
1299 if (rb->data[0] != ASCII_CR
1300 || (rb->length >= 2
1301 && rb->data[1] != ASCII_LF)) {
1302 return -1;
1304 if (rb->length < 2) break;
1305 kill_buffer_data(rb, 2);
1307 http->chunk_remaining = CHUNK_SIZE;
1308 continue;
1311 break;
1314 /* More to read. */
1315 return !!total_data_len;
1318 /* Returns 0 if more data, 1 if done. */
1319 static int
1320 read_normal_http_data(struct connection *conn, struct read_buffer *rb)
1322 struct http_connection_info *http = conn->info;
1323 unsigned char *data;
1324 int data_len;
1325 int len = rb->length;
1327 if (http->length >= 0 && http->length < len) {
1328 /* We won't read more than we have to go. */
1329 len = http->length;
1332 conn->received += len;
1334 data = decompress_data(conn, rb->data, len, &data_len);
1336 if (add_fragment(conn->cached, conn->from, data, data_len) == 1)
1337 conn->tries = 0;
1339 if (data && data != rb->data) mem_free(data);
1341 conn->from += data_len;
1343 kill_buffer_data(rb, len);
1345 if (!http->length && (conn->socket->state == SOCKET_RETRY_ONCLOSE
1346 || conn->socket->state == SOCKET_CLOSED)) {
1347 return 2;
1350 return !!data_len;
1353 static void
1354 read_http_data(struct socket *socket, struct read_buffer *rb)
1356 struct connection *conn = socket->conn;
1357 struct http_connection_info *http = conn->info;
1358 int ret;
1360 if (socket->state == SOCKET_CLOSED) {
1361 if (conn->content_encoding) {
1362 /* Flush decompression first. */
1363 http->length = 0;
1364 } else {
1365 read_http_data_done(conn);
1366 return;
1370 if (http->length != LEN_CHUNKED) {
1371 ret = read_normal_http_data(conn, rb);
1373 } else {
1374 ret = read_chunked_http_data(conn, rb);
1377 switch (ret) {
1378 case 0:
1379 read_more_http_data(conn, rb, 0);
1380 break;
1381 case 1:
1382 read_more_http_data(conn, rb, 1);
1383 break;
1384 case 2:
1385 read_http_data_done(conn);
1386 break;
1387 default:
1388 assertm(ret == -1, "Unexpected return value: %d", ret);
1389 abort_connection(conn, connection_state(S_HTTP_ERROR));
1393 /* Returns offset of the header end, zero if more data is needed, -1 when
1394 * incorrect data was received, -2 if this is HTTP/0.9 and no header is to
1395 * come. */
1396 static int
1397 get_header(struct read_buffer *rb)
1399 int i;
1401 /* XXX: We will have to do some guess about whether an HTTP header is
1402 * coming or not, in order to support HTTP/0.9 reply correctly. This
1403 * means a little code duplication with get_http_code(). --pasky */
1404 if (rb->length > 4 && c_strncasecmp(rb->data, "HTTP/", 5))
1405 return -2;
1407 for (i = 0; i < rb->length; i++) {
1408 unsigned char a0 = rb->data[i];
1409 unsigned char a1 = rb->data[i + 1];
1411 if (a0 == 0) {
1412 rb->data[i] = ' ';
1413 continue;
1415 if (a0 == ASCII_LF && a1 == ASCII_LF
1416 && i < rb->length - 1)
1417 return i + 2;
1418 if (a0 == ASCII_CR && i < rb->length - 3) {
1419 if (a1 == ASCII_CR) continue;
1420 if (a1 != ASCII_LF) return -1;
1421 if (rb->data[i + 2] == ASCII_CR) {
1422 if (rb->data[i + 3] != ASCII_LF) return -1;
1423 return i + 4;
1428 return 0;
/* Walks over the @header_field lines of @header and acts on the first
 * challenge it can use: a Basic one carrying a realm, a Digest one, or (with
 * CONFIG_GSSAPI) one of the negotiate schemes. Basic and Digest challenges
 * are recorded through add_auth_entry().
 *
 * Returns 1 if we need to retry the connection (for negotiate-auth only),
 * 0 otherwise. */
static int
check_http_authentication(struct connection *conn, struct uri *uri,
			  unsigned char *header, unsigned char *header_field)
{
	unsigned char *rest;
	unsigned char *challenge;
	int need_retry = 0;

	for (challenge = parse_header(header, header_field, &rest);
	     challenge;
	     challenge = parse_header(rest, header_field, &rest)) {
		int handled = 0;

		if (!c_strncasecmp(challenge, "Basic", 5)) {
			unsigned char *realm = get_header_param(challenge, "realm");

			/* A Basic challenge without a realm is passed over. */
			if (realm) {
				add_auth_entry(uri, realm, NULL, NULL, 0);
				mem_free(realm);
				handled = 1;
			}
		} else if (!c_strncasecmp(challenge, "Digest", 6)) {
			unsigned char *realm = get_header_param(challenge, "realm");
			unsigned char *nonce = get_header_param(challenge, "nonce");
			unsigned char *opaque = get_header_param(challenge, "opaque");

			add_auth_entry(uri, realm, nonce, opaque, 1);

			mem_free_if(realm);
			mem_free_if(nonce);
			mem_free_if(opaque);
			handled = 1;
		}
#ifdef CONFIG_GSSAPI
		else if (!c_strncasecmp(challenge, HTTPNEG_GSS_STR, HTTPNEG_GSS_STRLEN)) {
			if (http_negotiate_input(conn, uri, HTTPNEG_GSS, rest) == 0)
				need_retry = 1;
			handled = 1;
		}
		else if (!c_strncasecmp(challenge, HTTPNEG_NEG_STR, HTTPNEG_NEG_STRLEN)) {
			if (http_negotiate_input(conn, uri, HTTPNEG_NEG, rest) == 0)
				need_retry = 1;
			handled = 1;
		}
#endif

		mem_free(challenge);
		if (handled) break;
	}

	return need_retry;
}
1484 void
1485 http_got_header(struct socket *socket, struct read_buffer *rb)
1487 struct connection *conn = socket->conn;
1488 struct http_connection_info *http = conn->info;
1489 unsigned char *head;
1490 #ifdef CONFIG_COOKIES
1491 unsigned char *cookie, *ch;
1492 #endif
1493 unsigned char *d;
1494 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
1495 struct http_version version = { 0, 9 };
1496 struct connection_state state = (!is_in_state(conn->state, S_PROC)
1497 ? connection_state(S_GETH)
1498 : connection_state(S_PROC));
1499 int a, h = 200;
1500 int cf;
1502 if (socket->state == SOCKET_CLOSED) {
1503 if (!conn->tries && uri->host) {
1504 if (http->bl_flags & SERVER_BLACKLIST_NO_CHARSET) {
1505 del_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1506 } else {
1507 add_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1508 conn->tries = -1;
1511 retry_connection(conn, connection_state(S_CANT_READ));
1512 return;
1514 socket->state = SOCKET_RETRY_ONCLOSE;
1516 again:
1517 a = get_header(rb);
1518 if (a == -1) {
1519 abort_connection(conn, connection_state(S_HTTP_ERROR));
1520 return;
1522 if (!a) {
1523 read_from_socket(conn->socket, rb, state, http_got_header);
1524 return;
1526 /* a == -2 from get_header means HTTP/0.9. In that case, skip
1527 * the get_http_code call; @h and @version have already been
1528 * initialized with the right values. */
1529 if (a == -2) a = 0;
1530 if ((a && get_http_code(rb, &h, &version))
1531 || h == 101) {
1532 abort_connection(conn, connection_state(S_HTTP_ERROR));
1533 return;
1536 /* When no header, HTTP/0.9 document. That's always text/html,
1537 * according to
1538 * http://www.w3.org/Protocols/HTTP/AsImplemented.html. */
1539 /* FIXME: This usage of fake protocol headers for setting up the
1540 * content type has been obsoleted by the @content_type member of
1541 * {struct cache_entry}. */
1542 head = (a ? memacpy(rb->data, a)
1543 : stracpy("\r\nContent-Type: text/html\r\n"));
1544 if (!head) {
1545 abort_connection(conn, connection_state(S_OUT_OF_MEM));
1546 return;
1549 if (check_http_server_bugs(uri, http, head)) {
1550 mem_free(head);
1551 retry_connection(conn, connection_state(S_RESTART));
1552 return;
1555 #ifdef CONFIG_CGI
1556 if (uri->protocol == PROTOCOL_FILE) {
1557 /* ``Status'' is not a standard HTTP header field although some
1558 * HTTP servers like www.php.net uses it for some reason. It should
1559 * only be used for CGI scripts so that it does not interfere
1560 * with status code depended handling for ``normal'' HTTP like
1561 * redirects. */
1562 d = parse_header(head, "Status", NULL);
1563 if (d) {
1564 int h2 = atoi(d);
1566 mem_free(d);
1567 if (h2 >= 100 && h2 < 600) h = h2;
1568 if (h == 101) {
1569 mem_free(head);
1570 abort_connection(conn, connection_state(S_HTTP_ERROR));
1571 return;
1575 #endif
1577 #ifdef CONFIG_COOKIES
1578 ch = head;
1579 while ((cookie = parse_header(ch, "Set-Cookie", &ch))) {
1580 set_cookie(uri, cookie);
1581 mem_free(cookie);
1583 #endif
1584 http->code = h;
1586 if (h == 100) {
1587 mem_free(head);
1588 state = connection_state(S_PROC);
1589 kill_buffer_data(rb, a);
1590 goto again;
1592 if (h < 200) {
1593 mem_free(head);
1594 abort_connection(conn, connection_state(S_HTTP_ERROR));
1595 return;
1597 if (h == 304) {
1598 mem_free(head);
1599 http_end_request(conn, connection_state(S_OK), 1);
1600 return;
1602 if (h == 204) {
1603 mem_free(head);
1604 http_end_request(conn, connection_state(S_HTTP_204), 0);
1605 return;
1607 if (h == 200 && connection_is_https_proxy(conn) && !conn->socket->ssl) {
1608 #ifdef CONFIG_SSL
1609 mem_free(head);
1610 socket->need_ssl = 1;
1611 complete_connect_socket(socket, uri, http_send_header);
1612 #else
1613 abort_connection(conn, connection_state(S_SSL_ERROR));
1614 #endif
1615 return;
1618 conn->cached = get_cache_entry(conn->uri);
1619 if (!conn->cached) {
1620 mem_free(head);
1621 abort_connection(conn, connection_state(S_OUT_OF_MEM));
1622 return;
1624 conn->cached->cgi = conn->cgi;
1625 mem_free_set(&conn->cached->head, head);
1627 if (!get_opt_bool("document.cache.ignore_cache_control")) {
1628 struct cache_entry *cached = conn->cached;
1630 /* I am not entirely sure in what order we should process these
1631 * headers and if we should still process Cache-Control max-age
1632 * if we already set max age to date mentioned in Expires.
1633 * --jonas */
1634 /* Ensure that when ever cached->max_age is set, cached->expired
1635 * is also set, so the cache management knows max_age contains a
1636 * valid time. If on the other hand no caching is requested
1637 * cached->expire should be set to zero. */
1638 if ((d = parse_header(cached->head, "Expires", NULL))) {
1639 /* Convert date to seconds. */
1640 time_t expires = parse_date(&d, NULL, 0, 1);
1642 mem_free(d);
1644 if (expires && cached->cache_mode != CACHE_MODE_NEVER) {
1645 timeval_from_seconds(&cached->max_age, expires);
1646 cached->expire = 1;
1650 if ((d = parse_header(cached->head, "Pragma", NULL))) {
1651 if (strstr(d, "no-cache")) {
1652 cached->cache_mode = CACHE_MODE_NEVER;
1653 cached->expire = 0;
1655 mem_free(d);
1658 if (cached->cache_mode != CACHE_MODE_NEVER
1659 && (d = parse_header(cached->head, "Cache-Control", NULL))) {
1660 if (strstr(d, "no-cache") || strstr(d, "must-revalidate")) {
1661 cached->cache_mode = CACHE_MODE_NEVER;
1662 cached->expire = 0;
1664 } else {
1665 unsigned char *pos = strstr(d, "max-age=");
1667 assert(cached->cache_mode != CACHE_MODE_NEVER);
1669 if (pos) {
1670 /* Grab the number of seconds. */
1671 timeval_T max_age;
1673 timeval_from_seconds(&max_age, atol(pos + 8));
1674 timeval_now(&cached->max_age);
1675 timeval_add_interval(&cached->max_age, &max_age);
1677 cached->expire = 1;
1681 mem_free(d);
1685 /* XXX: Is there some reason why NOT to follow the Location header
1686 * for any status? If the server didn't mean it, it wouldn't send
1687 * it, after all...? --pasky */
1688 if (h == 201 || h == 301 || h == 302 || h == 303 || h == 307) {
1689 d = parse_header(conn->cached->head, "Location", NULL);
1690 if (d) {
1691 int use_get_method = (h == 303);
1693 /* A note from RFC 2616 section 10.3.3:
1694 * RFC 1945 and RFC 2068 specify that the client is not
1695 * allowed to change the method on the redirected
1696 * request. However, most existing user agent
1697 * implementations treat 302 as if it were a 303
1698 * response, performing a GET on the Location
1699 * field-value regardless of the original request
1700 * method. */
1701 /* So POST must not be redirected to GET, but some
1702 * BUGGY message boards rely on it :-( */
1703 if (h == 302
1704 && get_opt_bool("protocol.http.bugs.broken_302_redirect"))
1705 use_get_method = 1;
1707 redirect_cache(conn->cached, d, use_get_method, -1);
1708 mem_free(d);
1712 if (h == 401) {
1713 if (check_http_authentication(conn, uri,
1714 conn->cached->head, "WWW-Authenticate")) {
1715 retry_connection(conn, connection_state(S_RESTART));
1716 return;
1720 if (h == 407) {
1721 unsigned char *str;
1723 d = parse_header(conn->cached->head, "Proxy-Authenticate", &str);
1724 while (d) {
1725 if (!c_strncasecmp(d, "Basic", 5)) {
1726 unsigned char *realm = get_header_param(d, "realm");
1728 if (realm) {
1729 mem_free_set(&proxy_auth.realm, realm);
1730 proxy_auth.digest = 0;
1731 mem_free(d);
1732 break;
1735 } else if (!c_strncasecmp(d, "Digest", 6)) {
1736 unsigned char *realm = get_header_param(d, "realm");
1737 unsigned char *nonce = get_header_param(d, "nonce");
1738 unsigned char *opaque = get_header_param(d, "opaque");
1740 mem_free_set(&proxy_auth.realm, realm);
1741 mem_free_set(&proxy_auth.nonce, nonce);
1742 mem_free_set(&proxy_auth.opaque, opaque);
1743 proxy_auth.digest = 1;
1745 mem_free(d);
1746 break;
1749 mem_free(d);
1750 d = parse_header(str, "Proxy-Authenticate", &str);
1754 kill_buffer_data(rb, a);
1755 http->close = 0;
1756 http->length = -1;
1757 http->recv_version = version;
1759 if ((d = parse_header(conn->cached->head, "Connection", NULL))
1760 || (d = parse_header(conn->cached->head, "Proxy-Connection", NULL))) {
1761 if (!c_strcasecmp(d, "close")) http->close = 1;
1762 mem_free(d);
1763 } else if (PRE_HTTP_1_1(version)) {
1764 http->close = 1;
1767 cf = conn->from;
1768 conn->from = 0;
1769 d = parse_header(conn->cached->head, "Content-Range", NULL);
1770 if (d) {
1771 if (strlen(d) > 6) {
1772 d[5] = 0;
1773 if (isdigit(d[6]) && !c_strcasecmp(d, "bytes")) {
1774 int f;
1776 errno = 0;
1777 f = strtol(d + 6, NULL, 10);
1779 if (!errno && f >= 0) conn->from = f;
1782 mem_free(d);
1784 if (cf && !conn->from && !conn->unrestartable) conn->unrestartable = 1;
1785 if ((conn->progress->start <= 0 && conn->from > cf) || conn->from < 0) {
1786 /* We don't want this if conn->progress.start because then conn->from will
1787 * be probably value of conn->progress.start, while cf is 0. */
1788 abort_connection(conn, connection_state(S_HTTP_ERROR));
1789 return;
1792 #if 0
1794 struct status *s;
1795 foreach (s, conn->downloads) {
1796 fprintf(stderr, "conn %p status %p pri %d st %d er %d :: ce %s",
1797 conn, s, s->pri, s->state, s->prev_error,
1798 s->cached ? s->cached->url : (unsigned char *) "N-U-L-L");
1801 #endif
1803 if (conn->progress->start >= 0) {
1804 /* Update to the real value which we've got from Content-Range. */
1805 conn->progress->seek = conn->from;
1807 conn->progress->start = conn->from;
1809 d = parse_header(conn->cached->head, "Content-Length", NULL);
1810 if (d) {
1811 unsigned char *ep;
1812 int l;
1814 errno = 0;
1815 l = strtol(d, (char **) &ep, 10);
1817 if (!errno && !*ep && l >= 0) {
1818 if (!http->close || POST_HTTP_1_0(version))
1819 http->length = l;
1820 conn->est_length = conn->from + l;
1822 mem_free(d);
1825 if (!conn->unrestartable) {
1826 d = parse_header(conn->cached->head, "Accept-Ranges", NULL);
1828 if (d) {
1829 if (!c_strcasecmp(d, "none"))
1830 conn->unrestartable = 1;
1831 mem_free(d);
1832 } else {
1833 if (!conn->from)
1834 conn->unrestartable = 1;
1838 d = parse_header(conn->cached->head, "Transfer-Encoding", NULL);
1839 if (d) {
1840 if (!c_strcasecmp(d, "chunked")) {
1841 http->length = LEN_CHUNKED;
1842 http->chunk_remaining = CHUNK_SIZE;
1844 mem_free(d);
1846 if (!http->close && http->length == -1) http->close = 1;
1848 d = parse_header(conn->cached->head, "Last-Modified", NULL);
1849 if (d) {
1850 if (conn->cached->last_modified && c_strcasecmp(conn->cached->last_modified, d)) {
1851 delete_entry_content(conn->cached);
1852 if (conn->from) {
1853 conn->from = 0;
1854 mem_free(d);
1855 retry_connection(conn, connection_state(S_MODIFIED));
1856 return;
1859 if (!conn->cached->last_modified) conn->cached->last_modified = d;
1860 else mem_free(d);
1862 if (!conn->cached->last_modified) {
1863 d = parse_header(conn->cached->head, "Date", NULL);
1864 if (d) conn->cached->last_modified = d;
1867 /* FIXME: Parse only if HTTP/1.1 or later? --Zas */
1868 d = parse_header(conn->cached->head, "ETag", NULL);
1869 if (d) {
1870 if (conn->cached->etag) {
1871 unsigned char *old_tag = conn->cached->etag;
1872 unsigned char *new_tag = d;
1874 /* http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19 */
1876 if (new_tag[0] == 'W' && new_tag[1] == '/')
1877 new_tag += 2;
1879 if (old_tag[0] == 'W' && old_tag[1] == '/')
1880 old_tag += 2;
1882 if (strcmp(new_tag, old_tag)) {
1883 delete_entry_content(conn->cached);
1884 if (conn->from) {
1885 conn->from = 0;
1886 mem_free(d);
1887 retry_connection(conn, connection_state(S_MODIFIED));
1888 return;
1893 if (!conn->cached->etag)
1894 conn->cached->etag = d;
1895 else
1896 mem_free(d);
1899 d = parse_header(conn->cached->head, "Content-Encoding", NULL);
1900 if (d) {
1901 unsigned char *extension = get_extension_from_uri(uri);
1902 enum stream_encoding file_encoding;
1904 file_encoding = extension ? guess_encoding(extension) : ENCODING_NONE;
1905 mem_free_if(extension);
1907 /* If the content is encoded, we want to preserve the encoding
1908 * if it is implied by the extension, so that saving the URI
1909 * will leave the saved file with the correct encoding. */
1910 #ifdef CONFIG_GZIP
1911 if (file_encoding != ENCODING_GZIP
1912 && (!c_strcasecmp(d, "gzip") || !c_strcasecmp(d, "x-gzip")))
1913 conn->content_encoding = ENCODING_GZIP;
1914 if (!c_strcasecmp(d, "deflate") || !c_strcasecmp(d, "x-deflate"))
1915 conn->content_encoding = ENCODING_DEFLATE;
1916 #endif
1918 #ifdef CONFIG_BZIP2
1919 if (file_encoding != ENCODING_BZIP2
1920 && (!c_strcasecmp(d, "bzip2") || !c_strcasecmp(d, "x-bzip2")))
1921 conn->content_encoding = ENCODING_BZIP2;
1922 #endif
1924 #ifdef CONFIG_LZMA
1925 if (file_encoding != ENCODING_LZMA
1926 && (!c_strcasecmp(d, "lzma") || !c_strcasecmp(d, "x-lzma")))
1927 conn->content_encoding = ENCODING_LZMA;
1928 #endif
1929 mem_free(d);
1932 if (conn->content_encoding != ENCODING_NONE) {
1933 mem_free_if(conn->cached->encoding_info);
1934 conn->cached->encoding_info = stracpy(get_encoding_name(conn->content_encoding));
1937 if (http->length == -1 || http->close)
1938 socket->state = SOCKET_END_ONCLOSE;
1940 read_http_data(socket, rb);