http_negotiate: do not delegate GSSAPI credentials
[elinks.git] / src / protocol / http / http.c
blobd99a7f7ed852b0aa454e8c80e9aa5080c37e279a
1 /* Internal "http" protocol implementation */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <errno.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #ifdef HAVE_UNISTD_H
12 #include <unistd.h>
13 #endif
14 #ifdef HAVE_FCNTL_H
15 #include <fcntl.h> /* OS/2 needs this after sys/types.h */
16 #endif
17 #ifdef HAVE_LIMITS_H
18 #include <limits.h>
19 #endif
21 #include "elinks.h"
23 #include "cache/cache.h"
24 #include "config/options.h"
25 #include "cookies/cookies.h"
26 #include "intl/charsets.h"
27 #include "intl/gettext/libintl.h"
28 #include "main/module.h"
29 #include "network/connection.h"
30 #include "network/progress.h"
31 #include "network/socket.h"
32 #include "osdep/ascii.h"
33 #include "osdep/osdep.h"
34 #include "osdep/sysname.h"
35 #include "protocol/auth/auth.h"
36 #include "protocol/auth/digest.h"
37 #include "protocol/date.h"
38 #include "protocol/header.h"
39 #include "protocol/http/blacklist.h"
40 #include "protocol/http/codes.h"
41 #include "protocol/http/http.h"
42 #include "protocol/uri.h"
43 #include "session/session.h"
44 #include "terminal/terminal.h"
45 #include "util/base64.h"
46 #include "util/conv.h"
47 #include "util/memory.h"
48 #include "util/string.h"
50 #ifdef CONFIG_GSSAPI
51 #include "http_negotiate.h"
52 #endif
/* An HTTP protocol version as written in request and status lines
 * ("HTTP/<major>.<minor>"). */
struct http_version {
	int major;
	int minor;
};

/* Version predicates. Each takes a struct http_version object (not a
 * pointer). The argument is expanded more than once, so do not pass an
 * expression that has side effects. */
#define HTTP_0_9(x)	 ((x).major == 0 && (x).minor == 9)
#define HTTP_1_0(x)	 ((x).major == 1 && (x).minor == 0)
#define HTTP_1_1(x)	 ((x).major == 1 && (x).minor == 1)
/* Anything older than HTTP/1.0, i.e. major version 0. */
#define PRE_HTTP_1_0(x)	 ((x).major < 1)
/* Anything older than HTTP/1.1: major version 0, or exactly 1.0. */
#define PRE_HTTP_1_1(x)	 (PRE_HTTP_1_0(x) || HTTP_1_0(x))
/* Strictly newer than HTTP/1.0. */
#define POST_HTTP_1_0(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 0))
/* Strictly newer than HTTP/1.1. */
#define POST_HTTP_1_1(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 1))
68 struct http_connection_info {
69 enum blacklist_flags bl_flags;
70 struct http_version recv_version;
71 struct http_version sent_version;
73 int close;
75 #define LEN_CHUNKED -2 /* == we get data in unknown number of chunks */
76 #define LEN_FINISHED 0
77 int length;
79 /* Either bytes coming in this chunk yet or "parser state". */
80 #define CHUNK_DATA_END -3
81 #define CHUNK_ZERO_SIZE -2
82 #define CHUNK_SIZE -1
83 int chunk_remaining;
85 int code;
89 static struct auth_entry proxy_auth;
91 static unsigned char *accept_charset = NULL;
94 static union option_info http_options[] = {
95 INIT_OPT_TREE("protocol", N_("HTTP"),
96 "http", 0,
97 N_("HTTP-specific options.")),
100 INIT_OPT_TREE("protocol.http", N_("Server bug workarounds"),
101 "bugs", 0,
102 N_("Server-side HTTP bugs workarounds.")),
104 INIT_OPT_BOOL("protocol.http.bugs", N_("Do not send Accept-Charset"),
105 "accept_charset", 0, 1,
106 N_("The Accept-Charset header is quite long and sending it "
107 "can trigger bugs in some rarely found servers.")),
109 INIT_OPT_BOOL("protocol.http.bugs", N_("Allow blacklisting"),
110 "allow_blacklist", 0, 1,
111 N_("Allow blacklisting of buggy servers.")),
113 INIT_OPT_BOOL("protocol.http.bugs", N_("Broken 302 redirects"),
114 "broken_302_redirect", 0, 1,
115 N_("Broken 302 redirect (violates RFC but compatible with "
116 "Netscape). This is a problem for a lot of web discussion "
117 "boards and the like. If they will do strange things to you, "
118 "try to play with this.")),
120 INIT_OPT_BOOL("protocol.http.bugs", N_("No keepalive after POST requests"),
121 "post_no_keepalive", 0, 0,
122 N_("Disable keepalive connection after POST request.")),
124 INIT_OPT_BOOL("protocol.http.bugs", N_("Use HTTP/1.0"),
125 "http10", 0, 0,
126 N_("Use HTTP/1.0 protocol instead of HTTP/1.1.")),
128 INIT_OPT_TREE("protocol.http", N_("Proxy configuration"),
129 "proxy", 0,
130 N_("HTTP proxy configuration.")),
132 INIT_OPT_STRING("protocol.http.proxy", N_("Host and port-number"),
133 "host", 0, "",
134 N_("Host and port-number (host:port) of the HTTP proxy, "
135 "or blank. If it's blank, HTTP_PROXY environment variable "
136 "is checked as well.")),
138 INIT_OPT_STRING("protocol.http.proxy", N_("Username"),
139 "user", 0, "",
140 N_("Proxy authentication username.")),
142 INIT_OPT_STRING("protocol.http.proxy", N_("Password"),
143 "passwd", 0, "",
144 N_("Proxy authentication password.")),
147 INIT_OPT_TREE("protocol.http", N_("Referer sending"),
148 "referer", 0,
149 N_("HTTP referer sending options. HTTP referer is a special "
150 "header sent in the HTTP requests, which is supposed to "
151 "contain the previous page visited by the browser."
152 "This way, the server can know what link did you follow "
153 "when accessing that page. However, this behaviour can "
154 "unfortunately considerably affect privacy and can lead even "
155 "to a security problem on some badly designed web pages.")),
157 INIT_OPT_INT("protocol.http.referer", N_("Policy"),
158 "policy", 0,
159 REFERER_NONE, REFERER_TRUE, REFERER_TRUE,
160 N_("Mode of sending HTTP referer:\n"
161 "0 is send no referer\n"
162 "1 is send current URL as referer\n"
163 "2 is send fixed fake referer\n"
164 "3 is send previous URL as referer (correct, but insecure)")),
166 INIT_OPT_STRING("protocol.http.referer", N_("Fake referer URL"),
167 "fake", 0, "",
168 N_("Fake referer to be sent when policy is 2.")),
171 INIT_OPT_STRING("protocol.http", N_("Send Accept-Language header"),
172 "accept_language", 0, "",
173 N_("Send Accept-Language header.")),
175 INIT_OPT_BOOL("protocol.http", N_("Use UI language as Accept-Language"),
176 "accept_ui_language", 0, 1,
177 N_("Request localised versions of documents from web-servers "
178 "(using the Accept-Language header) using the language "
179 "you have configured for ELinks' user-interface (this also "
180 "affects navigator.language ECMAScript value available to "
181 "scripts). Note that some see this as a potential security "
182 "risk because it tells web-masters and the FBI sniffers "
183 "about your language preference.")),
185 /* http://www.eweek.com/c/a/Desktops-and-Notebooks/Intel-Psion-End-Dispute-Concerning-Netbook-Trademark-288875/
186 * responds with "Transfer-Encoding: chunked" and
187 * "Content-Encoding: gzip" but does not compress the first chunk
188 * and the last chunk, causing ELinks to display garbage.
189 * (If User-Agent includes "Gecko" (case sensitive), then
190 * that server correctly compresses the whole stream.)
191 * ELinks should instead report the decompression error (bug 1017)
192 * or perhaps even blacklist the server for compression and retry.
193 * Until that has been implemented, disable compression by default. */
194 INIT_OPT_BOOL("protocol.http", N_("Enable on-the-fly compression"),
195 "compression", 0, 0,
196 N_("If enabled, the capability to receive compressed content "
197 "(gzip and/or bzip2) is announced to the server, which "
198 "usually sends the reply compressed, thus saving some "
199 "bandwidth at slight CPU expense.\n"
200 "\n"
201 "If ELinks displays a incomplete page or garbage, try "
202 "disabling this option. If that helps, there may be a bug in "
203 "the decompression part of ELinks. Please report such bugs.\n"
204 "\n"
205 "If ELinks has been compiled without compression support, "
206 "this option has no effect. To check the supported features, "
207 "see Help -> About.")),
209 INIT_OPT_BOOL("protocol.http", N_("Activate HTTP TRACE debugging"),
210 "trace", 0, 0,
211 N_("If active, all HTTP requests are sent with TRACE as "
212 "their method rather than GET or POST. This is useful for "
213 "debugging of both ELinks and various server-side scripts "
214 "--- the server only returns the client's request back to "
215 "the client verbatim. Note that this type of request may "
216 "not be enabled on all servers.")),
218 /* OSNews.com is supposed to be relying on the textmode token, at least. */
219 INIT_OPT_STRING("protocol.http", N_("User-agent identification"),
220 "user_agent", 0, "ELinks/%v (textmode; %s; %t-%b)",
221 N_("Change the User Agent ID. That means identification "
222 "string, which is sent to HTTP server when a document is "
223 "requested. The 'textmode' token in the first field is our "
224 "silent attempt to establish this as a standard for new "
225 "textmode user agents, so that the webmasters can have "
226 "just a single uniform test for these if they are e.g. "
227 "pushing some lite version to them automagically.\n"
228 "\n"
229 "Use \" \" if you don't want any User-Agent header to be sent "
230 "at all.\n"
231 "\n"
232 "%v in the string means ELinks version,\n"
233 "%s in the string means system identification,\n"
234 "%t in the string means size of the terminal,\n"
235 "%b in the string means number of bars displayed by ELinks.")),
238 INIT_OPT_TREE("protocol", N_("HTTPS"),
239 "https", 0,
240 N_("HTTPS-specific options.")),
242 INIT_OPT_TREE("protocol.https", N_("Proxy configuration"),
243 "proxy", 0,
244 N_("HTTPS proxy configuration.")),
246 INIT_OPT_STRING("protocol.https.proxy", N_("Host and port-number"),
247 "host", 0, "",
248 N_("Host and port-number (host:port) of the HTTPS CONNECT "
249 "proxy, or blank. If it's blank, HTTPS_PROXY environment "
250 "variable is checked as well.")),
251 NULL_OPTION_INFO,
254 static void done_http();
256 struct module http_protocol_module = struct_module(
257 /* name: */ N_("HTTP"),
258 /* options: */ http_options,
259 /* hooks: */ NULL,
260 /* submodules: */ NULL,
261 /* data: */ NULL,
262 /* init: */ NULL,
263 /* done: */ done_http
267 static void
268 done_http(void)
270 mem_free_if(proxy_auth.realm);
271 mem_free_if(proxy_auth.nonce);
272 mem_free_if(proxy_auth.opaque);
274 free_blacklist();
276 if (accept_charset)
277 mem_free(accept_charset);
280 static void
281 init_accept_charset(void)
283 struct string ac;
284 unsigned char *cs;
285 int i;
287 if (!init_string(&ac)) return;
289 for (i = 0; (cs = get_cp_mime_name(i)); i++) {
290 if (ac.length) {
291 add_to_string(&ac, ", ");
292 } else {
293 add_to_string(&ac, "Accept-Charset: ");
295 add_to_string(&ac, cs);
298 if (ac.length) {
299 add_crlf_to_string(&ac);
302 accept_charset = squeezastring(&ac);
304 done_string(&ac);
308 unsigned char *
309 subst_user_agent(unsigned char *fmt, unsigned char *version,
310 unsigned char *sysname, unsigned char *termsize)
312 struct string agent;
314 if (!init_string(&agent)) return NULL;
316 while (*fmt) {
317 int p;
319 for (p = 0; fmt[p] && fmt[p] != '%'; p++);
321 add_bytes_to_string(&agent, fmt, p);
322 fmt += p;
324 if (*fmt != '%') continue;
326 fmt++;
327 switch (*fmt) {
328 case 'b':
329 if (!list_empty(sessions)) {
330 unsigned char bs[4] = "";
331 int blen = 0;
332 struct session *ses = sessions.prev;
333 int bars = ses->status.show_status_bar
334 + ses->status.show_tabs_bar
335 + ses->status.show_title_bar;
337 ulongcat(bs, &blen, bars, 2, 0);
338 add_to_string(&agent, bs);
340 break;
341 case 'v':
342 add_to_string(&agent, version);
343 break;
344 case 's':
345 add_to_string(&agent, sysname);
346 break;
347 case 't':
348 if (termsize)
349 add_to_string(&agent, termsize);
350 break;
351 default:
352 add_bytes_to_string(&agent, fmt - 1, 2);
353 break;
355 if (*fmt) fmt++;
358 return agent.source;
/* Append the @components of @uri to @header, rewriting characters that
 * must not appear raw in a request line: space, tab, CR and LF each
 * become "%20", and a backslash becomes '/'.
 *
 * This is certainly not the right place to do such substitution, but the
 * behaviour is at least better than what we had before. We should
 * probably encode all URLs as early as possible, and possibly decode
 * them back in the protocol backends. --pasky */
static void
add_url_to_http_string(struct string *header, struct uri *uri, int components)
{
	unsigned char *url = get_uri_string(uri, components);
	unsigned char *pos;

	if (!url) return;

	for (pos = url; *pos; pos++) {
		/* Length of the run that can be copied verbatim. */
		int plain = strcspn(pos, " \t\r\n\\");

		add_bytes_to_string(header, pos, plain);
		pos += plain;

		if (!*pos) break;

		if (*pos == '\\') {
			add_char_to_string(header, '/');
		} else {
			add_to_string(header, "%20");
		}
	}

	mem_free(url);
}
/* Parse the decimal digits in [@start, @end), scanning backwards from
 * @end - 1 down to @start, and store the result in *@value.
 *
 * Returns 0 on success. Returns -1, leaving *@value untouched, when the
 * range is empty (@end <= @start), holds more than 9 characters (which
 * could overflow an int), or contains a non-digit. */
static int
revstr2num(unsigned char *start, unsigned char *end, int *value)
{
	int q = 1, val = 0;

	/* An empty range would make the loop below read start[-1]. */
	if (end <= start) return -1;

	/* Nine digits (at most 999999999) always fit in a 32-bit int; one
	 * more could overflow both the value and the multiplier. */
	if (end - start > 9) return -1;

	do {
		--end;
		if (!isdigit(*end)) return -1; /* NaN */
		val += (*end - '0') * q;
		q *= 10;
	} while (end > start);

	*value = val;
	return 0;
}
412 /* This function extracts code, major and minor version from string
413 * "\s*HTTP/\d+.\d+\s+\d\d\d..."
414 * It returns a negative value on error, 0 on success.
416 static int
417 get_http_code(struct read_buffer *rb, int *code, struct http_version *version)
419 unsigned char *head = rb->data;
420 unsigned char *start;
422 *code = 0;
423 version->major = 0;
424 version->minor = 0;
426 /* Ignore spaces. */
427 while (*head == ' ') head++;
429 /* HTTP/ */
430 if (c_toupper(*head) != 'H' || c_toupper(*++head) != 'T' ||
431 c_toupper(*++head) != 'T' || c_toupper(*++head) != 'P'
432 || *++head != '/')
433 return -1;
435 /* Version */
436 start = ++head;
437 /* Find next '.' */
438 while (*head && *head != '.') head++;
439 /* Sanity check. */
440 if (!*head || !(head - start)
441 || (head - start) > 4
442 || !isdigit(*(head + 1)))
443 return -2;
445 /* Extract major version number. */
446 if (revstr2num(start, head, &version->major)) return -3; /* NaN */
448 start = head + 1;
450 /* Find next ' '. */
451 while (*head && *head != ' ') head++;
452 /* Sanity check. */
453 if (!*head || !(head - start) || (head - start) > 4) return -4;
455 /* Extract minor version number. */
456 if (revstr2num(start, head, &version->minor)) return -5; /* NaN */
458 /* Ignore spaces. */
459 while (*head == ' ') head++;
461 /* Sanity check for code. */
462 if (head[0] < '1' || head[0] > '9' ||
463 !isdigit(head[1]) ||
464 !isdigit(head[2]))
465 return -6; /* Invalid code. */
467 /* Extract code. */
468 *code = (head[0] - '0') * 100 + (head[1] - '0') * 10 + head[2] - '0';
470 return 0;
473 static int
474 check_http_server_bugs(struct uri *uri, struct http_connection_info *http,
475 unsigned char *head)
477 unsigned char *server;
478 const unsigned char *const *s;
479 static const unsigned char *const buggy_servers[] = {
480 "mod_czech/3.1.0",
481 "Purveyor",
482 "Netscape-Enterprise",
483 NULL
486 if (!get_opt_bool("protocol.http.bugs.allow_blacklist")
487 || HTTP_1_0(http->sent_version))
488 return 0;
490 server = parse_header(head, "Server", NULL);
491 if (!server)
492 return 0;
494 for (s = buggy_servers; *s; s++) {
495 if (strstr(server, *s)) {
496 add_blacklist_entry(uri, SERVER_BLACKLIST_HTTP10);
497 break;
501 mem_free(server);
502 return (*s != NULL);
505 static void
506 http_end_request(struct connection *conn, struct connection_state state,
507 int notrunc)
509 shutdown_connection_stream(conn);
511 if (conn->info && !((struct http_connection_info *) conn->info)->close
512 && (!conn->socket->ssl) /* We won't keep alive ssl connections */
513 && (!get_opt_bool("protocol.http.bugs.post_no_keepalive")
514 || !conn->uri->post)) {
515 if (is_in_state(state, S_OK) && conn->cached)
516 normalize_cache_entry(conn->cached, !notrunc ? conn->from : -1);
517 set_connection_state(conn, state);
518 add_keepalive_connection(conn, HTTP_KEEPALIVE_TIMEOUT, NULL);
519 } else {
520 abort_connection(conn, state);
524 static void http_send_header(struct socket *);
526 void
527 http_protocol_handler(struct connection *conn)
529 /* setcstate(conn, S_CONN); */
531 if (!has_keepalive_connection(conn)) {
532 make_connection(conn->socket, conn->uri, http_send_header,
533 conn->cache_mode >= CACHE_MODE_FORCE_RELOAD);
534 } else {
535 http_send_header(conn->socket);
/* Handler for proxy URIs: proxied requests go through exactly the same
 * path as plain HTTP ones. */
void
proxy_protocol_handler(struct connection *conn)
{
	http_protocol_handler(conn);
}
/* True if the URI pointed to by @x uses the proxy pseudo-protocol. */
#define IS_PROXY_URI(x) ((x)->protocol == PROTOCOL_PROXY)

/* True if @conn goes through a proxy and the URI being proxied is an
 * HTTPS one. @conn is expanded twice. */
#define connection_is_https_proxy(conn) \
	(IS_PROXY_URI((conn)->uri) \
	 && (conn)->proxied_uri->protocol == PROTOCOL_HTTPS)
550 struct http_connection_info *
551 init_http_connection_info(struct connection *conn, int major, int minor, int close)
553 struct http_connection_info *http;
555 http = mem_calloc(1, sizeof(*http));
556 if (!http) {
557 http_end_request(conn, connection_state(S_OUT_OF_MEM), 0);
558 return NULL;
561 http->sent_version.major = major;
562 http->sent_version.minor = minor;
563 http->close = close;
565 /* The CGI code uses this too and blacklisting expects a host name. */
566 if (conn->proxied_uri->protocol != PROTOCOL_FILE)
567 http->bl_flags = get_blacklist_flags(conn->proxied_uri);
569 if (http->bl_flags & SERVER_BLACKLIST_HTTP10
570 || get_opt_bool("protocol.http.bugs.http10")) {
571 http->sent_version.major = 1;
572 http->sent_version.minor = 0;
575 /* If called from HTTPS proxy connection the connection info might have
576 * already been allocated. */
577 mem_free_set(&conn->info, http);
579 return http;
/* Append an "Accept-Encoding:" header line to @header listing the
 * content codings compiled in, in the order bzip2, "deflate, gzip",
 * lzma. Nothing is added when none of CONFIG_GZIP, CONFIG_BZIP2 and
 * CONFIG_LZMA is defined. */
static void
accept_encoding_header(struct string *header)
{
#if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) || defined(CONFIG_LZMA)
	/* Set once a coding has been written, so that the next one is
	 * preceded by a separator. */
	int need_separator = 0;

	add_to_string(header, "Accept-Encoding: ");

#ifdef CONFIG_BZIP2
	add_to_string(header, "bzip2");
	need_separator = 1;
#endif

#ifdef CONFIG_GZIP
	if (need_separator) {
		add_to_string(header, ", ");
	}
	add_to_string(header, "deflate, gzip");
	need_separator = 1;
#endif

#ifdef CONFIG_LZMA
	if (need_separator) {
		add_to_string(header, ", ");
	}
	add_to_string(header, "lzma");
#endif

	add_crlf_to_string(header);
#endif
}
/* Build the complete HTTP request for the connection owning @socket and
 * hand it to request_from_socket().
 *
 * The request line uses TRACE when the "protocol.http.trace" option is
 * set, otherwise CONNECT when a CONNECT tunnel is needed (see
 * use_connect below), otherwise POST when the proxied URI carries POST
 * data, otherwise GET. Headers are appended in a fixed order; several of
 * them (Referer, conditional headers, Range, Authorization, POST data,
 * cookies) are left out of CONNECT requests.
 *
 * The request is ended with S_BAD_URL when the proxied URI has no host,
 * and with S_OUT_OF_MEM when the header string cannot be initialised.
 * If init_http_connection_info() fails, it has already ended the
 * request and this function just returns. */
609 static void
610 http_send_header(struct socket *socket)
612 struct connection *conn = socket->conn;
613 struct http_connection_info *http;
614 int trace = get_opt_bool("protocol.http.trace");
615 struct string header;
616 unsigned char *post_data = NULL;
617 struct auth_entry *entry = NULL;
618 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
619 unsigned char *optstr;
620 int use_connect, talking_to_proxy;
622 /* Sanity check for a host */
623 if (!uri || !uri->host || !*uri->host || !uri->hostlen) {
624 http_end_request(conn, connection_state(S_BAD_URL), 0);
625 return;
/* Requests start out as HTTP/1.1 with the close flag cleared;
 * init_http_connection_info() may downgrade the version to 1.0. */
628 http = init_http_connection_info(conn, 1, 1, 0);
629 if (!http) return;
631 if (!init_string(&header)) {
632 http_end_request(conn, connection_state(S_OUT_OF_MEM), 0);
633 return;
636 if (!conn->cached) conn->cached = find_in_cache(uri);
/* talking_to_proxy: conn->uri is a proxy URI and the socket has no SSL
 * state. use_connect: additionally the proxied URI is HTTPS, so a
 * CONNECT request is what gets sent. */
638 talking_to_proxy = IS_PROXY_URI(conn->uri) && !conn->socket->ssl;
639 use_connect = connection_is_https_proxy(conn) && !conn->socket->ssl;
641 if (trace) {
642 add_to_string(&header, "TRACE ");
643 } else if (use_connect) {
644 add_to_string(&header, "CONNECT ");
645 /* In CONNECT requests, we send only a subset of the
646 * headers to the proxy. See the "CONNECT:" comments
647 * below. After the CONNECT request succeeds, we
648 * negotiate TLS with the real server and make a new
649 * HTTP request that includes all the headers. */
650 } else if (uri->post) {
651 add_to_string(&header, "POST ");
652 conn->unrestartable = 1;
653 } else {
654 add_to_string(&header, "GET ");
/* A leading '/' is written only when not talking to a proxy. */
657 if (!talking_to_proxy) {
658 add_char_to_string(&header, '/');
661 if (use_connect) {
662 /* Add port if it was specified or the default port */
663 add_uri_to_string(&header, uri, URI_HTTP_CONNECT);
664 } else {
665 if (connection_is_https_proxy(conn) && conn->socket->ssl) {
666 add_url_to_http_string(&header, uri, URI_DATA);
668 } else if (talking_to_proxy) {
669 add_url_to_http_string(&header, uri, URI_PROXY);
671 } else {
672 add_url_to_http_string(&header, conn->uri, URI_DATA);
/* The protocol version closes the request line. */
676 add_to_string(&header, " HTTP/");
677 add_long_to_string(&header, http->sent_version.major);
678 add_char_to_string(&header, '.');
679 add_long_to_string(&header, http->sent_version.minor);
680 add_crlf_to_string(&header);
682 /* CONNECT: Sending a Host header seems pointless as the same
683 * information is already in the CONNECT line. It's harmless
684 * though and Mozilla does it too. */
685 add_to_string(&header, "Host: ");
686 add_uri_to_string(&header, uri, URI_HTTP_HOST);
687 add_crlf_to_string(&header);
689 /* CONNECT: Proxy-Authorization is intended to be seen by the proxy. */
690 if (talking_to_proxy) {
691 unsigned char *user = get_opt_str("protocol.http.proxy.user");
692 unsigned char *passwd = get_opt_str("protocol.http.proxy.passwd");
694 if (proxy_auth.digest) {
695 unsigned char *response;
/* Copy the configured credentials into proxy_auth, truncated so that
 * the terminating NUL still fits. */
696 int userlen = int_min(strlen(user), AUTH_USER_MAXLEN - 1);
697 int passwordlen = int_min(strlen(passwd), AUTH_PASSWORD_MAXLEN - 1);
699 if (userlen)
700 memcpy(proxy_auth.user, user, userlen);
701 proxy_auth.user[userlen] = '\0';
702 if (passwordlen)
703 memcpy(proxy_auth.password, passwd, passwordlen);
704 proxy_auth.password[passwordlen] = '\0';
706 /* FIXME: @uri is the proxied URI. Maybe the passed URI
707 * should be the proxy URI aka conn->uri. --jonas */
708 response = get_http_auth_digest_response(&proxy_auth, uri);
709 if (response) {
710 add_to_string(&header, "Proxy-Authorization: Digest ");
711 add_to_string(&header, response);
712 add_crlf_to_string(&header);
714 mem_free(response);
717 } else {
/* Basic scheme: sent only when a proxy user name is configured. */
718 if (user[0]) {
719 unsigned char *proxy_data;
721 proxy_data = straconcat(user, ":", passwd, (unsigned char *) NULL);
722 if (proxy_data) {
723 unsigned char *proxy_64 = base64_encode(proxy_data);
725 if (proxy_64) {
726 add_to_string(&header, "Proxy-Authorization: Basic ");
727 add_to_string(&header, proxy_64);
728 add_crlf_to_string(&header);
729 mem_free(proxy_64);
731 mem_free(proxy_data);
737 /* CONNECT: User-Agent does not reveal anything about the
738 * resource we're fetching, and it may help the proxy return
739 * better error messages. */
/* No User-Agent header is sent when the option is empty or a single
 * space. */
740 optstr = get_opt_str("protocol.http.user_agent");
741 if (*optstr && strcmp(optstr, " ")) {
742 unsigned char *ustr, ts[64] = "";
744 add_to_string(&header, "User-Agent: ");
/* ts becomes "<width>x<height>" of the terminal at terminals.prev, and
 * stays empty when there is no terminal. */
746 if (!list_empty(terminals)) {
747 unsigned int tslen = 0;
748 struct terminal *term = terminals.prev;
750 ulongcat(ts, &tslen, term->width, 3, 0);
751 ts[tslen++] = 'x';
752 ulongcat(ts, &tslen, term->height, 3, 0);
754 ustr = subst_user_agent(optstr, VERSION_STRING, system_name,
755 ts);
757 if (ustr) {
758 add_to_string(&header, ustr);
759 mem_free(ustr);
762 add_crlf_to_string(&header);
765 /* CONNECT: Referer probably is a secret page in the HTTPS
766 * server, so don't reveal it to the proxy. */
767 if (!use_connect) {
768 switch (get_opt_int("protocol.http.referer.policy")) {
769 case REFERER_NONE:
770 /* oh well */
771 break;
773 case REFERER_FAKE:
774 optstr = get_opt_str("protocol.http.referer.fake");
775 if (!optstr[0]) break;
776 add_to_string(&header, "Referer: ");
777 add_to_string(&header, optstr);
778 add_crlf_to_string(&header);
779 break;
781 case REFERER_TRUE:
782 if (!conn->referrer) break;
783 add_to_string(&header, "Referer: ");
784 add_url_to_http_string(&header, conn->referrer, URI_HTTP_REFERRER);
785 add_crlf_to_string(&header);
786 break;
788 case REFERER_SAME_URL:
789 add_to_string(&header, "Referer: ");
790 add_url_to_http_string(&header, uri, URI_HTTP_REFERRER);
791 add_crlf_to_string(&header);
792 break;
796 /* CONNECT: Do send all Accept* headers to the CONNECT proxy,
797 * because they do not reveal anything about the resource
798 * we're going to request via TLS, and they may affect the
799 * error message if the CONNECT request fails.
801 * If ELinks is ever changed to vary its Accept headers based
802 * on what it intends to do with the returned resource, e.g.
803 * sending "Accept: text/css" when it wants an external
804 * stylesheet, then it should do that only in the inner GET
805 * and not in the outer CONNECT. */
806 add_to_string(&header, "Accept: */*");
807 add_crlf_to_string(&header);
809 if (get_opt_bool("protocol.http.compression"))
810 accept_encoding_header(&header);
/* The Accept-Charset line is built once, on first use. */
812 if (!accept_charset) {
813 init_accept_charset();
/* Accept-Charset is skipped when the server is blacklisted for it or
 * when the "protocol.http.bugs.accept_charset" option is set. */
816 if (!(http->bl_flags & SERVER_BLACKLIST_NO_CHARSET)
817 && !get_opt_bool("protocol.http.bugs.accept_charset")
818 && accept_charset) {
819 add_to_string(&header, accept_charset);
/* An explicitly configured Accept-Language wins; with CONFIG_NLS the UI
 * language is used as the fallback if that option is enabled. */
822 optstr = get_opt_str("protocol.http.accept_language");
823 if (optstr[0]) {
824 add_to_string(&header, "Accept-Language: ");
825 add_to_string(&header, optstr);
826 add_crlf_to_string(&header);
828 #ifdef CONFIG_NLS
829 else if (get_opt_bool("protocol.http.accept_ui_language")) {
830 unsigned char *code = language_to_iso639(current_language);
832 if (code) {
833 add_to_string(&header, "Accept-Language: ");
834 add_to_string(&header, code);
835 add_crlf_to_string(&header);
838 #endif
840 /* CONNECT: Proxy-Connection is intended to be seen by the
841 * proxy. If the CONNECT request succeeds, then the proxy
842 * will forward the remainder of the TCP connection to the
843 * origin server, and Proxy-Connection does not matter; but
844 * if the request fails, then Proxy-Connection may matter. */
845 /* FIXME: What about post-HTTP/1.1?? --Zas */
846 if (HTTP_1_1(http->sent_version)) {
847 if (!IS_PROXY_URI(conn->uri)) {
848 add_to_string(&header, "Connection: ");
849 } else {
850 add_to_string(&header, "Proxy-Connection: ");
/* "close" is requested only for POST requests with the
 * post_no_keepalive workaround enabled. */
853 if (!uri->post || !get_opt_bool("protocol.http.bugs.post_no_keepalive")) {
854 add_to_string(&header, "Keep-Alive");
855 } else {
856 add_to_string(&header, "close");
858 add_crlf_to_string(&header);
861 /* CONNECT: Do not tell the proxy anything we have cached
862 * about the resource. */
/* Conditional headers are sent only for a complete cache entry that has
 * a stored head, and only up to CACHE_MODE_CHECK_IF_MODIFIED. */
863 if (!use_connect && conn->cached) {
864 if (!conn->cached->incomplete && conn->cached->head
865 && conn->cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED) {
866 if (conn->cached->last_modified) {
867 add_to_string(&header, "If-Modified-Since: ");
868 add_to_string(&header, conn->cached->last_modified);
869 add_crlf_to_string(&header);
871 if (conn->cached->etag) {
872 add_to_string(&header, "If-None-Match: ");
873 add_to_string(&header, conn->cached->etag);
874 add_crlf_to_string(&header);
879 /* CONNECT: Let's send cache control headers to the proxy too;
880 * they may affect DNS caching. */
881 if (conn->cache_mode >= CACHE_MODE_FORCE_RELOAD) {
882 add_to_string(&header, "Pragma: no-cache");
883 add_crlf_to_string(&header);
884 add_to_string(&header, "Cache-Control: no-cache");
885 add_crlf_to_string(&header);
888 /* CONNECT: Do not reveal byte ranges to the proxy. It can't
889 * do anything good with that information anyway. */
890 if (!use_connect && (conn->from || conn->progress->start > 0)) {
891 /* conn->from takes precedence. conn->progress.start is set only the first
892 * time, then conn->from gets updated and in case of any retries
893 * etc we have everything interesting in conn->from already. */
894 add_to_string(&header, "Range: bytes=");
895 add_long_to_string(&header, conn->from ? conn->from : conn->progress->start);
896 add_char_to_string(&header, '-');
897 add_crlf_to_string(&header);
900 /* CONNECT: The Authorization header is for the origin server only. */
/* With CONFIG_GSSAPI, find_auth() is consulted only when
 * http_negotiate_output() returns non-zero; without it, find_auth() is
 * always called. */
901 if (!use_connect) {
902 #ifdef CONFIG_GSSAPI
903 if (http_negotiate_output(uri, &header) != 0)
904 #endif
905 entry = find_auth(uri);
908 if (entry) {
909 if (entry->digest) {
910 unsigned char *response;
912 response = get_http_auth_digest_response(entry, uri);
913 if (response) {
914 add_to_string(&header, "Authorization: Digest ");
915 add_to_string(&header, response);
916 add_crlf_to_string(&header);
918 mem_free(response);
921 } else {
922 /* RFC2617 section 2 [Basic Authentication Scheme]
924 * To receive authorization, the client sends the userid
925 * and password, separated by a single colon (":")
926 * character, within a base64 [7] encoded string in the
927 * credentials. */
928 unsigned char *id;
930 /* Create base64 encoded string. */
931 id = straconcat(entry->user, ":", entry->password,
932 (unsigned char *) NULL);
933 if (id) {
934 unsigned char *base64 = base64_encode(id);
/* id now refers to the base64 text (NULL if encoding failed); the
 * plain "user:password" string is released by mem_free_set(). */
936 mem_free_set(&id, base64);
939 if (id) {
940 add_to_string(&header, "Authorization: Basic ");
941 add_to_string(&header, id);
942 add_crlf_to_string(&header);
943 mem_free(id);
948 /* CONNECT: Any POST data is for the origin server only. */
949 if (!use_connect && uri->post) {
950 /* We search for first '\n' in uri->post to get content type
951 * as set by get_form_uri(). This '\n' is dropped if any
952 * and replaced by correct '\r\n' termination here. */
953 unsigned char *postend = strchr(uri->post, '\n');
955 if (postend) {
956 add_to_string(&header, "Content-Type: ");
957 add_bytes_to_string(&header, uri->post, postend - uri->post);
958 add_crlf_to_string(&header);
/* The body is stored as pairs of hex digits (decoded further down), so
 * the length on the wire is half the stored length. */
961 post_data = postend ? postend + 1 : uri->post;
962 add_to_string(&header, "Content-Length: ");
963 add_long_to_string(&header, strlen(post_data) / 2);
964 add_crlf_to_string(&header);
967 #ifdef CONFIG_COOKIES
968 /* CONNECT: Cookies are for the origin server only. */
969 if (!use_connect) {
970 struct string *cookies = send_cookies(uri);
972 if (cookies) {
973 add_to_string(&header, "Cookie: ");
974 add_string_to_string(&header, cookies);
975 add_crlf_to_string(&header);
976 done_string(cookies);
979 #endif
/* Empty line terminating the header section. */
981 add_crlf_to_string(&header);
983 /* CONNECT: Any POST data is for the origin server only.
984 * This was already checked above and post_data is NULL
985 * in that case. Verified with an assertion below. */
986 if (post_data) {
987 #define POST_BUFFER_SIZE 4096
988 unsigned char *post = post_data;
989 unsigned char buffer[POST_BUFFER_SIZE];
990 int n = 0;
992 assert(!use_connect); /* see comment above */
/* Decode two hex digits into one byte per iteration, flushing the
 * local buffer into the request each time it fills up. An odd trailing
 * character is ignored by the loop condition. */
994 while (post[0] && post[1]) {
995 int h1, h2;
997 h1 = unhx(post[0]);
998 assertm(h1 >= 0 && h1 < 16, "h1 in the POST buffer is %d (%d/%c)", h1, post[0], post[0]);
999 if_assert_failed h1 = 0;
1001 h2 = unhx(post[1]);
1002 assertm(h2 >= 0 && h2 < 16, "h2 in the POST buffer is %d (%d/%c)", h2, post[1], post[1]);
1003 if_assert_failed h2 = 0;
1005 buffer[n++] = (h1<<4) + h2;
1006 post += 2;
1007 if (n == POST_BUFFER_SIZE) {
1008 add_bytes_to_string(&header, buffer, n);
1009 n = 0;
1013 if (n)
1014 add_bytes_to_string(&header, buffer, n);
1015 #undef POST_BUFFER_SIZE
/* Hand the finished request to the socket layer; http_got_header is
 * passed along, presumably as the handler for the reply (confirm against
 * request_from_socket()). The local string is released afterwards. */
1018 request_from_socket(socket, header.source, header.length,
1019 connection_state(S_SENT),
1020 SOCKET_END_ONCLOSE, http_got_header);
1021 done_string(&header);
1025 /* This function decompresses the data block given in @data (if it was
1026 * compressed), which is long @len bytes. The decompressed data block is given
1027 * back to the world as the return value and its length is stored into
1028 * @new_len. After this function returns, the caller will discard all the @len
1029 * input bytes, so this function must use all of them unless an error occurs.
1031 * In this function, value of either http->chunk_remaining or http->length is
1032 * being changed (it depends on if chunked mode is used or not).
1034 * Note that the function is still a little esotheric for me. Don't take it
1035 * lightly and don't mess with it without grave reason! If you dare to touch
1036 * this without testing the changes on slashdot, freshmeat and cvsweb
1037 * (including revision history), don't dare to send me any patches! ;) --pasky
1039 * This function gotta die. */
/* Return value, as far as this function shows:
 *  - @data itself, with *@new_len = @len, when the connection's content
 *    encoding is ENCODING_NONE (nothing is allocated);
 *  - NULL, with *@new_len = 0, when the pipe cannot be set up or the
 *    encoded stream cannot be opened;
 *  - otherwise the buffer grown with mem_realloc(), holding *@new_len
 *    decoded bytes. It may still be NULL when the function returns
 *    before anything was read. */
1040 static unsigned char *
1041 decompress_data(struct connection *conn, unsigned char *data, int len,
1042 int *new_len)
1044 struct http_connection_info *http = conn->info;
1045 enum { NORMAL, FINISHING } state = NORMAL;
1046 int did_read = 0;
1047 int *length_of_block;
1048 unsigned char *output = NULL;
/* Number of bytes requested from read_encoded() per call; the output
 * buffer is grown by this much before each read. */
1050 #define BIG_READ 655360
/* Pick the counter that tracks the remaining input: the current chunk
 * in chunked mode, the whole body otherwise. */
1052 if (http->length == LEN_CHUNKED) {
1053 if (http->chunk_remaining == CHUNK_ZERO_SIZE)
1054 state = FINISHING;
1055 length_of_block = &http->chunk_remaining;
1056 } else {
1057 length_of_block = &http->length;
1058 if (!*length_of_block) {
1059 /* Going to finish this decoding bussiness. */
1060 state = FINISHING;
/* Unencoded content is passed through untouched; only the remaining
 * length is updated. */
1064 if (conn->content_encoding == ENCODING_NONE) {
1065 *new_len = len;
1066 if (*length_of_block > 0) *length_of_block -= len;
1067 return data;
1070 *new_len = 0; /* new_len must be zero if we would ever return NULL */
/* Create the pipe on first use and make both ends non-blocking. The
 * input is written to stream_pipes[1]; the decoder is opened on
 * stream_pipes[0]. */
1072 if (conn->stream_pipes[0] == -1
1073 && (c_pipe(conn->stream_pipes) < 0
1074 || set_nonblocking_fd(conn->stream_pipes[0]) < 0
1075 || set_nonblocking_fd(conn->stream_pipes[1]) < 0)) {
1076 return NULL;
1079 do {
1080 unsigned char *tmp;
1082 if (state == NORMAL) {
1083 /* ... we aren't finishing yet. */
1084 int written = safe_write(conn->stream_pipes[1], data, len);
/* A negative result from safe_write() leaves data, len and the length
 * counters unchanged for this round. */
1086 if (written >= 0) {
1087 data += written;
1088 len -= written;
1090 /* In non-keep-alive connections http->length == -1, so the test below */
1091 if (*length_of_block > 0)
1092 *length_of_block -= written;
1093 /* http->length is 0 at the end of block for all modes: keep-alive,
1094 * non-keep-alive and chunked */
1095 if (!http->length) {
1096 /* That's all, folks - let's finish this. */
1097 state = FINISHING;
1098 } else if (!len) {
1099 /* We've done for this round (but not done
1100 * completely). Thus we will get out with
1101 * what we have and leave what we wrote to
1102 * the next round - we have to do that since
1103 * we MUST NOT ever empty the pipe completely
1104 * - this would cause a disaster for
1105 * read_encoded(), which would simply not
1106 * work right then. */
1107 return output;
/* The decoding stream is opened lazily on the read end of the pipe. */
1112 if (!conn->stream) {
1113 conn->stream = open_encoded(conn->stream_pipes[0],
1114 conn->content_encoding);
1115 if (!conn->stream) return NULL;
/* Grow the output through a temporary so that the data gathered so far
 * survives a failed reallocation. */
1118 tmp = mem_realloc(output, *new_len + BIG_READ);
1119 if (!tmp) break;
1120 output = tmp;
1122 did_read = read_encoded(conn->stream, output + *new_len, BIG_READ);
/* Any non-positive result other than READENC_EAGAIN ends the decoding. */
1124 if (did_read > 0)
1125 *new_len += did_read;
1126 else if (did_read != READENC_EAGAIN) {
1127 state = FINISHING;
1128 break;
/* Go on while input is left to write or the last read filled the whole
 * request, which suggests that more decoded data may be waiting. */
1130 } while (len || (did_read == BIG_READ));
1132 if (state == FINISHING) shutdown_connection_stream(conn);
1133 return output;
1134 #undef BIG_READ
1137 static int
1138 is_line_in_buffer(struct read_buffer *rb)
1140 int l;
1142 for (l = 0; l < rb->length; l++) {
1143 unsigned char a0 = rb->data[l];
1145 if (a0 == ASCII_LF)
1146 return l + 1;
1147 if (a0 == ASCII_CR) {
1148 if (rb->data[l + 1] == ASCII_LF
1149 && l < rb->length - 1)
1150 return l + 2;
1151 if (l == rb->length - 1)
1152 return 0;
1154 if (a0 < ' ')
1155 return -1;
1157 return 0;
1160 static void read_http_data(struct socket *socket, struct read_buffer *rb);
1162 static void
1163 read_more_http_data(struct connection *conn, struct read_buffer *rb,
1164 int already_got_anything)
1166 struct connection_state state = already_got_anything
1167 ? connection_state(S_TRANS) : conn->state;
1169 read_from_socket(conn->socket, rb, state, read_http_data);
1172 static void
1173 read_http_data_done(struct connection *conn)
1175 struct http_connection_info *http = conn->info;
1177 /* There's no content but an error so just print
1178 * that instead of nothing. */
1179 if (!conn->from) {
1180 if (http->code >= 400) {
1181 http_error_document(conn, http->code);
1183 } else {
1184 /* This is not an error, thus fine. No need generate any
1185 * document, as this may be empty and it's not a problem.
1186 * In case of 3xx, we're probably just getting kicked to
1187 * another page anyway. And in case of 2xx, the document
1188 * may indeed be empty and thus the user should see it so. */
1192 http_end_request(conn, connection_state(S_OK), 0);
1195 /* Returns:
1196 * -1 on error
1197 * 0 if more to read
1198 * 1 if done
1200 static int
1201 read_chunked_http_data(struct connection *conn, struct read_buffer *rb)
1203 struct http_connection_info *http = conn->info;
1204 int total_data_len = 0;
1206 while (1) {
1207 /* Chunked. Good luck! */
1208 /* See RFC2616, section 3.6.1. Basically, it looks like:
1209 * 1234 ; a = b ; c = d\r\n
1210 * aklkjadslkfjalkfjlkajkljfdkljdsfkljdf*1234\r\n
1211 * 0\r\n
1212 * \r\n */
1213 if (http->chunk_remaining == CHUNK_DATA_END) {
1214 int l = is_line_in_buffer(rb);
1216 if (l) {
1217 if (l == -1) {
1218 /* Invalid character in buffer. */
1219 return -1;
1222 /* Remove everything to the EOLN. */
1223 kill_buffer_data(rb, l);
1224 if (l <= 2) {
1225 /* Empty line. */
1226 return 2;
1228 continue;
1231 } else if (http->chunk_remaining == CHUNK_SIZE) {
1232 int l = is_line_in_buffer(rb);
1234 if (l) {
1235 unsigned char *de;
1236 int n = 0;
1238 if (l != -1) {
1239 errno = 0;
1240 n = strtol(rb->data, (char **) &de, 16);
1241 if (errno || !*de) {
1242 return -1;
1246 if (l == -1 || de == rb->data) {
1247 return -1;
1250 /* Remove everything to the EOLN. */
1251 kill_buffer_data(rb, l);
1252 http->chunk_remaining = n;
1253 if (!http->chunk_remaining)
1254 http->chunk_remaining = CHUNK_ZERO_SIZE;
1255 continue;
1258 } else {
1259 unsigned char *data;
1260 int data_len;
1261 int zero = (http->chunk_remaining == CHUNK_ZERO_SIZE);
1262 int len = zero ? 0 : http->chunk_remaining;
1264 /* Maybe everything necessary didn't come yet.. */
1265 int_upper_bound(&len, rb->length);
1266 conn->received += len;
1268 data = decompress_data(conn, rb->data, len, &data_len);
1270 if (add_fragment(conn->cached, conn->from,
1271 data, data_len) == 1)
1272 conn->tries = 0;
1274 if (data && data != rb->data) mem_free(data);
1276 conn->from += data_len;
1277 total_data_len += data_len;
1279 kill_buffer_data(rb, len);
1281 if (zero) {
1282 /* Last chunk has zero length, so this is last
1283 * chunk, we finished decompression just now
1284 * and now we can happily finish reading this
1285 * stuff. */
1286 http->chunk_remaining = CHUNK_DATA_END;
1287 continue;
1290 if (!http->chunk_remaining && rb->length > 0) {
1291 /* Eat newline succeeding each chunk. */
1292 if (rb->data[0] == ASCII_LF) {
1293 kill_buffer_data(rb, 1);
1294 } else {
1295 if (rb->data[0] != ASCII_CR
1296 || (rb->length >= 2
1297 && rb->data[1] != ASCII_LF)) {
1298 return -1;
1300 if (rb->length < 2) break;
1301 kill_buffer_data(rb, 2);
1303 http->chunk_remaining = CHUNK_SIZE;
1304 continue;
1307 break;
1310 /* More to read. */
1311 return !!total_data_len;
1314 /* Returns 0 if more data, 1 if done. */
1315 static int
1316 read_normal_http_data(struct connection *conn, struct read_buffer *rb)
1318 struct http_connection_info *http = conn->info;
1319 unsigned char *data;
1320 int data_len;
1321 int len = rb->length;
1323 if (http->length >= 0 && http->length < len) {
1324 /* We won't read more than we have to go. */
1325 len = http->length;
1328 conn->received += len;
1330 data = decompress_data(conn, rb->data, len, &data_len);
1332 if (add_fragment(conn->cached, conn->from, data, data_len) == 1)
1333 conn->tries = 0;
1335 if (data && data != rb->data) mem_free(data);
1337 conn->from += data_len;
1339 kill_buffer_data(rb, len);
1341 if (!http->length && (conn->socket->state == SOCKET_RETRY_ONCLOSE
1342 || conn->socket->state == SOCKET_CLOSED)) {
1343 return 2;
1346 return !!data_len;
1349 static void
1350 read_http_data(struct socket *socket, struct read_buffer *rb)
1352 struct connection *conn = socket->conn;
1353 struct http_connection_info *http = conn->info;
1354 int ret;
1356 if (socket->state == SOCKET_CLOSED) {
1357 if (conn->content_encoding) {
1358 /* Flush decompression first. */
1359 http->length = 0;
1360 } else {
1361 read_http_data_done(conn);
1362 return;
1366 if (http->length != LEN_CHUNKED) {
1367 ret = read_normal_http_data(conn, rb);
1369 } else {
1370 ret = read_chunked_http_data(conn, rb);
1373 switch (ret) {
1374 case 0:
1375 read_more_http_data(conn, rb, 0);
1376 break;
1377 case 1:
1378 read_more_http_data(conn, rb, 1);
1379 break;
1380 case 2:
1381 read_http_data_done(conn);
1382 break;
1383 default:
1384 assertm(ret == -1, "Unexpected return value: %d", ret);
1385 abort_connection(conn, connection_state(S_HTTP_ERROR));
1389 /* Returns offset of the header end, zero if more data is needed, -1 when
1390 * incorrect data was received, -2 if this is HTTP/0.9 and no header is to
1391 * come. */
1392 static int
1393 get_header(struct read_buffer *rb)
1395 int i;
1397 /* XXX: We will have to do some guess about whether an HTTP header is
1398 * coming or not, in order to support HTTP/0.9 reply correctly. This
1399 * means a little code duplication with get_http_code(). --pasky */
1400 if (rb->length > 4 && c_strncasecmp(rb->data, "HTTP/", 5))
1401 return -2;
1403 for (i = 0; i < rb->length; i++) {
1404 unsigned char a0 = rb->data[i];
1405 unsigned char a1 = rb->data[i + 1];
1407 if (a0 == 0) {
1408 rb->data[i] = ' ';
1409 continue;
1411 if (a0 == ASCII_LF && a1 == ASCII_LF
1412 && i < rb->length - 1)
1413 return i + 2;
1414 if (a0 == ASCII_CR && i < rb->length - 3) {
1415 if (a1 == ASCII_CR) continue;
1416 if (a1 != ASCII_LF) return -1;
1417 if (rb->data[i + 2] == ASCII_CR) {
1418 if (rb->data[i + 3] != ASCII_LF) return -1;
1419 return i + 4;
1424 return 0;
1427 /* returns 1 if we need retry the connection (for negotiate-auth only) */
1428 static int
1429 check_http_authentication(struct connection *conn, struct uri *uri,
1430 unsigned char *header, unsigned char *header_field)
1432 unsigned char *str, *d;
1433 int ret = 0;
1435 d = parse_header(header, header_field, &str);
1436 while (d) {
1437 if (!c_strncasecmp(d, "Basic", 5)) {
1438 unsigned char *realm = get_header_param(d, "realm");
1440 if (realm) {
1441 add_auth_entry(uri, realm, NULL, NULL, 0);
1442 mem_free(realm);
1443 mem_free(d);
1444 break;
1446 } else if (!c_strncasecmp(d, "Digest", 6)) {
1447 unsigned char *realm = get_header_param(d, "realm");
1448 unsigned char *nonce = get_header_param(d, "nonce");
1449 unsigned char *opaque = get_header_param(d, "opaque");
1451 add_auth_entry(uri, realm, nonce, opaque, 1);
1453 mem_free_if(realm);
1454 mem_free_if(nonce);
1455 mem_free_if(opaque);
1456 mem_free(d);
1457 break;
1459 #ifdef CONFIG_GSSAPI
1460 else if (!c_strncasecmp(d, HTTPNEG_GSS_STR, HTTPNEG_GSS_STRLEN)) {
1461 if (http_negotiate_input(conn, uri, HTTPNEG_GSS, str)==0)
1462 ret = 1;
1463 mem_free(d);
1464 break;
1466 else if (!c_strncasecmp(d, HTTPNEG_NEG_STR, HTTPNEG_NEG_STRLEN)) {
1467 if (http_negotiate_input(conn, uri, HTTPNEG_NEG, str)==0)
1468 ret = 1;
1469 mem_free(d);
1470 break;
1472 #endif
1473 mem_free(d);
1474 d = parse_header(str, header_field, &str);
1476 return ret;
1480 void
1481 http_got_header(struct socket *socket, struct read_buffer *rb)
1483 struct connection *conn = socket->conn;
1484 struct http_connection_info *http = conn->info;
1485 unsigned char *head;
1486 #ifdef CONFIG_COOKIES
1487 unsigned char *cookie, *ch;
1488 #endif
1489 unsigned char *d;
1490 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
1491 struct http_version version = { 0, 9 };
1492 struct connection_state state = (!is_in_state(conn->state, S_PROC)
1493 ? connection_state(S_GETH)
1494 : connection_state(S_PROC));
1495 int a, h = 200;
1496 int cf;
1498 if (socket->state == SOCKET_CLOSED) {
1499 if (!conn->tries && uri->host) {
1500 if (http->bl_flags & SERVER_BLACKLIST_NO_CHARSET) {
1501 del_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1502 } else {
1503 add_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1504 conn->tries = -1;
1507 retry_connection(conn, connection_state(S_CANT_READ));
1508 return;
1510 socket->state = SOCKET_RETRY_ONCLOSE;
1512 again:
1513 a = get_header(rb);
1514 if (a == -1) {
1515 abort_connection(conn, connection_state(S_HTTP_ERROR));
1516 return;
1518 if (!a) {
1519 read_from_socket(conn->socket, rb, state, http_got_header);
1520 return;
1522 /* a == -2 from get_header means HTTP/0.9. In that case, skip
1523 * the get_http_code call; @h and @version have already been
1524 * initialized with the right values. */
1525 if (a == -2) a = 0;
1526 if ((a && get_http_code(rb, &h, &version))
1527 || h == 101) {
1528 abort_connection(conn, connection_state(S_HTTP_ERROR));
1529 return;
1532 /* When no header, HTTP/0.9 document. That's always text/html,
1533 * according to
1534 * http://www.w3.org/Protocols/HTTP/AsImplemented.html. */
1535 /* FIXME: This usage of fake protocol headers for setting up the
1536 * content type has been obsoleted by the @content_type member of
1537 * {struct cache_entry}. */
1538 head = (a ? memacpy(rb->data, a)
1539 : stracpy("\r\nContent-Type: text/html\r\n"));
1540 if (!head) {
1541 abort_connection(conn, connection_state(S_OUT_OF_MEM));
1542 return;
1545 if (check_http_server_bugs(uri, http, head)) {
1546 mem_free(head);
1547 retry_connection(conn, connection_state(S_RESTART));
1548 return;
1551 #ifdef CONFIG_CGI
1552 if (uri->protocol == PROTOCOL_FILE) {
1553 /* ``Status'' is not a standard HTTP header field although some
1554 * HTTP servers like www.php.net uses it for some reason. It should
1555 * only be used for CGI scripts so that it does not interfere
1556 * with status code depended handling for ``normal'' HTTP like
1557 * redirects. */
1558 d = parse_header(head, "Status", NULL);
1559 if (d) {
1560 int h2 = atoi(d);
1562 mem_free(d);
1563 if (h2 >= 100 && h2 < 600) h = h2;
1564 if (h == 101) {
1565 mem_free(head);
1566 abort_connection(conn, connection_state(S_HTTP_ERROR));
1567 return;
1571 #endif
1573 #ifdef CONFIG_COOKIES
1574 ch = head;
1575 while ((cookie = parse_header(ch, "Set-Cookie", &ch))) {
1576 set_cookie(uri, cookie);
1577 mem_free(cookie);
1579 #endif
1580 http->code = h;
1582 if (h == 100) {
1583 mem_free(head);
1584 state = connection_state(S_PROC);
1585 kill_buffer_data(rb, a);
1586 goto again;
1588 if (h < 200) {
1589 mem_free(head);
1590 abort_connection(conn, connection_state(S_HTTP_ERROR));
1591 return;
1593 if (h == 304) {
1594 mem_free(head);
1595 http_end_request(conn, connection_state(S_OK), 1);
1596 return;
1598 if (h == 204) {
1599 mem_free(head);
1600 http_end_request(conn, connection_state(S_HTTP_204), 0);
1601 return;
1603 if (h == 200 && connection_is_https_proxy(conn) && !conn->socket->ssl) {
1604 #ifdef CONFIG_SSL
1605 mem_free(head);
1606 socket->need_ssl = 1;
1607 complete_connect_socket(socket, uri, http_send_header);
1608 #else
1609 abort_connection(conn, connection_state(S_SSL_ERROR));
1610 #endif
1611 return;
1614 conn->cached = get_cache_entry(conn->uri);
1615 if (!conn->cached) {
1616 mem_free(head);
1617 abort_connection(conn, connection_state(S_OUT_OF_MEM));
1618 return;
1620 conn->cached->cgi = conn->cgi;
1621 mem_free_set(&conn->cached->head, head);
1623 if (!get_opt_bool("document.cache.ignore_cache_control")) {
1624 struct cache_entry *cached = conn->cached;
1626 /* I am not entirely sure in what order we should process these
1627 * headers and if we should still process Cache-Control max-age
1628 * if we already set max age to date mentioned in Expires.
1629 * --jonas */
1630 /* Ensure that when ever cached->max_age is set, cached->expired
1631 * is also set, so the cache management knows max_age contains a
1632 * valid time. If on the other hand no caching is requested
1633 * cached->expire should be set to zero. */
1634 if ((d = parse_header(cached->head, "Expires", NULL))) {
1635 /* Convert date to seconds. */
1636 time_t expires = parse_date(&d, NULL, 0, 1);
1638 mem_free(d);
1640 if (expires && cached->cache_mode != CACHE_MODE_NEVER) {
1641 timeval_from_seconds(&cached->max_age, expires);
1642 cached->expire = 1;
1646 if ((d = parse_header(cached->head, "Pragma", NULL))) {
1647 if (strstr(d, "no-cache")) {
1648 cached->cache_mode = CACHE_MODE_NEVER;
1649 cached->expire = 0;
1651 mem_free(d);
1654 if (cached->cache_mode != CACHE_MODE_NEVER
1655 && (d = parse_header(cached->head, "Cache-Control", NULL))) {
1656 if (strstr(d, "no-cache") || strstr(d, "must-revalidate")) {
1657 cached->cache_mode = CACHE_MODE_NEVER;
1658 cached->expire = 0;
1660 } else {
1661 unsigned char *pos = strstr(d, "max-age=");
1663 assert(cached->cache_mode != CACHE_MODE_NEVER);
1665 if (pos) {
1666 /* Grab the number of seconds. */
1667 timeval_T max_age;
1669 timeval_from_seconds(&max_age, atol(pos + 8));
1670 timeval_now(&cached->max_age);
1671 timeval_add_interval(&cached->max_age, &max_age);
1673 cached->expire = 1;
1677 mem_free(d);
1681 /* XXX: Is there some reason why NOT to follow the Location header
1682 * for any status? If the server didn't mean it, it wouldn't send
1683 * it, after all...? --pasky */
1684 if (h == 201 || h == 301 || h == 302 || h == 303 || h == 307) {
1685 d = parse_header(conn->cached->head, "Location", NULL);
1686 if (d) {
1687 int use_get_method = (h == 303);
1689 /* A note from RFC 2616 section 10.3.3:
1690 * RFC 1945 and RFC 2068 specify that the client is not
1691 * allowed to change the method on the redirected
1692 * request. However, most existing user agent
1693 * implementations treat 302 as if it were a 303
1694 * response, performing a GET on the Location
1695 * field-value regardless of the original request
1696 * method. */
1697 /* So POST must not be redirected to GET, but some
1698 * BUGGY message boards rely on it :-( */
1699 if (h == 302
1700 && get_opt_bool("protocol.http.bugs.broken_302_redirect"))
1701 use_get_method = 1;
1703 redirect_cache(conn->cached, d, use_get_method, -1);
1704 mem_free(d);
1708 if (h == 401) {
1709 if (check_http_authentication(conn, uri,
1710 conn->cached->head, "WWW-Authenticate")) {
1711 retry_connection(conn, connection_state(S_RESTART));
1712 return;
1716 if (h == 407) {
1717 unsigned char *str;
1719 d = parse_header(conn->cached->head, "Proxy-Authenticate", &str);
1720 while (d) {
1721 if (!c_strncasecmp(d, "Basic", 5)) {
1722 unsigned char *realm = get_header_param(d, "realm");
1724 if (realm) {
1725 mem_free_set(&proxy_auth.realm, realm);
1726 proxy_auth.digest = 0;
1727 mem_free(d);
1728 break;
1731 } else if (!c_strncasecmp(d, "Digest", 6)) {
1732 unsigned char *realm = get_header_param(d, "realm");
1733 unsigned char *nonce = get_header_param(d, "nonce");
1734 unsigned char *opaque = get_header_param(d, "opaque");
1736 mem_free_set(&proxy_auth.realm, realm);
1737 mem_free_set(&proxy_auth.nonce, nonce);
1738 mem_free_set(&proxy_auth.opaque, opaque);
1739 proxy_auth.digest = 1;
1741 mem_free(d);
1742 break;
1745 mem_free(d);
1746 d = parse_header(str, "Proxy-Authenticate", &str);
1750 kill_buffer_data(rb, a);
1751 http->close = 0;
1752 http->length = -1;
1753 http->recv_version = version;
1755 if ((d = parse_header(conn->cached->head, "Connection", NULL))
1756 || (d = parse_header(conn->cached->head, "Proxy-Connection", NULL))) {
1757 if (!c_strcasecmp(d, "close")) http->close = 1;
1758 mem_free(d);
1759 } else if (PRE_HTTP_1_1(version)) {
1760 http->close = 1;
1763 cf = conn->from;
1764 conn->from = 0;
1765 d = parse_header(conn->cached->head, "Content-Range", NULL);
1766 if (d) {
1767 if (strlen(d) > 6) {
1768 d[5] = 0;
1769 if (isdigit(d[6]) && !c_strcasecmp(d, "bytes")) {
1770 int f;
1772 errno = 0;
1773 f = strtol(d + 6, NULL, 10);
1775 if (!errno && f >= 0) conn->from = f;
1778 mem_free(d);
1780 if (cf && !conn->from && !conn->unrestartable) conn->unrestartable = 1;
1781 if ((conn->progress->start <= 0 && conn->from > cf) || conn->from < 0) {
1782 /* We don't want this if conn->progress.start because then conn->from will
1783 * be probably value of conn->progress.start, while cf is 0. */
1784 abort_connection(conn, connection_state(S_HTTP_ERROR));
1785 return;
1788 #if 0
1790 struct status *s;
1791 foreach (s, conn->downloads) {
1792 fprintf(stderr, "conn %p status %p pri %d st %d er %d :: ce %s",
1793 conn, s, s->pri, s->state, s->prev_error,
1794 s->cached ? s->cached->url : (unsigned char *) "N-U-L-L");
1797 #endif
1799 if (conn->progress->start >= 0) {
1800 /* Update to the real value which we've got from Content-Range. */
1801 conn->progress->seek = conn->from;
1803 conn->progress->start = conn->from;
1805 d = parse_header(conn->cached->head, "Content-Length", NULL);
1806 if (d) {
1807 unsigned char *ep;
1808 int l;
1810 errno = 0;
1811 l = strtol(d, (char **) &ep, 10);
1813 if (!errno && !*ep && l >= 0) {
1814 if (!http->close || POST_HTTP_1_0(version))
1815 http->length = l;
1816 conn->est_length = conn->from + l;
1818 mem_free(d);
1821 if (!conn->unrestartable) {
1822 d = parse_header(conn->cached->head, "Accept-Ranges", NULL);
1824 if (d) {
1825 if (!c_strcasecmp(d, "none"))
1826 conn->unrestartable = 1;
1827 mem_free(d);
1828 } else {
1829 if (!conn->from)
1830 conn->unrestartable = 1;
1834 d = parse_header(conn->cached->head, "Transfer-Encoding", NULL);
1835 if (d) {
1836 if (!c_strcasecmp(d, "chunked")) {
1837 http->length = LEN_CHUNKED;
1838 http->chunk_remaining = CHUNK_SIZE;
1840 mem_free(d);
1842 if (!http->close && http->length == -1) http->close = 1;
1844 d = parse_header(conn->cached->head, "Last-Modified", NULL);
1845 if (d) {
1846 if (conn->cached->last_modified && c_strcasecmp(conn->cached->last_modified, d)) {
1847 delete_entry_content(conn->cached);
1848 if (conn->from) {
1849 conn->from = 0;
1850 mem_free(d);
1851 retry_connection(conn, connection_state(S_MODIFIED));
1852 return;
1855 if (!conn->cached->last_modified) conn->cached->last_modified = d;
1856 else mem_free(d);
1858 if (!conn->cached->last_modified) {
1859 d = parse_header(conn->cached->head, "Date", NULL);
1860 if (d) conn->cached->last_modified = d;
1863 /* FIXME: Parse only if HTTP/1.1 or later? --Zas */
1864 d = parse_header(conn->cached->head, "ETag", NULL);
1865 if (d) {
1866 if (conn->cached->etag) {
1867 unsigned char *old_tag = conn->cached->etag;
1868 unsigned char *new_tag = d;
1870 /* http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19 */
1872 if (new_tag[0] == 'W' && new_tag[1] == '/')
1873 new_tag += 2;
1875 if (old_tag[0] == 'W' && old_tag[1] == '/')
1876 old_tag += 2;
1878 if (strcmp(new_tag, old_tag)) {
1879 delete_entry_content(conn->cached);
1880 if (conn->from) {
1881 conn->from = 0;
1882 mem_free(d);
1883 retry_connection(conn, connection_state(S_MODIFIED));
1884 return;
1889 if (!conn->cached->etag)
1890 conn->cached->etag = d;
1891 else
1892 mem_free(d);
1895 d = parse_header(conn->cached->head, "Content-Encoding", NULL);
1896 if (d) {
1897 unsigned char *extension = get_extension_from_uri(uri);
1898 enum stream_encoding file_encoding;
1900 file_encoding = extension ? guess_encoding(extension) : ENCODING_NONE;
1901 mem_free_if(extension);
1903 /* If the content is encoded, we want to preserve the encoding
1904 * if it is implied by the extension, so that saving the URI
1905 * will leave the saved file with the correct encoding. */
1906 #ifdef CONFIG_GZIP
1907 if (file_encoding != ENCODING_GZIP
1908 && (!c_strcasecmp(d, "gzip") || !c_strcasecmp(d, "x-gzip")))
1909 conn->content_encoding = ENCODING_GZIP;
1910 if (!c_strcasecmp(d, "deflate") || !c_strcasecmp(d, "x-deflate"))
1911 conn->content_encoding = ENCODING_DEFLATE;
1912 #endif
1914 #ifdef CONFIG_BZIP2
1915 if (file_encoding != ENCODING_BZIP2
1916 && (!c_strcasecmp(d, "bzip2") || !c_strcasecmp(d, "x-bzip2")))
1917 conn->content_encoding = ENCODING_BZIP2;
1918 #endif
1920 #ifdef CONFIG_LZMA
1921 if (file_encoding != ENCODING_LZMA
1922 && (!c_strcasecmp(d, "lzma") || !c_strcasecmp(d, "x-lzma")))
1923 conn->content_encoding = ENCODING_LZMA;
1924 #endif
1925 mem_free(d);
1928 if (conn->content_encoding != ENCODING_NONE) {
1929 mem_free_if(conn->cached->encoding_info);
1930 conn->cached->encoding_info = stracpy(get_encoding_name(conn->content_encoding));
1933 if (http->length == -1 || http->close)
1934 socket->state = SOCKET_END_ONCLOSE;
1936 read_http_data(socket, rb);