Bug 1013: Don't assume errno is between 0 and 100000
[elinks.git] / src / protocol / http / http.c
blob 544b3a939d37fff2b3c99ffc5d6cd1066ee36531
1 /* Internal "http" protocol implementation */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <errno.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #ifdef HAVE_UNISTD_H
12 #include <unistd.h>
13 #endif
14 #ifdef HAVE_FCNTL_H
15 #include <fcntl.h> /* OS/2 needs this after sys/types.h */
16 #endif
17 #ifdef HAVE_LIMITS_H
18 #include <limits.h>
19 #endif
21 #include "elinks.h"
23 #include "cache/cache.h"
24 #include "config/options.h"
25 #include "cookies/cookies.h"
26 #include "intl/charsets.h"
27 #include "intl/gettext/libintl.h"
28 #include "main/module.h"
29 #include "network/connection.h"
30 #include "network/progress.h"
31 #include "network/socket.h"
32 #include "osdep/ascii.h"
33 #include "osdep/osdep.h"
34 #include "osdep/sysname.h"
35 #include "protocol/auth/auth.h"
36 #include "protocol/auth/digest.h"
37 #include "protocol/date.h"
38 #include "protocol/header.h"
39 #include "protocol/http/blacklist.h"
40 #include "protocol/http/codes.h"
41 #include "protocol/http/http.h"
42 #include "protocol/uri.h"
43 #include "session/session.h"
44 #include "terminal/terminal.h"
45 #include "util/base64.h"
46 #include "util/conv.h"
47 #include "util/memory.h"
48 #include "util/string.h"
50 #ifdef CONFIG_GSSAPI
51 #include "http_negotiate.h"
52 #endif
/* An HTTP protocol version split into its two numeric components,
 * e.g. major 1 and minor 1 for "HTTP/1.1". */
struct http_version {
	int major;
	int minor;
};

/* Predicates over a struct http_version passed by value (the argument is
 * accessed with '.', not '->'). Each one expands to an int-valued
 * expression and evaluates its argument more than once, so do not pass
 * expressions with side effects. */
#define HTTP_0_9(x)	 ((x).major == 0 && (x).minor == 9)
#define HTTP_1_0(x)	 ((x).major == 1 && (x).minor == 0)
#define HTTP_1_1(x)	 ((x).major == 1 && (x).minor == 1)
#define PRE_HTTP_1_0(x)  ((x).major < 1)
#define PRE_HTTP_1_1(x)  (PRE_HTTP_1_0(x) || HTTP_1_0(x))
#define POST_HTTP_1_0(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 0))
#define POST_HTTP_1_1(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 1))
68 struct http_connection_info {
69 enum blacklist_flags bl_flags;
70 struct http_version recv_version;
71 struct http_version sent_version;
73 int close;
75 #define LEN_CHUNKED -2 /* == we get data in unknown number of chunks */
76 #define LEN_FINISHED 0
77 int length;
79 /* Either bytes coming in this chunk yet or "parser state". */
80 #define CHUNK_DATA_END -3
81 #define CHUNK_ZERO_SIZE -2
82 #define CHUNK_SIZE -1
83 int chunk_remaining;
85 int code;
89 static struct auth_entry proxy_auth;
91 static unsigned char *accept_charset = NULL;
94 static struct option_info http_options[] = {
95 INIT_OPT_TREE("protocol", N_("HTTP"),
96 "http", 0,
97 N_("HTTP-specific options.")),
100 INIT_OPT_TREE("protocol.http", N_("Server bug workarounds"),
101 "bugs", 0,
102 N_("Server-side HTTP bugs workarounds.")),
104 INIT_OPT_BOOL("protocol.http.bugs", N_("Do not send Accept-Charset"),
105 "accept_charset", 0, 1,
106 N_("The Accept-Charset header is quite long and sending it can trigger\n"
107 "bugs in some rarely found servers.")),
109 INIT_OPT_BOOL("protocol.http.bugs", N_("Allow blacklisting"),
110 "allow_blacklist", 0, 1,
111 N_("Allow blacklisting of buggy servers.")),
113 INIT_OPT_BOOL("protocol.http.bugs", N_("Broken 302 redirects"),
114 "broken_302_redirect", 0, 1,
115 N_("Broken 302 redirect (violates RFC but compatible with Netscape).\n"
116 "This is a problem for a lot of web discussion boards and the like.\n"
117 "If they will do strange things to you, try to play with this.")),
119 INIT_OPT_BOOL("protocol.http.bugs", N_("No keepalive after POST requests"),
120 "post_no_keepalive", 0, 0,
121 N_("Disable keepalive connection after POST request.")),
123 INIT_OPT_BOOL("protocol.http.bugs", N_("Use HTTP/1.0"),
124 "http10", 0, 0,
125 N_("Use HTTP/1.0 protocol instead of HTTP/1.1.")),
127 INIT_OPT_TREE("protocol.http", N_("Proxy configuration"),
128 "proxy", 0,
129 N_("HTTP proxy configuration.")),
131 INIT_OPT_STRING("protocol.http.proxy", N_("Host and port-number"),
132 "host", 0, "",
133 N_("Host and port-number (host:port) of the HTTP proxy, or blank.\n"
134 "If it's blank, HTTP_PROXY environment variable is checked as well.")),
136 INIT_OPT_STRING("protocol.http.proxy", N_("Username"),
137 "user", 0, "",
138 N_("Proxy authentication username.")),
140 INIT_OPT_STRING("protocol.http.proxy", N_("Password"),
141 "passwd", 0, "",
142 N_("Proxy authentication password.")),
145 INIT_OPT_TREE("protocol.http", N_("Referer sending"),
146 "referer", 0,
147 N_("HTTP referer sending options. HTTP referer is a special header\n"
148 "sent in the HTTP requests, which is supposed to contain the previous\n"
149 "page visited by the browser. This way, the server can know what link\n"
150 "did you follow when accessing that page. However, this behaviour\n"
151 "can unfortunately considerably affect privacy and can lead even to a\n"
152 "security problem on some badly designed web pages.")),
154 INIT_OPT_INT("protocol.http.referer", N_("Policy"),
155 "policy", 0,
156 REFERER_NONE, REFERER_TRUE, REFERER_TRUE,
157 N_("Mode of sending HTTP referer:\n"
158 "0 is send no referer\n"
159 "1 is send current URL as referer\n"
160 "2 is send fixed fake referer\n"
161 "3 is send previous URL as referer (correct, but insecure)")),
163 INIT_OPT_STRING("protocol.http.referer", N_("Fake referer URL"),
164 "fake", 0, "",
165 N_("Fake referer to be sent when policy is 2.")),
168 INIT_OPT_STRING("protocol.http", N_("Send Accept-Language header"),
169 "accept_language", 0, "",
170 N_("Send Accept-Language header.")),
172 INIT_OPT_BOOL("protocol.http", N_("Use UI language as Accept-Language"),
173 "accept_ui_language", 0, 1,
174 N_("Request localised versions of documents from web-servers (using the\n"
175 "Accept-Language header) using the language you have configured for\n"
176 "ELinks' user-interface (this also affects navigator.language ECMAScript\n"
177 "value available to scripts). Note that some see this as a potential\n"
178 "security risk because it tells web-masters and the FBI sniffers about\n"
179 "your language preference.")),
181 INIT_OPT_BOOL("protocol.http", N_("Activate HTTP TRACE debugging"),
182 "trace", 0, 0,
183 N_("If active, all HTTP requests are sent with TRACE as their method\n"
184 "rather than GET or POST. This is useful for debugging of both ELinks\n"
185 "and various server-side scripts --- the server only returns the client's\n"
186 "request back to the client verbatim. Note that this type of request may\n"
187 "not be enabled on all servers.")),
189 /* OSNews.com is supposed to be relying on the textmode token, at least. */
190 INIT_OPT_STRING("protocol.http", N_("User-agent identification"),
191 "user_agent", 0, "ELinks/%v (textmode; %s; %t-%b)",
192 N_("Change the User Agent ID. That means identification string, which\n"
193 "is sent to HTTP server when a document is requested. The 'textmode'\n"
194 "token in the first field is our silent attempt to establish this as\n"
195 "a standard for new textmode user agents, so that the webmasters can\n"
196 "have just a single uniform test for these if they are e.g. pushing\n"
197 "some lite version to them automagically.\n"
198 "Use \" \" if you don't want any User-Agent header to be sent at all.\n"
199 "%v in the string means ELinks version,\n"
200 "%s in the string means system identification,\n"
201 "%t in the string means size of the terminal,\n"
202 "%b in the string means number of bars displayed by ELinks.")),
205 INIT_OPT_TREE("protocol", N_("HTTPS"),
206 "https", 0,
207 N_("HTTPS-specific options.")),
209 INIT_OPT_TREE("protocol.https", N_("Proxy configuration"),
210 "proxy", 0,
211 N_("HTTPS proxy configuration.")),
213 INIT_OPT_STRING("protocol.https.proxy", N_("Host and port-number"),
214 "host", 0, "",
215 N_("Host and port-number (host:port) of the HTTPS CONNECT proxy, or blank.\n"
216 "If it's blank, HTTPS_PROXY environment variable is checked as well.")),
217 NULL_OPTION_INFO,
220 static void done_http();
222 struct module http_protocol_module = struct_module(
223 /* name: */ N_("HTTP"),
224 /* options: */ http_options,
225 /* hooks: */ NULL,
226 /* submodules: */ NULL,
227 /* data: */ NULL,
228 /* init: */ NULL,
229 /* done: */ done_http
233 static void
234 done_http(void)
236 mem_free_if(proxy_auth.realm);
237 mem_free_if(proxy_auth.nonce);
238 mem_free_if(proxy_auth.opaque);
240 free_blacklist();
242 if (accept_charset)
243 mem_free(accept_charset);
246 static void
247 init_accept_charset(void)
249 struct string ac;
250 unsigned char *cs;
251 int i;
253 if (!init_string(&ac)) return;
255 for (i = 0; (cs = get_cp_mime_name(i)); i++) {
256 if (ac.length) {
257 add_to_string(&ac, ", ");
258 } else {
259 add_to_string(&ac, "Accept-Charset: ");
261 add_to_string(&ac, cs);
264 if (ac.length) {
265 add_crlf_to_string(&ac);
268 accept_charset = squeezastring(&ac);
270 done_string(&ac);
274 unsigned char *
275 subst_user_agent(unsigned char *fmt, unsigned char *version,
276 unsigned char *sysname, unsigned char *termsize)
278 struct string agent;
280 if (!init_string(&agent)) return NULL;
282 while (*fmt) {
283 int p;
285 for (p = 0; fmt[p] && fmt[p] != '%'; p++);
287 add_bytes_to_string(&agent, fmt, p);
288 fmt += p;
290 if (*fmt != '%') continue;
292 fmt++;
293 switch (*fmt) {
294 case 'b':
295 if (!list_empty(sessions)) {
296 unsigned char bs[4] = "";
297 int blen = 0;
298 struct session *ses = sessions.prev;
299 int bars = ses->status.show_status_bar
300 + ses->status.show_tabs_bar
301 + ses->status.show_title_bar;
303 ulongcat(bs, &blen, bars, 2, 0);
304 add_to_string(&agent, bs);
306 break;
307 case 'v':
308 add_to_string(&agent, version);
309 break;
310 case 's':
311 add_to_string(&agent, sysname);
312 break;
313 case 't':
314 if (termsize)
315 add_to_string(&agent, termsize);
316 break;
317 default:
318 add_bytes_to_string(&agent, fmt - 1, 2);
319 break;
321 if (*fmt) fmt++;
324 return agent.source;
/* Appends the @components of @uri to @header, rewriting characters that
 * must not appear raw on the request line: each space, tab, CR or LF
 * becomes "%20" and each backslash becomes '/'. Nothing is appended when
 * get_uri_string() returns NULL. */
static void
add_url_to_http_string(struct string *header, struct uri *uri, int components)
{
	/* This block substitutes spaces in URL by %20s. This is
	 * certainly not the right place where to do it, but now the
	 * behaviour is at least improved compared to what we had
	 * before. We should probably encode all URLs as early as
	 * possible, and possibly decode them back in protocol
	 * backends. --pasky */
	unsigned char *string = get_uri_string(uri, components);
	unsigned char *data = string;

	if (!string) return;

	while (*data) {
		/* Length of the run that can be copied verbatim. */
		int len = strcspn(data, " \t\r\n\\");

		add_bytes_to_string(header, data, len);

		if (!data[len]) break;

		/* data[len] is one of the special characters; consume it
		 * and emit its replacement. */
		if (data[len++] == '\\')
			add_char_to_string(header, '/');
		else
			add_to_string(header, "%20");

		data += len;
	}

	mem_free(string);
}
/* Parse from @end - 1 to @start and set *@value to integer found.
 * It returns -1 if not a number, 0 otherwise.
 * @end should be > @start.
 *
 * The digits are consumed right to left, so the place value @q grows by a
 * factor of ten per step. *@value is left untouched on failure. There is
 * no overflow check; the callers in this file bound the field to at most
 * four digits before calling. */
static int
revstr2num(unsigned char *start, unsigned char *end, int *value)
{
	int q = 1, val = 0;

	do {
		--end;
		if (!isdigit(*end)) return -1; /* NaN */
		val += (*end - '0') * q;
		q *= 10;
	} while (end > start);

	*value = val;
	return 0;
}
378 /* This function extracts code, major and minor version from string
379 * "\s*HTTP/\d+.\d+\s+\d\d\d..."
380 * It returns a negative value on error, 0 on success.
382 static int
383 get_http_code(struct read_buffer *rb, int *code, struct http_version *version)
385 unsigned char *head = rb->data;
386 unsigned char *start;
388 *code = 0;
389 version->major = 0;
390 version->minor = 0;
392 /* Ignore spaces. */
393 while (*head == ' ') head++;
395 /* HTTP/ */
396 if (toupper(*head) != 'H' || toupper(*++head) != 'T' ||
397 toupper(*++head) != 'T' || toupper(*++head) != 'P'
398 || *++head != '/')
399 return -1;
401 /* Version */
402 start = ++head;
403 /* Find next '.' */
404 while (*head && *head != '.') head++;
405 /* Sanity check. */
406 if (!*head || !(head - start)
407 || (head - start) > 4
408 || !isdigit(*(head + 1)))
409 return -2;
411 /* Extract major version number. */
412 if (revstr2num(start, head, &version->major)) return -3; /* NaN */
414 start = head + 1;
416 /* Find next ' '. */
417 while (*head && *head != ' ') head++;
418 /* Sanity check. */
419 if (!*head || !(head - start) || (head - start) > 4) return -4;
421 /* Extract minor version number. */
422 if (revstr2num(start, head, &version->minor)) return -5; /* NaN */
424 /* Ignore spaces. */
425 while (*head == ' ') head++;
427 /* Sanity check for code. */
428 if (head[0] < '1' || head[0] > '9' ||
429 !isdigit(head[1]) ||
430 !isdigit(head[2]))
431 return -6; /* Invalid code. */
433 /* Extract code. */
434 *code = (head[0] - '0') * 100 + (head[1] - '0') * 10 + head[2] - '0';
436 return 0;
439 static int
440 check_http_server_bugs(struct uri *uri, struct http_connection_info *http,
441 unsigned char *head)
443 unsigned char *server;
444 const unsigned char *const *s;
445 static const unsigned char *const buggy_servers[] = {
446 "mod_czech/3.1.0",
447 "Purveyor",
448 "Netscape-Enterprise",
449 NULL
452 if (!get_opt_bool("protocol.http.bugs.allow_blacklist")
453 || HTTP_1_0(http->sent_version))
454 return 0;
456 server = parse_header(head, "Server", NULL);
457 if (!server)
458 return 0;
460 for (s = buggy_servers; *s; s++) {
461 if (strstr(server, *s)) {
462 add_blacklist_entry(uri, SERVER_BLACKLIST_HTTP10);
463 break;
467 mem_free(server);
468 return (*s != NULL);
471 static void
472 http_end_request(struct connection *conn, struct connection_state state,
473 int notrunc)
475 shutdown_connection_stream(conn);
477 if (conn->info && !((struct http_connection_info *) conn->info)->close
478 && (!conn->socket->ssl) /* We won't keep alive ssl connections */
479 && (!get_opt_bool("protocol.http.bugs.post_no_keepalive")
480 || !conn->uri->post)) {
481 if (is_in_state(state, S_OK) && conn->cached)
482 normalize_cache_entry(conn->cached, !notrunc ? conn->from : -1);
483 set_connection_state(conn, state);
484 add_keepalive_connection(conn, HTTP_KEEPALIVE_TIMEOUT, NULL);
485 } else {
486 abort_connection(conn, state);
490 static void http_send_header(struct socket *);
492 void
493 http_protocol_handler(struct connection *conn)
495 /* setcstate(conn, S_CONN); */
497 if (!has_keepalive_connection(conn)) {
498 make_connection(conn->socket, conn->uri, http_send_header,
499 conn->cache_mode >= CACHE_MODE_FORCE_RELOAD);
500 } else {
501 http_send_header(conn->socket);
/* Entry point for proxy URIs; it simply delegates to the HTTP handler. */
void
proxy_protocol_handler(struct connection *conn)
{
	http_protocol_handler(conn);
}
/* True if the URI pointed to by @x uses the proxy protocol. */
#define IS_PROXY_URI(x) ((x)->protocol == PROTOCOL_PROXY)

/* True if @conn goes through a proxy and the URI being proxied is HTTPS.
 * Evaluates @conn twice. */
#define connection_is_https_proxy(conn) \
	(IS_PROXY_URI((conn)->uri) && (conn)->proxied_uri->protocol == PROTOCOL_HTTPS)
516 struct http_connection_info *
517 init_http_connection_info(struct connection *conn, int major, int minor, int close)
519 struct http_connection_info *http;
521 http = mem_calloc(1, sizeof(*http));
522 if (!http) {
523 http_end_request(conn, connection_state(S_OUT_OF_MEM), 0);
524 return NULL;
527 http->sent_version.major = major;
528 http->sent_version.minor = minor;
529 http->close = close;
531 /* The CGI code uses this too and blacklisting expects a host name. */
532 if (conn->proxied_uri->protocol != PROTOCOL_FILE)
533 http->bl_flags = get_blacklist_flags(conn->proxied_uri);
535 if (http->bl_flags & SERVER_BLACKLIST_HTTP10
536 || get_opt_bool("protocol.http.bugs.http10")) {
537 http->sent_version.major = 1;
538 http->sent_version.minor = 0;
541 /* If called from HTTPS proxy connection the connection info might have
542 * already been allocated. */
543 mem_free_set(&conn->info, http);
545 return http;
/* Appends an "Accept-Encoding: ..." line to @header listing the content
 * encodings compiled in (bzip2; deflate and gzip; lzma), separated by
 * ", ". Appends nothing at all when none of CONFIG_GZIP, CONFIG_BZIP2 or
 * CONFIG_LZMA is defined. */
static void
accept_encoding_header(struct string *header)
{
#if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) || defined(CONFIG_LZMA)
	/* Set once something has been written and a separator is needed. */
	int comma = 0;

	add_to_string(header, "Accept-Encoding: ");

#ifdef CONFIG_BZIP2
	add_to_string(header, "bzip2");
	comma = 1;
#endif

#ifdef CONFIG_GZIP
	if (comma) add_to_string(header, ", ");
	add_to_string(header, "deflate, gzip");
	comma = 1;
#endif

#ifdef CONFIG_LZMA
	if (comma) add_to_string(header, ", ");
	add_to_string(header, "lzma");
#endif
	add_crlf_to_string(header);
#endif
}
575 static void
576 http_send_header(struct socket *socket)
578 struct connection *conn = socket->conn;
579 struct http_connection_info *http;
580 int trace = get_opt_bool("protocol.http.trace");
581 struct string header;
582 unsigned char *post_data = NULL;
583 struct auth_entry *entry = NULL;
584 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
585 unsigned char *optstr;
586 int use_connect, talking_to_proxy;
588 /* Sanity check for a host */
589 if (!uri || !uri->host || !*uri->host || !uri->hostlen) {
590 http_end_request(conn, connection_state(S_BAD_URL), 0);
591 return;
594 http = init_http_connection_info(conn, 1, 1, 0);
595 if (!http) return;
597 if (!init_string(&header)) {
598 http_end_request(conn, connection_state(S_OUT_OF_MEM), 0);
599 return;
602 if (!conn->cached) conn->cached = find_in_cache(uri);
604 talking_to_proxy = IS_PROXY_URI(conn->uri) && !conn->socket->ssl;
605 use_connect = connection_is_https_proxy(conn) && !conn->socket->ssl;
607 if (trace) {
608 add_to_string(&header, "TRACE ");
609 } else if (use_connect) {
610 add_to_string(&header, "CONNECT ");
611 /* In CONNECT requests, we send only a subset of the
612 * headers to the proxy. See the "CONNECT:" comments
613 * below. After the CONNECT request succeeds, we
614 * negotiate TLS with the real server and make a new
615 * HTTP request that includes all the headers. */
616 } else if (uri->post) {
617 add_to_string(&header, "POST ");
618 conn->unrestartable = 1;
619 } else {
620 add_to_string(&header, "GET ");
623 if (!talking_to_proxy) {
624 add_char_to_string(&header, '/');
627 if (use_connect) {
628 /* Add port if it was specified or the default port */
629 add_uri_to_string(&header, uri, URI_HTTP_CONNECT);
630 } else {
631 if (connection_is_https_proxy(conn) && conn->socket->ssl) {
632 add_url_to_http_string(&header, uri, URI_DATA);
634 } else if (talking_to_proxy) {
635 add_url_to_http_string(&header, uri, URI_PROXY);
637 } else {
638 add_url_to_http_string(&header, conn->uri, URI_DATA);
642 add_to_string(&header, " HTTP/");
643 add_long_to_string(&header, http->sent_version.major);
644 add_char_to_string(&header, '.');
645 add_long_to_string(&header, http->sent_version.minor);
646 add_crlf_to_string(&header);
648 /* CONNECT: Sending a Host header seems pointless as the same
649 * information is already in the CONNECT line. It's harmless
650 * though and Mozilla does it too. */
651 add_to_string(&header, "Host: ");
652 add_uri_to_string(&header, uri, URI_HTTP_HOST);
653 add_crlf_to_string(&header);
655 /* CONNECT: Proxy-Authorization is intended to be seen by the proxy. */
656 if (talking_to_proxy) {
657 unsigned char *user = get_opt_str("protocol.http.proxy.user");
658 unsigned char *passwd = get_opt_str("protocol.http.proxy.passwd");
660 if (proxy_auth.digest) {
661 unsigned char *response;
662 int userlen = int_min(strlen(user), AUTH_USER_MAXLEN - 1);
663 int passwordlen = int_min(strlen(passwd), AUTH_PASSWORD_MAXLEN - 1);
665 if (userlen)
666 memcpy(proxy_auth.user, user, userlen);
667 proxy_auth.user[userlen] = '\0';
668 if (passwordlen)
669 memcpy(proxy_auth.password, passwd, passwordlen);
670 proxy_auth.password[passwordlen] = '\0';
672 /* FIXME: @uri is the proxied URI. Maybe the passed URI
673 * should be the proxy URI aka conn->uri. --jonas */
674 response = get_http_auth_digest_response(&proxy_auth, uri);
675 if (response) {
676 add_to_string(&header, "Proxy-Authorization: Digest ");
677 add_to_string(&header, response);
678 add_crlf_to_string(&header);
680 mem_free(response);
683 } else {
684 if (user[0]) {
685 unsigned char *proxy_data;
687 proxy_data = straconcat(user, ":", passwd, (unsigned char *) NULL);
688 if (proxy_data) {
689 unsigned char *proxy_64 = base64_encode(proxy_data);
691 if (proxy_64) {
692 add_to_string(&header, "Proxy-Authorization: Basic ");
693 add_to_string(&header, proxy_64);
694 add_crlf_to_string(&header);
695 mem_free(proxy_64);
697 mem_free(proxy_data);
703 /* CONNECT: User-Agent does not reveal anything about the
704 * resource we're fetching, and it may help the proxy return
705 * better error messages. */
706 optstr = get_opt_str("protocol.http.user_agent");
707 if (*optstr && strcmp(optstr, " ")) {
708 unsigned char *ustr, ts[64] = "";
710 add_to_string(&header, "User-Agent: ");
712 if (!list_empty(terminals)) {
713 unsigned int tslen = 0;
714 struct terminal *term = terminals.prev;
716 ulongcat(ts, &tslen, term->width, 3, 0);
717 ts[tslen++] = 'x';
718 ulongcat(ts, &tslen, term->height, 3, 0);
720 ustr = subst_user_agent(optstr, VERSION_STRING, system_name,
721 ts);
723 if (ustr) {
724 add_to_string(&header, ustr);
725 mem_free(ustr);
728 add_crlf_to_string(&header);
731 /* CONNECT: Referer probably is a secret page in the HTTPS
732 * server, so don't reveal it to the proxy. */
733 if (!use_connect) {
734 switch (get_opt_int("protocol.http.referer.policy")) {
735 case REFERER_NONE:
736 /* oh well */
737 break;
739 case REFERER_FAKE:
740 optstr = get_opt_str("protocol.http.referer.fake");
741 if (!optstr[0]) break;
742 add_to_string(&header, "Referer: ");
743 add_to_string(&header, optstr);
744 add_crlf_to_string(&header);
745 break;
747 case REFERER_TRUE:
748 if (!conn->referrer) break;
749 add_to_string(&header, "Referer: ");
750 add_url_to_http_string(&header, conn->referrer, URI_HTTP_REFERRER);
751 add_crlf_to_string(&header);
752 break;
754 case REFERER_SAME_URL:
755 add_to_string(&header, "Referer: ");
756 add_url_to_http_string(&header, uri, URI_HTTP_REFERRER);
757 add_crlf_to_string(&header);
758 break;
762 /* CONNECT: Do send all Accept* headers to the CONNECT proxy,
763 * because they do not reveal anything about the resource
764 * we're going to request via TLS, and they may affect the
765 * error message if the CONNECT request fails.
767 * If ELinks is ever changed to vary its Accept headers based
768 * on what it intends to do with the returned resource, e.g.
769 * sending "Accept: text/css" when it wants an external
770 * stylesheet, then it should do that only in the inner GET
771 * and not in the outer CONNECT. */
772 add_to_string(&header, "Accept: */*");
773 add_crlf_to_string(&header);
775 accept_encoding_header(&header);
777 if (!accept_charset) {
778 init_accept_charset();
781 if (!(http->bl_flags & SERVER_BLACKLIST_NO_CHARSET)
782 && !get_opt_bool("protocol.http.bugs.accept_charset")
783 && accept_charset) {
784 add_to_string(&header, accept_charset);
787 optstr = get_opt_str("protocol.http.accept_language");
788 if (optstr[0]) {
789 add_to_string(&header, "Accept-Language: ");
790 add_to_string(&header, optstr);
791 add_crlf_to_string(&header);
793 #ifdef CONFIG_NLS
794 else if (get_opt_bool("protocol.http.accept_ui_language")) {
795 unsigned char *code = language_to_iso639(current_language);
797 if (code) {
798 add_to_string(&header, "Accept-Language: ");
799 add_to_string(&header, code);
800 add_crlf_to_string(&header);
803 #endif
805 /* CONNECT: Proxy-Connection is intended to be seen by the
806 * proxy. If the CONNECT request succeeds, then the proxy
807 * will forward the remainder of the TCP connection to the
808 * origin server, and Proxy-Connection does not matter; but
809 * if the request fails, then Proxy-Connection may matter. */
810 /* FIXME: What about post-HTTP/1.1?? --Zas */
811 if (HTTP_1_1(http->sent_version)) {
812 if (!IS_PROXY_URI(conn->uri)) {
813 add_to_string(&header, "Connection: ");
814 } else {
815 add_to_string(&header, "Proxy-Connection: ");
818 if (!uri->post || !get_opt_bool("protocol.http.bugs.post_no_keepalive")) {
819 add_to_string(&header, "Keep-Alive");
820 } else {
821 add_to_string(&header, "close");
823 add_crlf_to_string(&header);
826 /* CONNECT: Do not tell the proxy anything we have cached
827 * about the resource. */
828 if (!use_connect && conn->cached) {
829 if (!conn->cached->incomplete && conn->cached->head
830 && conn->cache_mode <= CACHE_MODE_CHECK_IF_MODIFIED) {
831 if (conn->cached->last_modified) {
832 add_to_string(&header, "If-Modified-Since: ");
833 add_to_string(&header, conn->cached->last_modified);
834 add_crlf_to_string(&header);
836 if (conn->cached->etag) {
837 add_to_string(&header, "If-None-Match: ");
838 add_to_string(&header, conn->cached->etag);
839 add_crlf_to_string(&header);
844 /* CONNECT: Let's send cache control headers to the proxy too;
845 * they may affect DNS caching. */
846 if (conn->cache_mode >= CACHE_MODE_FORCE_RELOAD) {
847 add_to_string(&header, "Pragma: no-cache");
848 add_crlf_to_string(&header);
849 add_to_string(&header, "Cache-Control: no-cache");
850 add_crlf_to_string(&header);
853 /* CONNECT: Do not reveal byte ranges to the proxy. It can't
854 * do anything good with that information anyway. */
855 if (!use_connect && (conn->from || conn->progress->start > 0)) {
856 /* conn->from takes precedence. conn->progress.start is set only the first
857 * time, then conn->from gets updated and in case of any retries
858 * etc we have everything interesting in conn->from already. */
859 add_to_string(&header, "Range: bytes=");
860 add_long_to_string(&header, conn->from ? conn->from : conn->progress->start);
861 add_char_to_string(&header, '-');
862 add_crlf_to_string(&header);
865 /* CONNECT: The Authorization header is for the origin server only. */
866 if (!use_connect) {
867 #ifdef CONFIG_GSSAPI
868 if (http_negotiate_output(uri, &header) != 0)
869 #endif
870 entry = find_auth(uri);
873 if (entry) {
874 if (entry->digest) {
875 unsigned char *response;
877 response = get_http_auth_digest_response(entry, uri);
878 if (response) {
879 add_to_string(&header, "Authorization: Digest ");
880 add_to_string(&header, response);
881 add_crlf_to_string(&header);
883 mem_free(response);
886 } else {
887 /* RFC2617 section 2 [Basic Authentication Scheme]
889 * To receive authorization, the client sends the userid
890 * and password, separated by a single colon (":")
891 * character, within a base64 [7] encoded string in the
892 * credentials. */
893 unsigned char *id;
895 /* Create base64 encoded string. */
896 id = straconcat(entry->user, ":", entry->password,
897 (unsigned char *) NULL);
898 if (id) {
899 unsigned char *base64 = base64_encode(id);
901 mem_free_set(&id, base64);
904 if (id) {
905 add_to_string(&header, "Authorization: Basic ");
906 add_to_string(&header, id);
907 add_crlf_to_string(&header);
908 mem_free(id);
913 /* CONNECT: Any POST data is for the origin server only. */
914 if (!use_connect && uri->post) {
915 /* We search for first '\n' in uri->post to get content type
916 * as set by get_form_uri(). This '\n' is dropped if any
917 * and replaced by correct '\r\n' termination here. */
918 unsigned char *postend = strchr(uri->post, '\n');
920 if (postend) {
921 add_to_string(&header, "Content-Type: ");
922 add_bytes_to_string(&header, uri->post, postend - uri->post);
923 add_crlf_to_string(&header);
926 post_data = postend ? postend + 1 : uri->post;
927 add_to_string(&header, "Content-Length: ");
928 add_long_to_string(&header, strlen(post_data) / 2);
929 add_crlf_to_string(&header);
932 #ifdef CONFIG_COOKIES
933 /* CONNECT: Cookies are for the origin server only. */
934 if (!use_connect) {
935 struct string *cookies = send_cookies(uri);
937 if (cookies) {
938 add_to_string(&header, "Cookie: ");
939 add_string_to_string(&header, cookies);
940 add_crlf_to_string(&header);
941 done_string(cookies);
944 #endif
946 add_crlf_to_string(&header);
948 /* CONNECT: Any POST data is for the origin server only.
949 * This was already checked above and post_data is NULL
950 * in that case. Verified with an assertion below. */
951 if (post_data) {
952 #define POST_BUFFER_SIZE 4096
953 unsigned char *post = post_data;
954 unsigned char buffer[POST_BUFFER_SIZE];
955 int n = 0;
957 assert(!use_connect); /* see comment above */
959 while (post[0] && post[1]) {
960 int h1, h2;
962 h1 = unhx(post[0]);
963 assertm(h1 >= 0 && h1 < 16, "h1 in the POST buffer is %d (%d/%c)", h1, post[0], post[0]);
964 if_assert_failed h1 = 0;
966 h2 = unhx(post[1]);
967 assertm(h2 >= 0 && h2 < 16, "h2 in the POST buffer is %d (%d/%c)", h2, post[1], post[1]);
968 if_assert_failed h2 = 0;
970 buffer[n++] = (h1<<4) + h2;
971 post += 2;
972 if (n == POST_BUFFER_SIZE) {
973 add_bytes_to_string(&header, buffer, n);
974 n = 0;
978 if (n)
979 add_bytes_to_string(&header, buffer, n);
980 #undef POST_BUFFER_SIZE
983 request_from_socket(socket, header.source, header.length,
984 connection_state(S_SENT),
985 SOCKET_END_ONCLOSE, http_got_header);
986 done_string(&header);
990 /* This function decompresses the data block given in @data (if it was
991 * compressed), which is long @len bytes. The decompressed data block is given
992 * back to the world as the return value and its length is stored into
993 * @new_len. After this function returns, the caller will discard all the @len
994 * input bytes, so this function must use all of them unless an error occurs.
996 * In this function, value of either http->chunk_remaining or http->length is
997 * being changed (it depends on if chunked mode is used or not).
999 * Note that the function is still a little esotheric for me. Don't take it
1000 * lightly and don't mess with it without grave reason! If you dare to touch
1001 * this without testing the changes on slashdot, freshmeat and cvsweb
1002 * (including revision history), don't dare to send me any patches! ;) --pasky
1004 * This function gotta die. */
1005 static unsigned char *
1006 decompress_data(struct connection *conn, unsigned char *data, int len,
1007 int *new_len)
1009 struct http_connection_info *http = conn->info;
1010 enum { NORMAL, FINISHING } state = NORMAL;
1011 int did_read = 0;
1012 int *length_of_block;
1013 unsigned char *output = NULL;
1015 #define BIG_READ 65536
1017 if (http->length == LEN_CHUNKED) {
1018 if (http->chunk_remaining == CHUNK_ZERO_SIZE)
1019 state = FINISHING;
1020 length_of_block = &http->chunk_remaining;
1021 } else {
1022 length_of_block = &http->length;
1023 if (!*length_of_block) {
1024 /* Going to finish this decoding bussiness. */
1025 state = FINISHING;
1029 if (conn->content_encoding == ENCODING_NONE) {
1030 *new_len = len;
1031 if (*length_of_block > 0) *length_of_block -= len;
1032 return data;
1035 *new_len = 0; /* new_len must be zero if we would ever return NULL */
1037 if (conn->stream_pipes[0] == -1
1038 && (c_pipe(conn->stream_pipes) < 0
1039 || set_nonblocking_fd(conn->stream_pipes[0]) < 0
1040 || set_nonblocking_fd(conn->stream_pipes[1]) < 0)) {
1041 return NULL;
1044 do {
1045 unsigned char *tmp;
1047 if (state == NORMAL) {
1048 /* ... we aren't finishing yet. */
1049 int written = safe_write(conn->stream_pipes[1], data, len);
1051 if (written >= 0) {
1052 data += written;
1053 len -= written;
1055 /* In non-keep-alive connections http->length == -1, so the test below */
1056 if (*length_of_block > 0)
1057 *length_of_block -= written;
1058 /* http->length is 0 at the end of block for all modes: keep-alive,
1059 * non-keep-alive and chunked */
1060 if (!http->length) {
1061 /* That's all, folks - let's finish this. */
1062 state = FINISHING;
1063 } else if (!len) {
1064 /* We've done for this round (but not done
1065 * completely). Thus we will get out with
1066 * what we have and leave what we wrote to
1067 * the next round - we have to do that since
1068 * we MUST NOT ever empty the pipe completely
1069 * - this would cause a disaster for
1070 * read_encoded(), which would simply not
1071 * work right then. */
1072 return output;
1077 if (!conn->stream) {
1078 conn->stream = open_encoded(conn->stream_pipes[0],
1079 conn->content_encoding);
1080 if (!conn->stream) return NULL;
1083 tmp = mem_realloc(output, *new_len + BIG_READ);
1084 if (!tmp) break;
1085 output = tmp;
1087 did_read = read_encoded(conn->stream, output + *new_len, BIG_READ);
1089 /* Do not break from the loop if did_read == 0. It
1090 * means no decoded data is available yet, but some may
1091 * become available later. This happens especially with
1092 * the bzip2 decoder, which needs an entire compressed
1093 * block as input before it generates any output. */
1094 if (did_read < 0) {
1095 state = FINISHING;
1096 break;
1098 *new_len += did_read;
1099 } while (len || (did_read == BIG_READ));
1101 if (state == FINISHING) shutdown_connection_stream(conn);
1102 return output;
1105 static int
1106 is_line_in_buffer(struct read_buffer *rb)
1108 int l;
1110 for (l = 0; l < rb->length; l++) {
1111 unsigned char a0 = rb->data[l];
1113 if (a0 == ASCII_LF)
1114 return l + 1;
1115 if (a0 == ASCII_CR) {
1116 if (rb->data[l + 1] == ASCII_LF
1117 && l < rb->length - 1)
1118 return l + 2;
1119 if (l == rb->length - 1)
1120 return 0;
1122 if (a0 < ' ')
1123 return -1;
1125 return 0;
1128 static void read_http_data(struct socket *socket, struct read_buffer *rb);
1130 static void
1131 read_more_http_data(struct connection *conn, struct read_buffer *rb,
1132 int already_got_anything)
1134 struct connection_state state = already_got_anything
1135 ? connection_state(S_TRANS) : conn->state;
1137 read_from_socket(conn->socket, rb, state, read_http_data);
1140 static void
1141 read_http_data_done(struct connection *conn)
1143 struct http_connection_info *http = conn->info;
1145 /* There's no content but an error so just print
1146 * that instead of nothing. */
1147 if (!conn->from) {
1148 if (http->code >= 400) {
1149 http_error_document(conn, http->code);
1151 } else {
1152 /* This is not an error, thus fine. No need generate any
1153 * document, as this may be empty and it's not a problem.
1154 * In case of 3xx, we're probably just getting kicked to
1155 * another page anyway. And in case of 2xx, the document
1156 * may indeed be empty and thus the user should see it so. */
1160 http_end_request(conn, connection_state(S_OK), 0);
1163 /* Returns:
1164 * -1 on error
1165 * 0 if more to read
1166 * 1 if done
1168 static int
1169 read_chunked_http_data(struct connection *conn, struct read_buffer *rb)
1171 struct http_connection_info *http = conn->info;
1172 int total_data_len = 0;
1174 while (1) {
1175 /* Chunked. Good luck! */
1176 /* See RFC2616, section 3.6.1. Basically, it looks like:
1177 * 1234 ; a = b ; c = d\r\n
1178 * aklkjadslkfjalkfjlkajkljfdkljdsfkljdf*1234\r\n
1179 * 0\r\n
1180 * \r\n */
1181 if (http->chunk_remaining == CHUNK_DATA_END) {
1182 int l = is_line_in_buffer(rb);
1184 if (l) {
1185 if (l == -1) {
1186 /* Invalid character in buffer. */
1187 return -1;
1190 /* Remove everything to the EOLN. */
1191 kill_buffer_data(rb, l);
1192 if (l <= 2) {
1193 /* Empty line. */
1194 return 2;
1196 continue;
1199 } else if (http->chunk_remaining == CHUNK_SIZE) {
1200 int l = is_line_in_buffer(rb);
1202 if (l) {
1203 unsigned char *de;
1204 int n = 0;
1206 if (l != -1) {
1207 errno = 0;
1208 n = strtol(rb->data, (char **) &de, 16);
1209 if (errno || !*de) {
1210 return -1;
1214 if (l == -1 || de == rb->data) {
1215 return -1;
1218 /* Remove everything to the EOLN. */
1219 kill_buffer_data(rb, l);
1220 http->chunk_remaining = n;
1221 if (!http->chunk_remaining)
1222 http->chunk_remaining = CHUNK_ZERO_SIZE;
1223 continue;
1226 } else {
1227 unsigned char *data;
1228 int data_len;
1229 int zero = (http->chunk_remaining == CHUNK_ZERO_SIZE);
1230 int len = zero ? 0 : http->chunk_remaining;
1232 /* Maybe everything necessary didn't come yet.. */
1233 int_upper_bound(&len, rb->length);
1234 conn->received += len;
1236 data = decompress_data(conn, rb->data, len, &data_len);
1238 if (add_fragment(conn->cached, conn->from,
1239 data, data_len) == 1)
1240 conn->tries = 0;
1242 if (data && data != rb->data) mem_free(data);
1244 conn->from += data_len;
1245 total_data_len += data_len;
1247 kill_buffer_data(rb, len);
1249 if (zero) {
1250 /* Last chunk has zero length, so this is last
1251 * chunk, we finished decompression just now
1252 * and now we can happily finish reading this
1253 * stuff. */
1254 http->chunk_remaining = CHUNK_DATA_END;
1255 continue;
1258 if (!http->chunk_remaining && rb->length > 0) {
1259 /* Eat newline succeeding each chunk. */
1260 if (rb->data[0] == ASCII_LF) {
1261 kill_buffer_data(rb, 1);
1262 } else {
1263 if (rb->data[0] != ASCII_CR
1264 || (rb->length >= 2
1265 && rb->data[1] != ASCII_LF)) {
1266 return -1;
1268 if (rb->length < 2) break;
1269 kill_buffer_data(rb, 2);
1271 http->chunk_remaining = CHUNK_SIZE;
1272 continue;
1275 break;
1278 /* More to read. */
1279 return !!total_data_len;
1282 /* Returns 0 if more data, 1 if done. */
1283 static int
1284 read_normal_http_data(struct connection *conn, struct read_buffer *rb)
1286 struct http_connection_info *http = conn->info;
1287 unsigned char *data;
1288 int data_len;
1289 int len = rb->length;
1291 if (http->length >= 0 && http->length < len) {
1292 /* We won't read more than we have to go. */
1293 len = http->length;
1296 conn->received += len;
1298 data = decompress_data(conn, rb->data, len, &data_len);
1300 if (add_fragment(conn->cached, conn->from, data, data_len) == 1)
1301 conn->tries = 0;
1303 if (data && data != rb->data) mem_free(data);
1305 conn->from += data_len;
1307 kill_buffer_data(rb, len);
1309 if (!http->length && conn->socket->state == SOCKET_RETRY_ONCLOSE) {
1310 return 2;
1313 return !!data_len;
1316 static void
1317 read_http_data(struct socket *socket, struct read_buffer *rb)
1319 struct connection *conn = socket->conn;
1320 struct http_connection_info *http = conn->info;
1321 int ret;
1323 if (socket->state == SOCKET_CLOSED) {
1324 if (conn->content_encoding && http->length == -1) {
1325 /* Flush decompression first. */
1326 http->length = 0;
1327 } else {
1328 read_http_data_done(conn);
1329 return;
1333 if (http->length != LEN_CHUNKED) {
1334 ret = read_normal_http_data(conn, rb);
1336 } else {
1337 ret = read_chunked_http_data(conn, rb);
1340 switch (ret) {
1341 case 0:
1342 read_more_http_data(conn, rb, 0);
1343 break;
1344 case 1:
1345 read_more_http_data(conn, rb, 1);
1346 break;
1347 case 2:
1348 read_http_data_done(conn);
1349 break;
1350 default:
1351 assertm(ret == -1, "Unexpected return value: %d", ret);
1352 abort_connection(conn, connection_state(S_HTTP_ERROR));
1356 /* Returns offset of the header end, zero if more data is needed, -1 when
1357 * incorrect data was received, -2 if this is HTTP/0.9 and no header is to
1358 * come. */
1359 static int
1360 get_header(struct read_buffer *rb)
1362 int i;
1364 /* XXX: We will have to do some guess about whether an HTTP header is
1365 * coming or not, in order to support HTTP/0.9 reply correctly. This
1366 * means a little code duplication with get_http_code(). --pasky */
1367 if (rb->length > 4 && strncasecmp(rb->data, "HTTP/", 5))
1368 return -2;
1370 for (i = 0; i < rb->length; i++) {
1371 unsigned char a0 = rb->data[i];
1372 unsigned char a1 = rb->data[i + 1];
1374 if (a0 == 0) {
1375 rb->data[i] = ' ';
1376 continue;
1378 if (a0 == ASCII_LF && a1 == ASCII_LF
1379 && i < rb->length - 1)
1380 return i + 2;
1381 if (a0 == ASCII_CR && i < rb->length - 3) {
1382 if (a1 == ASCII_CR) continue;
1383 if (a1 != ASCII_LF) return -1;
1384 if (rb->data[i + 2] == ASCII_CR) {
1385 if (rb->data[i + 3] != ASCII_LF) return -1;
1386 return i + 4;
1391 return 0;
/* Walk over all @header_field headers in @header and act on the first
 * authentication challenge that can be handled.
 *
 * A "Basic" challenge is handled only when it carries a realm; a
 * "Digest" challenge is always handled. Both are registered with
 * add_auth_entry(). With CONFIG_GSSAPI the two negotiate schemes are
 * passed to http_negotiate_input(). Challenges that are not handled are
 * skipped and the next header is examined.
 *
 * Returns 1 if the connection needs to be retried (for negotiate-auth
 * only), 0 otherwise. */
static int
check_http_authentication(struct connection *conn, struct uri *uri,
		unsigned char *header, unsigned char *header_field)
{
	unsigned char *rest;
	unsigned char *challenge;
	int need_retry = 0;

	for (challenge = parse_header(header, header_field, &rest);
	     challenge;
	     challenge = parse_header(rest, header_field, &rest)) {
		/* Set once a challenge was consumed; ends the search. */
		int handled = 0;

		if (!strncasecmp(challenge, "Basic", 5)) {
			unsigned char *realm = get_header_param(challenge, "realm");

			if (realm) {
				add_auth_entry(uri, realm, NULL, NULL, 0);
				mem_free(realm);
				handled = 1;
			}
		} else if (!strncasecmp(challenge, "Digest", 6)) {
			unsigned char *realm = get_header_param(challenge, "realm");
			unsigned char *nonce = get_header_param(challenge, "nonce");
			unsigned char *opaque = get_header_param(challenge, "opaque");

			add_auth_entry(uri, realm, nonce, opaque, 1);

			mem_free_if(realm);
			mem_free_if(nonce);
			mem_free_if(opaque);
			handled = 1;
		}
#ifdef CONFIG_GSSAPI
		else if (!strncasecmp(challenge, HTTPNEG_GSS_STR, HTTPNEG_GSS_STRLEN)) {
			if (http_negotiate_input(conn, uri, HTTPNEG_GSS, rest) == 0)
				need_retry = 1;
			handled = 1;
		}
		else if (!strncasecmp(challenge, HTTPNEG_NEG_STR, HTTPNEG_NEG_STRLEN)) {
			if (http_negotiate_input(conn, uri, HTTPNEG_NEG, rest) == 0)
				need_retry = 1;
			handled = 1;
		}
#endif

		mem_free(challenge);
		if (handled) break;
	}

	return need_retry;
}
1447 void
1448 http_got_header(struct socket *socket, struct read_buffer *rb)
1450 struct connection *conn = socket->conn;
1451 struct http_connection_info *http = conn->info;
1452 unsigned char *head;
1453 #ifdef CONFIG_COOKIES
1454 unsigned char *cookie, *ch;
1455 #endif
1456 unsigned char *d;
1457 struct uri *uri = conn->proxied_uri; /* Set to the real uri */
1458 struct http_version version = { 0, 9 };
1459 struct connection_state state = (!is_in_state(conn->state, S_PROC)
1460 ? connection_state(S_GETH)
1461 : connection_state(S_PROC));
1462 int a, h = 200;
1463 int cf;
1465 if (socket->state == SOCKET_CLOSED) {
1466 if (!conn->tries && uri->host) {
1467 if (http->bl_flags & SERVER_BLACKLIST_NO_CHARSET) {
1468 del_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1469 } else {
1470 add_blacklist_entry(uri, SERVER_BLACKLIST_NO_CHARSET);
1471 conn->tries = -1;
1474 retry_connection(conn, connection_state(S_CANT_READ));
1475 return;
1477 socket->state = SOCKET_RETRY_ONCLOSE;
1479 again:
1480 a = get_header(rb);
1481 if (a == -1) {
1482 abort_connection(conn, connection_state(S_HTTP_ERROR));
1483 return;
1485 if (!a) {
1486 read_from_socket(conn->socket, rb, state, http_got_header);
1487 return;
1489 /* a == -2 from get_header means HTTP/0.9. In that case, skip
1490 * the get_http_code call; @h and @version have already been
1491 * initialized with the right values. */
1492 if (a == -2) a = 0;
1493 if ((a && get_http_code(rb, &h, &version))
1494 || h == 101) {
1495 abort_connection(conn, connection_state(S_HTTP_ERROR));
1496 return;
1499 /* When no header, HTTP/0.9 document. That's always text/html,
1500 * according to
1501 * http://www.w3.org/Protocols/HTTP/AsImplemented.html. */
1502 /* FIXME: This usage of fake protocol headers for setting up the
1503 * content type has been obsoleted by the @content_type member of
1504 * {struct cache_entry}. */
1505 head = (a ? memacpy(rb->data, a)
1506 : stracpy("\r\nContent-Type: text/html\r\n"));
1507 if (!head) {
1508 abort_connection(conn, connection_state(S_OUT_OF_MEM));
1509 return;
1512 if (check_http_server_bugs(uri, http, head)) {
1513 mem_free(head);
1514 retry_connection(conn, connection_state(S_RESTART));
1515 return;
1518 #ifdef CONFIG_CGI
1519 if (uri->protocol == PROTOCOL_FILE) {
1520 /* ``Status'' is not a standard HTTP header field although some
1521 * HTTP servers like www.php.net uses it for some reason. It should
1522 * only be used for CGI scripts so that it does not interfere
1523 * with status code depended handling for ``normal'' HTTP like
1524 * redirects. */
1525 d = parse_header(head, "Status", NULL);
1526 if (d) {
1527 int h2 = atoi(d);
1529 mem_free(d);
1530 if (h2 >= 100 && h2 < 600) h = h2;
1531 if (h == 101) {
1532 mem_free(head);
1533 abort_connection(conn, connection_state(S_HTTP_ERROR));
1534 return;
1538 #endif
1540 #ifdef CONFIG_COOKIES
1541 ch = head;
1542 while ((cookie = parse_header(ch, "Set-Cookie", &ch))) {
1543 set_cookie(uri, cookie);
1544 mem_free(cookie);
1546 #endif
1547 http->code = h;
1549 if (h == 100) {
1550 mem_free(head);
1551 state = connection_state(S_PROC);
1552 kill_buffer_data(rb, a);
1553 goto again;
1555 if (h < 200) {
1556 mem_free(head);
1557 abort_connection(conn, connection_state(S_HTTP_ERROR));
1558 return;
1560 if (h == 304) {
1561 mem_free(head);
1562 http_end_request(conn, connection_state(S_OK), 1);
1563 return;
1565 if (h == 204) {
1566 mem_free(head);
1567 http_end_request(conn, connection_state(S_HTTP_204), 0);
1568 return;
1570 if (h == 200 && connection_is_https_proxy(conn) && !conn->socket->ssl) {
1571 #ifdef CONFIG_SSL
1572 mem_free(head);
1573 socket->need_ssl = 1;
1574 complete_connect_socket(socket, uri, http_send_header);
1575 #else
1576 abort_connection(conn, connection_state(S_SSL_ERROR));
1577 #endif
1578 return;
1581 conn->cached = get_cache_entry(conn->uri);
1582 if (!conn->cached) {
1583 mem_free(head);
1584 abort_connection(conn, connection_state(S_OUT_OF_MEM));
1585 return;
1587 conn->cached->cgi = conn->cgi;
1588 mem_free_set(&conn->cached->head, head);
1590 if (!get_opt_bool("document.cache.ignore_cache_control")) {
1591 struct cache_entry *cached = conn->cached;
1593 /* I am not entirely sure in what order we should process these
1594 * headers and if we should still process Cache-Control max-age
1595 * if we already set max age to date mentioned in Expires.
1596 * --jonas */
1597 /* Ensure that when ever cached->max_age is set, cached->expired
1598 * is also set, so the cache management knows max_age contains a
1599 * valid time. If on the other hand no caching is requested
1600 * cached->expire should be set to zero. */
1601 if ((d = parse_header(cached->head, "Expires", NULL))) {
1602 /* Convert date to seconds. */
1603 time_t expires = parse_date(&d, NULL, 0, 1);
1605 mem_free(d);
1607 if (expires && cached->cache_mode != CACHE_MODE_NEVER) {
1608 timeval_from_seconds(&cached->max_age, expires);
1609 cached->expire = 1;
1613 if ((d = parse_header(cached->head, "Pragma", NULL))) {
1614 if (strstr(d, "no-cache")) {
1615 cached->cache_mode = CACHE_MODE_NEVER;
1616 cached->expire = 0;
1618 mem_free(d);
1621 if (cached->cache_mode != CACHE_MODE_NEVER
1622 && (d = parse_header(cached->head, "Cache-Control", NULL))) {
1623 if (strstr(d, "no-cache") || strstr(d, "must-revalidate")) {
1624 cached->cache_mode = CACHE_MODE_NEVER;
1625 cached->expire = 0;
1627 } else {
1628 unsigned char *pos = strstr(d, "max-age=");
1630 assert(cached->cache_mode != CACHE_MODE_NEVER);
1632 if (pos) {
1633 /* Grab the number of seconds. */
1634 timeval_T max_age;
1636 timeval_from_seconds(&max_age, atol(pos + 8));
1637 timeval_now(&cached->max_age);
1638 timeval_add_interval(&cached->max_age, &max_age);
1640 cached->expire = 1;
1644 mem_free(d);
1648 /* XXX: Is there some reason why NOT to follow the Location header
1649 * for any status? If the server didn't mean it, it wouldn't send
1650 * it, after all...? --pasky */
1651 if (h == 201 || h == 301 || h == 302 || h == 303 || h == 307) {
1652 d = parse_header(conn->cached->head, "Location", NULL);
1653 if (d) {
1654 int use_get_method = (h == 303);
1656 /* A note from RFC 2616 section 10.3.3:
1657 * RFC 1945 and RFC 2068 specify that the client is not
1658 * allowed to change the method on the redirected
1659 * request. However, most existing user agent
1660 * implementations treat 302 as if it were a 303
1661 * response, performing a GET on the Location
1662 * field-value regardless of the original request
1663 * method. */
1664 /* So POST must not be redirected to GET, but some
1665 * BUGGY message boards rely on it :-( */
1666 if (h == 302
1667 && get_opt_bool("protocol.http.bugs.broken_302_redirect"))
1668 use_get_method = 1;
1670 redirect_cache(conn->cached, d, use_get_method, -1);
1671 mem_free(d);
1675 if (h == 401) {
1676 if (check_http_authentication(conn, uri,
1677 conn->cached->head, "WWW-Authenticate")) {
1678 retry_connection(conn, connection_state(S_RESTART));
1679 return;
1683 if (h == 407) {
1684 unsigned char *str;
1686 d = parse_header(conn->cached->head, "Proxy-Authenticate", &str);
1687 while (d) {
1688 if (!strncasecmp(d, "Basic", 5)) {
1689 unsigned char *realm = get_header_param(d, "realm");
1691 if (realm) {
1692 mem_free_set(&proxy_auth.realm, realm);
1693 proxy_auth.digest = 0;
1694 mem_free(d);
1695 break;
1698 } else if (!strncasecmp(d, "Digest", 6)) {
1699 unsigned char *realm = get_header_param(d, "realm");
1700 unsigned char *nonce = get_header_param(d, "nonce");
1701 unsigned char *opaque = get_header_param(d, "opaque");
1703 mem_free_set(&proxy_auth.realm, realm);
1704 mem_free_set(&proxy_auth.nonce, nonce);
1705 mem_free_set(&proxy_auth.opaque, opaque);
1706 proxy_auth.digest = 1;
1708 mem_free(d);
1709 break;
1712 mem_free(d);
1713 d = parse_header(str, "Proxy-Authenticate", &str);
1717 kill_buffer_data(rb, a);
1718 http->close = 0;
1719 http->length = -1;
1720 http->recv_version = version;
1722 if ((d = parse_header(conn->cached->head, "Connection", NULL))
1723 || (d = parse_header(conn->cached->head, "Proxy-Connection", NULL))) {
1724 if (!strcasecmp(d, "close")) http->close = 1;
1725 mem_free(d);
1726 } else if (PRE_HTTP_1_1(version)) {
1727 http->close = 1;
1730 cf = conn->from;
1731 conn->from = 0;
1732 d = parse_header(conn->cached->head, "Content-Range", NULL);
1733 if (d) {
1734 if (strlen(d) > 6) {
1735 d[5] = 0;
1736 if (isdigit(d[6]) && !strcasecmp(d, "bytes")) {
1737 int f;
1739 errno = 0;
1740 f = strtol(d + 6, NULL, 10);
1742 if (!errno && f >= 0) conn->from = f;
1745 mem_free(d);
1747 if (cf && !conn->from && !conn->unrestartable) conn->unrestartable = 1;
1748 if ((conn->progress->start <= 0 && conn->from > cf) || conn->from < 0) {
1749 /* We don't want this if conn->progress.start because then conn->from will
1750 * be probably value of conn->progress.start, while cf is 0. */
1751 abort_connection(conn, connection_state(S_HTTP_ERROR));
1752 return;
1755 #if 0
1757 struct status *s;
1758 foreach (s, conn->downloads) {
1759 fprintf(stderr, "conn %p status %p pri %d st %d er %d :: ce %s",
1760 conn, s, s->pri, s->state, s->prev_error,
1761 s->cached ? s->cached->url : (unsigned char *) "N-U-L-L");
1764 #endif
1766 if (conn->progress->start >= 0) {
1767 /* Update to the real value which we've got from Content-Range. */
1768 conn->progress->seek = conn->from;
1770 conn->progress->start = conn->from;
1772 d = parse_header(conn->cached->head, "Content-Length", NULL);
1773 if (d) {
1774 unsigned char *ep;
1775 int l;
1777 errno = 0;
1778 l = strtol(d, (char **) &ep, 10);
1780 if (!errno && !*ep && l >= 0) {
1781 if (!http->close || POST_HTTP_1_0(version))
1782 http->length = l;
1783 conn->est_length = conn->from + l;
1785 mem_free(d);
1788 if (!conn->unrestartable) {
1789 d = parse_header(conn->cached->head, "Accept-Ranges", NULL);
1791 if (d) {
1792 if (!strcasecmp(d, "none"))
1793 conn->unrestartable = 1;
1794 mem_free(d);
1795 } else {
1796 if (!conn->from)
1797 conn->unrestartable = 1;
1801 d = parse_header(conn->cached->head, "Transfer-Encoding", NULL);
1802 if (d) {
1803 if (!strcasecmp(d, "chunked")) {
1804 http->length = LEN_CHUNKED;
1805 http->chunk_remaining = CHUNK_SIZE;
1807 mem_free(d);
1809 if (!http->close && http->length == -1) http->close = 1;
1811 d = parse_header(conn->cached->head, "Last-Modified", NULL);
1812 if (d) {
1813 if (conn->cached->last_modified && strcasecmp(conn->cached->last_modified, d)) {
1814 delete_entry_content(conn->cached);
1815 if (conn->from) {
1816 conn->from = 0;
1817 mem_free(d);
1818 retry_connection(conn, connection_state(S_MODIFIED));
1819 return;
1822 if (!conn->cached->last_modified) conn->cached->last_modified = d;
1823 else mem_free(d);
1825 if (!conn->cached->last_modified) {
1826 d = parse_header(conn->cached->head, "Date", NULL);
1827 if (d) conn->cached->last_modified = d;
1830 /* FIXME: Parse only if HTTP/1.1 or later? --Zas */
1831 d = parse_header(conn->cached->head, "ETag", NULL);
1832 if (d) {
1833 if (conn->cached->etag) {
1834 unsigned char *old_tag = conn->cached->etag;
1835 unsigned char *new_tag = d;
1837 /* http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19 */
1839 if (new_tag[0] == 'W' && new_tag[1] == '/')
1840 new_tag += 2;
1842 if (old_tag[0] == 'W' && old_tag[1] == '/')
1843 old_tag += 2;
1845 if (strcmp(new_tag, old_tag)) {
1846 delete_entry_content(conn->cached);
1847 if (conn->from) {
1848 conn->from = 0;
1849 mem_free(d);
1850 retry_connection(conn, connection_state(S_MODIFIED));
1851 return;
1856 if (!conn->cached->etag)
1857 conn->cached->etag = d;
1858 else
1859 mem_free(d);
1862 d = parse_header(conn->cached->head, "Content-Encoding", NULL);
1863 if (d) {
1864 unsigned char *extension = get_extension_from_uri(uri);
1865 enum stream_encoding file_encoding;
1867 file_encoding = extension ? guess_encoding(extension) : ENCODING_NONE;
1868 mem_free_if(extension);
1870 /* If the content is encoded, we want to preserve the encoding
1871 * if it is implied by the extension, so that saving the URI
1872 * will leave the saved file with the correct encoding. */
1873 #ifdef CONFIG_GZIP
1874 if (file_encoding != ENCODING_GZIP
1875 && (!strcasecmp(d, "gzip") || !strcasecmp(d, "x-gzip")))
1876 conn->content_encoding = ENCODING_GZIP;
1877 if (!strcasecmp(d, "deflate") || !strcasecmp(d, "x-deflate"))
1878 conn->content_encoding = ENCODING_DEFLATE;
1879 #endif
1881 #ifdef CONFIG_BZIP2
1882 if (file_encoding != ENCODING_BZIP2
1883 && (!strcasecmp(d, "bzip2") || !strcasecmp(d, "x-bzip2")))
1884 conn->content_encoding = ENCODING_BZIP2;
1885 #endif
1887 #ifdef CONFIG_LZMA
1888 if (file_encoding != ENCODING_LZMA
1889 && (!strcasecmp(d, "lzma") || !strcasecmp(d, "x-lzma")))
1890 conn->content_encoding = ENCODING_LZMA;
1891 #endif
1892 mem_free(d);
1895 if (conn->content_encoding != ENCODING_NONE) {
1896 mem_free_if(conn->cached->encoding_info);
1897 conn->cached->encoding_info = stracpy(get_encoding_name(conn->content_encoding));
1900 if (http->length == -1 || http->close)
1901 socket->state = SOCKET_END_ONCLOSE;
1903 read_http_data(socket, rb);