1 /* Internal "http" protocol implementation */
15 #include <fcntl.h> /* OS/2 needs this after sys/types.h */
23 #include "cache/cache.h"
24 #include "config/options.h"
25 #include "cookies/cookies.h"
26 #include "intl/charsets.h"
27 #include "intl/gettext/libintl.h"
28 #include "main/module.h"
29 #include "network/connection.h"
30 #include "network/progress.h"
31 #include "network/socket.h"
32 #include "osdep/ascii.h"
33 #include "osdep/osdep.h"
34 #include "osdep/sysname.h"
35 #include "protocol/auth/auth.h"
36 #include "protocol/auth/digest.h"
37 #include "protocol/date.h"
38 #include "protocol/header.h"
39 #include "protocol/http/blacklist.h"
40 #include "protocol/http/codes.h"
41 #include "protocol/http/http.h"
42 #include "protocol/uri.h"
43 #include "session/session.h"
44 #include "terminal/terminal.h"
45 #include "util/base64.h"
46 #include "util/conv.h"
47 #include "util/memory.h"
48 #include "util/string.h"
51 #include "http_negotiate.h"
/* Predicates over an HTTP version value @x, i.e. any expression that has
 * `major' and `minor' members. @x is expanded more than once, so it must be
 * free of side effects. */

/* Building blocks: exact match, and strictly newer than @maj.@min. */
#define HTTP_VERSION_IS(x, maj, min) \
	((x).major == (maj) && (x).minor == (min))
#define HTTP_VERSION_AFTER(x, maj, min) \
	((x).major > (maj) || ((x).major == (maj) && (x).minor > (min)))

/* Exactly the named version. */
#define HTTP_0_9(x)		HTTP_VERSION_IS(x, 0, 9)
#define HTTP_1_0(x)		HTTP_VERSION_IS(x, 1, 0)
#define HTTP_1_1(x)		HTTP_VERSION_IS(x, 1, 1)

/* Older than the named version. PRE_HTTP_1_0 looks at the major number
 * only; PRE_HTTP_1_1 additionally accepts exactly 1.0. */
#define PRE_HTTP_1_0(x)		((x).major < 1)
#define PRE_HTTP_1_1(x)		(PRE_HTTP_1_0(x) || HTTP_1_0(x))

/* Newer than the named version. */
#define POST_HTTP_1_0(x)	HTTP_VERSION_AFTER(x, 1, 0)
#define POST_HTTP_1_1(x)	HTTP_VERSION_AFTER(x, 1, 1)
68 struct http_connection_info
{
69 enum blacklist_flags bl_flags
;
70 struct http_version recv_version
;
71 struct http_version sent_version
;
75 #define LEN_CHUNKED -2 /* == we get data in unknown number of chunks */
76 #define LEN_FINISHED 0
79 /* Either bytes coming in this chunk yet or "parser state". */
80 #define CHUNK_DATA_END -3
81 #define CHUNK_ZERO_SIZE -2
89 static struct auth_entry proxy_auth
;
91 static unsigned char *accept_charset
= NULL
;
94 static struct option_info http_options
[] = {
95 INIT_OPT_TREE("protocol", N_("HTTP"),
97 N_("HTTP-specific options.")),
100 INIT_OPT_TREE("protocol.http", N_("Server bug workarounds"),
102 N_("Server-side HTTP bugs workarounds.")),
104 INIT_OPT_BOOL("protocol.http.bugs", N_("Do not send Accept-Charset"),
105 "accept_charset", 0, 1,
106 N_("The Accept-Charset header is quite long and sending it can trigger\n"
107 "bugs in some rarely found servers.")),
109 INIT_OPT_BOOL("protocol.http.bugs", N_("Allow blacklisting"),
110 "allow_blacklist", 0, 1,
111 N_("Allow blacklisting of buggy servers.")),
113 INIT_OPT_BOOL("protocol.http.bugs", N_("Broken 302 redirects"),
114 "broken_302_redirect", 0, 1,
115 N_("Broken 302 redirect (violates RFC but compatible with Netscape).\n"
116 "This is a problem for a lot of web discussion boards and the like.\n"
117 "If they will do strange things to you, try to play with this.")),
119 INIT_OPT_BOOL("protocol.http.bugs", N_("No keepalive after POST requests"),
120 "post_no_keepalive", 0, 0,
121 N_("Disable keepalive connection after POST request.")),
123 INIT_OPT_BOOL("protocol.http.bugs", N_("Use HTTP/1.0"),
125 N_("Use HTTP/1.0 protocol instead of HTTP/1.1.")),
127 INIT_OPT_TREE("protocol.http", N_("Proxy configuration"),
129 N_("HTTP proxy configuration.")),
131 INIT_OPT_STRING("protocol.http.proxy", N_("Host and port-number"),
133 N_("Host and port-number (host:port) of the HTTP proxy, or blank.\n"
134 "If it's blank, HTTP_PROXY environment variable is checked as well.")),
136 INIT_OPT_STRING("protocol.http.proxy", N_("Username"),
138 N_("Proxy authentication username.")),
140 INIT_OPT_STRING("protocol.http.proxy", N_("Password"),
142 N_("Proxy authentication password.")),
145 INIT_OPT_TREE("protocol.http", N_("Referer sending"),
147 N_("HTTP referer sending options. HTTP referer is a special header\n"
148 "sent in the HTTP requests, which is supposed to contain the previous\n"
149 "page visited by the browser. This way, the server can know what link\n"
150 "did you follow when accessing that page. However, this behaviour\n"
151 "can unfortunately considerably affect privacy and can lead even to a\n"
152 "security problem on some badly designed web pages.")),
154 INIT_OPT_INT("protocol.http.referer", N_("Policy"),
156 REFERER_NONE
, REFERER_TRUE
, REFERER_TRUE
,
157 N_("Mode of sending HTTP referer:\n"
158 "0 is send no referer\n"
159 "1 is send current URL as referer\n"
160 "2 is send fixed fake referer\n"
161 "3 is send previous URL as referer (correct, but insecure)")),
163 INIT_OPT_STRING("protocol.http.referer", N_("Fake referer URL"),
165 N_("Fake referer to be sent when policy is 2.")),
168 INIT_OPT_STRING("protocol.http", N_("Send Accept-Language header"),
169 "accept_language", 0, "",
170 N_("Send Accept-Language header.")),
172 INIT_OPT_BOOL("protocol.http", N_("Use UI language as Accept-Language"),
173 "accept_ui_language", 0, 1,
174 N_("Request localised versions of documents from web-servers (using the\n"
175 "Accept-Language header) using the language you have configured for\n"
176 "ELinks' user-interface (this also affects navigator.language ECMAScript\n"
177 "value available to scripts). Note that some see this as a potential\n"
178 "security risk because it tells web-masters and the FBI sniffers about\n"
179 "your language preference.")),
181 INIT_OPT_BOOL("protocol.http", N_("Activate HTTP TRACE debugging"),
183 N_("If active, all HTTP requests are sent with TRACE as their method\n"
184 "rather than GET or POST. This is useful for debugging of both ELinks\n"
185 "and various server-side scripts --- the server only returns the client's\n"
186 "request back to the client verbatim. Note that this type of request may\n"
187 "not be enabled on all servers.")),
189 /* OSNews.com is supposed to be relying on the textmode token, at least. */
190 INIT_OPT_STRING("protocol.http", N_("User-agent identification"),
191 "user_agent", 0, "ELinks/%v (textmode; %s; %t-%b)",
192 N_("Change the User Agent ID. That means identification string, which\n"
193 "is sent to HTTP server when a document is requested. The 'textmode'\n"
194 "token in the first field is our silent attempt to establish this as\n"
195 "a standard for new textmode user agents, so that the webmasters can\n"
196 "have just a single uniform test for these if they are e.g. pushing\n"
197 "some lite version to them automagically.\n"
198 "Use \" \" if you don't want any User-Agent header to be sent at all.\n"
199 "%v in the string means ELinks version,\n"
200 "%s in the string means system identification,\n"
201 "%t in the string means size of the terminal,\n"
202 "%b in the string means number of bars displayed by ELinks.")),
205 INIT_OPT_TREE("protocol", N_("HTTPS"),
207 N_("HTTPS-specific options.")),
209 INIT_OPT_TREE("protocol.https", N_("Proxy configuration"),
211 N_("HTTPS proxy configuration.")),
213 INIT_OPT_STRING("protocol.https.proxy", N_("Host and port-number"),
215 N_("Host and port-number (host:port) of the HTTPS CONNECT proxy, or blank.\n"
216 "If it's blank, HTTPS_PROXY environment variable is checked as well.")),
/* Forward declaration of the module's "done" hook, which is referenced by
 * http_protocol_module before its definition. Declared with (void) so that
 * this is a real prototype; an empty parameter list would leave the
 * arguments unchecked. */
static void done_http(void);
222 struct module http_protocol_module
= struct_module(
223 /* name: */ N_("HTTP"),
224 /* options: */ http_options
,
226 /* submodules: */ NULL
,
229 /* done: */ done_http
236 mem_free_if(proxy_auth
.realm
);
237 mem_free_if(proxy_auth
.nonce
);
238 mem_free_if(proxy_auth
.opaque
);
243 mem_free(accept_charset
);
247 init_accept_charset(void)
253 if (!init_string(&ac
)) return;
255 for (i
= 0; (cs
= get_cp_mime_name(i
)); i
++) {
257 add_to_string(&ac
, ", ");
259 add_to_string(&ac
, "Accept-Charset: ");
261 add_to_string(&ac
, cs
);
265 add_crlf_to_string(&ac
);
268 accept_charset
= squeezastring(&ac
);
275 subst_user_agent(unsigned char *fmt
, unsigned char *version
,
276 unsigned char *sysname
, unsigned char *termsize
)
280 if (!init_string(&agent
)) return NULL
;
285 for (p
= 0; fmt
[p
] && fmt
[p
] != '%'; p
++);
287 add_bytes_to_string(&agent
, fmt
, p
);
290 if (*fmt
!= '%') continue;
295 if (!list_empty(sessions
)) {
296 unsigned char bs
[4] = "";
298 struct session
*ses
= sessions
.prev
;
299 int bars
= ses
->status
.show_status_bar
300 + ses
->status
.show_tabs_bar
301 + ses
->status
.show_title_bar
;
303 ulongcat(bs
, &blen
, bars
, 2, 0);
304 add_to_string(&agent
, bs
);
308 add_to_string(&agent
, version
);
311 add_to_string(&agent
, sysname
);
315 add_to_string(&agent
, termsize
);
318 add_bytes_to_string(&agent
, fmt
- 1, 2);
328 add_url_to_http_string(struct string
*header
, struct uri
*uri
, int components
)
330 /* This block substitutes spaces in URL by %20s. This is
331 * certainly not the right place where to do it, but now the
332 * behaviour is at least improved compared to what we had
333 * before. We should probably encode all URLs as early as
334 * possible, and possibly decode them back in protocol
335 * backends. --pasky */
336 unsigned char *string
= get_uri_string(uri
, components
);
337 unsigned char *data
= string
;
342 int len
= strcspn(data
, " \t\r\n\\");
344 add_bytes_to_string(header
, data
, len
);
346 if (!data
[len
]) break;
348 if (data
[len
++] == '\\')
349 add_char_to_string(header
, '/');
351 add_to_string(header
, "%20");
359 /* Parse from @end - 1 to @start and set *@value to integer found.
360 * It returns -1 if not a number, 0 otherwise.
361 * @end should be > @start. */
363 revstr2num(unsigned char *start
, unsigned char *end
, int *value
)
369 if (!isdigit(*end
)) return -1; /* NaN */
370 val
+= (*end
- '0') * q
;
372 } while (end
> start
);
378 /* This function extracts code, major and minor version from string
379 * "\s*HTTP/\d+.\d+\s+\d\d\d..."
380 * It returns a negative value on error, 0 on success.
383 get_http_code(struct read_buffer
*rb
, int *code
, struct http_version
*version
)
385 unsigned char *head
= rb
->data
;
386 unsigned char *start
;
393 while (*head
== ' ') head
++;
396 if (toupper(*head
) != 'H' || toupper(*++head
) != 'T' ||
397 toupper(*++head
) != 'T' || toupper(*++head
) != 'P'
404 while (*head
&& *head
!= '.') head
++;
406 if (!*head
|| !(head
- start
)
407 || (head
- start
) > 4
408 || !isdigit(*(head
+ 1)))
411 /* Extract major version number. */
412 if (revstr2num(start
, head
, &version
->major
)) return -3; /* NaN */
417 while (*head
&& *head
!= ' ') head
++;
419 if (!*head
|| !(head
- start
) || (head
- start
) > 4) return -4;
421 /* Extract minor version number. */
422 if (revstr2num(start
, head
, &version
->minor
)) return -5; /* NaN */
425 while (*head
== ' ') head
++;
427 /* Sanity check for code. */
428 if (head
[0] < '1' || head
[0] > '9' ||
431 return -6; /* Invalid code. */
434 *code
= (head
[0] - '0') * 100 + (head
[1] - '0') * 10 + head
[2] - '0';
440 check_http_server_bugs(struct uri
*uri
, struct http_connection_info
*http
,
443 unsigned char *server
;
444 const unsigned char *const *s
;
445 static const unsigned char *const buggy_servers
[] = {
448 "Netscape-Enterprise",
452 if (!get_opt_bool("protocol.http.bugs.allow_blacklist")
453 || HTTP_1_0(http
->sent_version
))
456 server
= parse_header(head
, "Server", NULL
);
460 for (s
= buggy_servers
; *s
; s
++) {
461 if (strstr(server
, *s
)) {
462 add_blacklist_entry(uri
, SERVER_BLACKLIST_HTTP10
);
472 http_end_request(struct connection
*conn
, struct connection_state state
,
475 shutdown_connection_stream(conn
);
477 if (conn
->info
&& !((struct http_connection_info
*) conn
->info
)->close
478 && (!conn
->socket
->ssl
) /* We won't keep alive ssl connections */
479 && (!get_opt_bool("protocol.http.bugs.post_no_keepalive")
480 || !conn
->uri
->post
)) {
481 if (is_in_state(state
, S_OK
) && conn
->cached
)
482 normalize_cache_entry(conn
->cached
, !notrunc
? conn
->from
: -1);
483 set_connection_state(conn
, state
);
484 add_keepalive_connection(conn
, HTTP_KEEPALIVE_TIMEOUT
, NULL
);
486 abort_connection(conn
, state
);
/* Forward declaration: http_protocol_handler() hands this function to
 * make_connection() and also calls it directly, ahead of its definition. */
static void http_send_header(struct socket *socket);
493 http_protocol_handler(struct connection
*conn
)
495 /* setcstate(conn, S_CONN); */
497 if (!has_keepalive_connection(conn
)) {
498 make_connection(conn
->socket
, conn
->uri
, http_send_header
,
499 conn
->cache_mode
>= CACHE_MODE_FORCE_RELOAD
);
501 http_send_header(conn
->socket
);
506 proxy_protocol_handler(struct connection
*conn
)
508 http_protocol_handler(conn
);
/* Non-zero when the URI pointed to by @x has PROTOCOL_PROXY as its
 * protocol. */
#define IS_PROXY_URI(x)	(PROTOCOL_PROXY == (x)->protocol)

/* Non-zero when @conn's own URI is a proxy URI and the URI being proxied
 * uses PROTOCOL_HTTPS. (conn)->proxied_uri is dereferenced only after the
 * proxy test has passed. */
#define connection_is_https_proxy(conn) \
	(IS_PROXY_URI((conn)->uri) \
	 && PROTOCOL_HTTPS == (conn)->proxied_uri->protocol)
516 struct http_connection_info
*
517 init_http_connection_info(struct connection
*conn
, int major
, int minor
, int close
)
519 struct http_connection_info
*http
;
521 http
= mem_calloc(1, sizeof(*http
));
523 http_end_request(conn
, connection_state(S_OUT_OF_MEM
), 0);
527 http
->sent_version
.major
= major
;
528 http
->sent_version
.minor
= minor
;
531 /* The CGI code uses this too and blacklisting expects a host name. */
532 if (conn
->proxied_uri
->protocol
!= PROTOCOL_FILE
)
533 http
->bl_flags
= get_blacklist_flags(conn
->proxied_uri
);
535 if (http
->bl_flags
& SERVER_BLACKLIST_HTTP10
536 || get_opt_bool("protocol.http.bugs.http10")) {
537 http
->sent_version
.major
= 1;
538 http
->sent_version
.minor
= 0;
541 /* If called from HTTPS proxy connection the connection info might have
542 * already been allocated. */
543 mem_free_set(&conn
->info
, http
);
549 accept_encoding_header(struct string
*header
)
551 #if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) || defined(CONFIG_LZMA)
554 add_to_string(header
, "Accept-Encoding: ");
557 add_to_string(header
, "bzip2");
562 if (comma
) add_to_string(header
, ", ");
563 add_to_string(header
, "deflate, gzip");
568 if (comma
) add_to_string(header
, ", ");
569 add_to_string(header
, "lzma");
571 add_crlf_to_string(header
);
576 http_send_header(struct socket
*socket
)
578 struct connection
*conn
= socket
->conn
;
579 struct http_connection_info
*http
;
580 int trace
= get_opt_bool("protocol.http.trace");
581 struct string header
;
582 unsigned char *post_data
= NULL
;
583 struct auth_entry
*entry
= NULL
;
584 struct uri
*uri
= conn
->proxied_uri
; /* Set to the real uri */
585 unsigned char *optstr
;
586 int use_connect
, talking_to_proxy
;
588 /* Sanity check for a host */
589 if (!uri
|| !uri
->host
|| !*uri
->host
|| !uri
->hostlen
) {
590 http_end_request(conn
, connection_state(S_BAD_URL
), 0);
594 http
= init_http_connection_info(conn
, 1, 1, 0);
597 if (!init_string(&header
)) {
598 http_end_request(conn
, connection_state(S_OUT_OF_MEM
), 0);
602 if (!conn
->cached
) conn
->cached
= find_in_cache(uri
);
604 talking_to_proxy
= IS_PROXY_URI(conn
->uri
) && !conn
->socket
->ssl
;
605 use_connect
= connection_is_https_proxy(conn
) && !conn
->socket
->ssl
;
608 add_to_string(&header
, "TRACE ");
609 } else if (use_connect
) {
610 add_to_string(&header
, "CONNECT ");
611 /* In CONNECT requests, we send only a subset of the
612 * headers to the proxy. See the "CONNECT:" comments
613 * below. After the CONNECT request succeeds, we
614 * negotiate TLS with the real server and make a new
615 * HTTP request that includes all the headers. */
616 } else if (uri
->post
) {
617 add_to_string(&header
, "POST ");
618 conn
->unrestartable
= 1;
620 add_to_string(&header
, "GET ");
623 if (!talking_to_proxy
) {
624 add_char_to_string(&header
, '/');
628 /* Add port if it was specified or the default port */
629 add_uri_to_string(&header
, uri
, URI_HTTP_CONNECT
);
631 if (connection_is_https_proxy(conn
) && conn
->socket
->ssl
) {
632 add_url_to_http_string(&header
, uri
, URI_DATA
);
634 } else if (talking_to_proxy
) {
635 add_url_to_http_string(&header
, uri
, URI_PROXY
);
638 add_url_to_http_string(&header
, conn
->uri
, URI_DATA
);
642 add_to_string(&header
, " HTTP/");
643 add_long_to_string(&header
, http
->sent_version
.major
);
644 add_char_to_string(&header
, '.');
645 add_long_to_string(&header
, http
->sent_version
.minor
);
646 add_crlf_to_string(&header
);
648 /* CONNECT: Sending a Host header seems pointless as the same
649 * information is already in the CONNECT line. It's harmless
650 * though and Mozilla does it too. */
651 add_to_string(&header
, "Host: ");
652 add_uri_to_string(&header
, uri
, URI_HTTP_HOST
);
653 add_crlf_to_string(&header
);
655 /* CONNECT: Proxy-Authorization is intended to be seen by the proxy. */
656 if (talking_to_proxy
) {
657 unsigned char *user
= get_opt_str("protocol.http.proxy.user");
658 unsigned char *passwd
= get_opt_str("protocol.http.proxy.passwd");
660 if (proxy_auth
.digest
) {
661 unsigned char *response
;
662 int userlen
= int_min(strlen(user
), AUTH_USER_MAXLEN
- 1);
663 int passwordlen
= int_min(strlen(passwd
), AUTH_PASSWORD_MAXLEN
- 1);
666 memcpy(proxy_auth
.user
, user
, userlen
);
667 proxy_auth
.user
[userlen
] = '\0';
669 memcpy(proxy_auth
.password
, passwd
, passwordlen
);
670 proxy_auth
.password
[passwordlen
] = '\0';
672 /* FIXME: @uri is the proxied URI. Maybe the passed URI
673 * should be the proxy URI aka conn->uri. --jonas */
674 response
= get_http_auth_digest_response(&proxy_auth
, uri
);
676 add_to_string(&header
, "Proxy-Authorization: Digest ");
677 add_to_string(&header
, response
);
678 add_crlf_to_string(&header
);
685 unsigned char *proxy_data
;
687 proxy_data
= straconcat(user
, ":", passwd
, (unsigned char *) NULL
);
689 unsigned char *proxy_64
= base64_encode(proxy_data
);
692 add_to_string(&header
, "Proxy-Authorization: Basic ");
693 add_to_string(&header
, proxy_64
);
694 add_crlf_to_string(&header
);
697 mem_free(proxy_data
);
703 /* CONNECT: User-Agent does not reveal anything about the
704 * resource we're fetching, and it may help the proxy return
705 * better error messages. */
706 optstr
= get_opt_str("protocol.http.user_agent");
707 if (*optstr
&& strcmp(optstr
, " ")) {
708 unsigned char *ustr
, ts
[64] = "";
710 add_to_string(&header
, "User-Agent: ");
712 if (!list_empty(terminals
)) {
713 unsigned int tslen
= 0;
714 struct terminal
*term
= terminals
.prev
;
716 ulongcat(ts
, &tslen
, term
->width
, 3, 0);
718 ulongcat(ts
, &tslen
, term
->height
, 3, 0);
720 ustr
= subst_user_agent(optstr
, VERSION_STRING
, system_name
,
724 add_to_string(&header
, ustr
);
728 add_crlf_to_string(&header
);
731 /* CONNECT: Referer probably is a secret page in the HTTPS
732 * server, so don't reveal it to the proxy. */
734 switch (get_opt_int("protocol.http.referer.policy")) {
740 optstr
= get_opt_str("protocol.http.referer.fake");
741 if (!optstr
[0]) break;
742 add_to_string(&header
, "Referer: ");
743 add_to_string(&header
, optstr
);
744 add_crlf_to_string(&header
);
748 if (!conn
->referrer
) break;
749 add_to_string(&header
, "Referer: ");
750 add_url_to_http_string(&header
, conn
->referrer
, URI_HTTP_REFERRER
);
751 add_crlf_to_string(&header
);
754 case REFERER_SAME_URL
:
755 add_to_string(&header
, "Referer: ");
756 add_url_to_http_string(&header
, uri
, URI_HTTP_REFERRER
);
757 add_crlf_to_string(&header
);
762 /* CONNECT: Do send all Accept* headers to the CONNECT proxy,
763 * because they do not reveal anything about the resource
764 * we're going to request via TLS, and they may affect the
765 * error message if the CONNECT request fails.
767 * If ELinks is ever changed to vary its Accept headers based
768 * on what it intends to do with the returned resource, e.g.
769 * sending "Accept: text/css" when it wants an external
770 * stylesheet, then it should do that only in the inner GET
771 * and not in the outer CONNECT. */
772 add_to_string(&header
, "Accept: */*");
773 add_crlf_to_string(&header
);
775 accept_encoding_header(&header
);
777 if (!accept_charset
) {
778 init_accept_charset();
781 if (!(http
->bl_flags
& SERVER_BLACKLIST_NO_CHARSET
)
782 && !get_opt_bool("protocol.http.bugs.accept_charset")
784 add_to_string(&header
, accept_charset
);
787 optstr
= get_opt_str("protocol.http.accept_language");
789 add_to_string(&header
, "Accept-Language: ");
790 add_to_string(&header
, optstr
);
791 add_crlf_to_string(&header
);
794 else if (get_opt_bool("protocol.http.accept_ui_language")) {
795 unsigned char *code
= language_to_iso639(current_language
);
798 add_to_string(&header
, "Accept-Language: ");
799 add_to_string(&header
, code
);
800 add_crlf_to_string(&header
);
805 /* CONNECT: Proxy-Connection is intended to be seen by the
806 * proxy. If the CONNECT request succeeds, then the proxy
807 * will forward the remainder of the TCP connection to the
808 * origin server, and Proxy-Connection does not matter; but
809 * if the request fails, then Proxy-Connection may matter. */
810 /* FIXME: What about post-HTTP/1.1?? --Zas */
811 if (HTTP_1_1(http
->sent_version
)) {
812 if (!IS_PROXY_URI(conn
->uri
)) {
813 add_to_string(&header
, "Connection: ");
815 add_to_string(&header
, "Proxy-Connection: ");
818 if (!uri
->post
|| !get_opt_bool("protocol.http.bugs.post_no_keepalive")) {
819 add_to_string(&header
, "Keep-Alive");
821 add_to_string(&header
, "close");
823 add_crlf_to_string(&header
);
826 /* CONNECT: Do not tell the proxy anything we have cached
827 * about the resource. */
828 if (!use_connect
&& conn
->cached
) {
829 if (!conn
->cached
->incomplete
&& conn
->cached
->head
830 && conn
->cache_mode
<= CACHE_MODE_CHECK_IF_MODIFIED
) {
831 if (conn
->cached
->last_modified
) {
832 add_to_string(&header
, "If-Modified-Since: ");
833 add_to_string(&header
, conn
->cached
->last_modified
);
834 add_crlf_to_string(&header
);
836 if (conn
->cached
->etag
) {
837 add_to_string(&header
, "If-None-Match: ");
838 add_to_string(&header
, conn
->cached
->etag
);
839 add_crlf_to_string(&header
);
844 /* CONNECT: Let's send cache control headers to the proxy too;
845 * they may affect DNS caching. */
846 if (conn
->cache_mode
>= CACHE_MODE_FORCE_RELOAD
) {
847 add_to_string(&header
, "Pragma: no-cache");
848 add_crlf_to_string(&header
);
849 add_to_string(&header
, "Cache-Control: no-cache");
850 add_crlf_to_string(&header
);
853 /* CONNECT: Do not reveal byte ranges to the proxy. It can't
854 * do anything good with that information anyway. */
855 if (!use_connect
&& (conn
->from
|| conn
->progress
->start
> 0)) {
856 /* conn->from takes precedence. conn->progress->start is set only the first
857 * time, then conn->from gets updated and in case of any retries
858 * etc we have everything interesting in conn->from already. */
859 add_to_string(&header
, "Range: bytes=");
860 add_long_to_string(&header
, conn
->from
? conn
->from
: conn
->progress
->start
);
861 add_char_to_string(&header
, '-');
862 add_crlf_to_string(&header
);
865 /* CONNECT: The Authorization header is for the origin server only. */
868 if (http_negotiate_output(uri
, &header
) != 0)
870 entry
= find_auth(uri
);
875 unsigned char *response
;
877 response
= get_http_auth_digest_response(entry
, uri
);
879 add_to_string(&header
, "Authorization: Digest ");
880 add_to_string(&header
, response
);
881 add_crlf_to_string(&header
);
887 /* RFC2617 section 2 [Basic Authentication Scheme]
889 * To receive authorization, the client sends the userid
890 * and password, separated by a single colon (":")
891 * character, within a base64 [7] encoded string in the
895 /* Create base64 encoded string. */
896 id
= straconcat(entry
->user
, ":", entry
->password
,
897 (unsigned char *) NULL
);
899 unsigned char *base64
= base64_encode(id
);
901 mem_free_set(&id
, base64
);
905 add_to_string(&header
, "Authorization: Basic ");
906 add_to_string(&header
, id
);
907 add_crlf_to_string(&header
);
913 /* CONNECT: Any POST data is for the origin server only. */
914 if (!use_connect
&& uri
->post
) {
915 /* We search for first '\n' in uri->post to get content type
916 * as set by get_form_uri(). This '\n' is dropped if any
917 * and replaced by correct '\r\n' termination here. */
918 unsigned char *postend
= strchr(uri
->post
, '\n');
921 add_to_string(&header
, "Content-Type: ");
922 add_bytes_to_string(&header
, uri
->post
, postend
- uri
->post
);
923 add_crlf_to_string(&header
);
926 post_data
= postend
? postend
+ 1 : uri
->post
;
927 add_to_string(&header
, "Content-Length: ");
928 add_long_to_string(&header
, strlen(post_data
) / 2);
929 add_crlf_to_string(&header
);
932 #ifdef CONFIG_COOKIES
933 /* CONNECT: Cookies are for the origin server only. */
935 struct string
*cookies
= send_cookies(uri
);
938 add_to_string(&header
, "Cookie: ");
939 add_string_to_string(&header
, cookies
);
940 add_crlf_to_string(&header
);
941 done_string(cookies
);
946 add_crlf_to_string(&header
);
948 /* CONNECT: Any POST data is for the origin server only.
949 * This was already checked above and post_data is NULL
950 * in that case. Verified with an assertion below. */
952 #define POST_BUFFER_SIZE 4096
953 unsigned char *post
= post_data
;
954 unsigned char buffer
[POST_BUFFER_SIZE
];
957 assert(!use_connect
); /* see comment above */
959 while (post
[0] && post
[1]) {
963 assertm(h1
>= 0 && h1
< 16, "h1 in the POST buffer is %d (%d/%c)", h1
, post
[0], post
[0]);
964 if_assert_failed h1
= 0;
967 assertm(h2
>= 0 && h2
< 16, "h2 in the POST buffer is %d (%d/%c)", h2
, post
[1], post
[1]);
968 if_assert_failed h2
= 0;
970 buffer
[n
++] = (h1
<<4) + h2
;
972 if (n
== POST_BUFFER_SIZE
) {
973 add_bytes_to_string(&header
, buffer
, n
);
979 add_bytes_to_string(&header
, buffer
, n
);
980 #undef POST_BUFFER_SIZE
983 request_from_socket(socket
, header
.source
, header
.length
,
984 connection_state(S_SENT
),
985 SOCKET_END_ONCLOSE
, http_got_header
);
986 done_string(&header
);
990 /* This function decompresses the data block given in @data (if it was
991 * compressed), which is long @len bytes. The decompressed data block is given
992 * back to the world as the return value and its length is stored into
993 * @new_len. After this function returns, the caller will discard all the @len
994 * input bytes, so this function must use all of them unless an error occurs.
996 * In this function, value of either http->chunk_remaining or http->length is
997 * being changed (it depends on if chunked mode is used or not).
999 * Note that the function is still a little esoteric for me. Don't take it
1000 * lightly and don't mess with it without grave reason! If you dare to touch
1001 * this without testing the changes on slashdot, freshmeat and cvsweb
1002 * (including revision history), don't dare to send me any patches! ;) --pasky
1004 * This function gotta die. */
1005 static unsigned char *
1006 decompress_data(struct connection
*conn
, unsigned char *data
, int len
,
1009 struct http_connection_info
*http
= conn
->info
;
1010 enum { NORMAL
, FINISHING
} state
= NORMAL
;
1012 int *length_of_block
;
1013 unsigned char *output
= NULL
;
1015 #define BIG_READ 65536
1017 if (http
->length
== LEN_CHUNKED
) {
1018 if (http
->chunk_remaining
== CHUNK_ZERO_SIZE
)
1020 length_of_block
= &http
->chunk_remaining
;
1022 length_of_block
= &http
->length
;
1023 if (!*length_of_block
) {
1024 /* Going to finish this decoding business. */
1029 if (conn
->content_encoding
== ENCODING_NONE
) {
1031 if (*length_of_block
> 0) *length_of_block
-= len
;
1035 *new_len
= 0; /* new_len must be zero if we would ever return NULL */
1037 if (conn
->stream_pipes
[0] == -1
1038 && (c_pipe(conn
->stream_pipes
) < 0
1039 || set_nonblocking_fd(conn
->stream_pipes
[0]) < 0
1040 || set_nonblocking_fd(conn
->stream_pipes
[1]) < 0)) {
1047 if (state
== NORMAL
) {
1048 /* ... we aren't finishing yet. */
1049 int written
= safe_write(conn
->stream_pipes
[1], data
, len
);
1055 /* In non-keep-alive connections http->length == -1, so the test below */
1056 if (*length_of_block
> 0)
1057 *length_of_block
-= written
;
1058 /* http->length is 0 at the end of block for all modes: keep-alive,
1059 * non-keep-alive and chunked */
1060 if (!http
->length
) {
1061 /* That's all, folks - let's finish this. */
1064 /* We're done for this round (but not done
1065 * completely). Thus we will get out with
1066 * what we have and leave what we wrote to
1067 * the next round - we have to do that since
1068 * we MUST NOT ever empty the pipe completely
1069 * - this would cause a disaster for
1070 * read_encoded(), which would simply not
1071 * work right then. */
1077 if (!conn
->stream
) {
1078 conn
->stream
= open_encoded(conn
->stream_pipes
[0],
1079 conn
->content_encoding
);
1080 if (!conn
->stream
) return NULL
;
1083 tmp
= mem_realloc(output
, *new_len
+ BIG_READ
);
1087 did_read
= read_encoded(conn
->stream
, output
+ *new_len
, BIG_READ
);
1089 /* Do not break from the loop if did_read == 0. It
1090 * means no decoded data is available yet, but some may
1091 * become available later. This happens especially with
1092 * the bzip2 decoder, which needs an entire compressed
1093 * block as input before it generates any output. */
1098 *new_len
+= did_read
;
1099 } while (len
|| (did_read
== BIG_READ
));
1101 if (state
== FINISHING
) shutdown_connection_stream(conn
);
1106 is_line_in_buffer(struct read_buffer
*rb
)
1110 for (l
= 0; l
< rb
->length
; l
++) {
1111 unsigned char a0
= rb
->data
[l
];
1115 if (a0
== ASCII_CR
) {
1116 if (rb
->data
[l
+ 1] == ASCII_LF
1117 && l
< rb
->length
- 1)
1119 if (l
== rb
->length
- 1)
/* Forward declaration: read_more_http_data() passes this function to
 * read_from_socket() before its definition appears. */
static void read_http_data(struct socket *socket, struct read_buffer *rb);
1131 read_more_http_data(struct connection
*conn
, struct read_buffer
*rb
,
1132 int already_got_anything
)
1134 struct connection_state state
= already_got_anything
1135 ? connection_state(S_TRANS
) : conn
->state
;
1137 read_from_socket(conn
->socket
, rb
, state
, read_http_data
);
1141 read_http_data_done(struct connection
*conn
)
1143 struct http_connection_info
*http
= conn
->info
;
1145 /* There's no content but an error so just print
1146 * that instead of nothing. */
1148 if (http
->code
>= 400) {
1149 http_error_document(conn
, http
->code
);
1152 /* This is not an error, thus fine. No need to generate any
1153 * document, as this may be empty and it's not a problem.
1154 * In case of 3xx, we're probably just getting kicked to
1155 * another page anyway. And in case of 2xx, the document
1156 * may indeed be empty and thus the user should see it so. */
1160 http_end_request(conn
, connection_state(S_OK
), 0);
/* Chunked transfer-coding reader, driven by http->chunk_remaining, which is
 * either the number of payload bytes still expected in the current chunk or
 * one of the parser states:
 *   CHUNK_SIZE      - a chunk-size line is expected; it is parsed as a
 *                     hexadecimal number with strtol().
 *   CHUNK_ZERO_SIZE - the size line announced a zero-length chunk.
 *   CHUNK_DATA_END  - the last chunk was handled; following lines are
 *                     removed from the buffer.
 * Payload bytes go through decompress_data() and are stored with
 * add_fragment() at offset conn->from; conn->received grows by the raw byte
 * count and conn->from by the decoded byte count.
 * The return visible here is !!total_data_len, i.e. nonzero when this call
 * stored decoded data.
 * NOTE(review): this listing omits lines of the function (other return
 * statements, the enclosing loop); confirm the full set of return values
 * against the complete source. */
1169 read_chunked_http_data(struct connection
*conn
, struct read_buffer
*rb
)
1171 struct http_connection_info
*http
= conn
->info
;
1172 int total_data_len
= 0;
1175 /* Chunked. Good luck! */
1176 /* See RFC2616, section 3.6.1. Basically, it looks like:
1177 * 1234 ; a = b ; c = d\r\n
1178 * aklkjadslkfjalkfjlkajkljfdkljdsfkljdf*1234\r\n
1181 if (http
->chunk_remaining
== CHUNK_DATA_END
) {
1182 int l
= is_line_in_buffer(rb
);
1186 /* Invalid character in buffer. */
1190 /* Remove everything to the EOLN. */
1191 kill_buffer_data(rb
, l
);
/* A chunk-size line is expected next. */
1199 } else if (http
->chunk_remaining
== CHUNK_SIZE
) {
1200 int l
= is_line_in_buffer(rb
);
1208 n
= strtol(rb
->data
, (char **) &de
, 16);
1209 if (errno
|| !*de
) {
1214 if (l
== -1 || de
== rb
->data
) {
1218 /* Remove everything to the EOLN. */
1219 kill_buffer_data(rb
, l
);
1220 http
->chunk_remaining
= n
;
1221 if (!http
->chunk_remaining
)
1222 http
->chunk_remaining
= CHUNK_ZERO_SIZE
;
/* Payload state: chunk_remaining is a byte count or CHUNK_ZERO_SIZE. */
1227 unsigned char *data
;
1229 int zero
= (http
->chunk_remaining
== CHUNK_ZERO_SIZE
);
1230 int len
= zero
? 0 : http
->chunk_remaining
;
1232 /* Maybe everything necessary didn't come yet.. */
1233 int_upper_bound(&len
, rb
->length
);
1234 conn
->received
+= len
;
1236 data
= decompress_data(conn
, rb
->data
, len
, &data_len
);
1238 if (add_fragment(conn
->cached
, conn
->from
,
1239 data
, data_len
) == 1)
/* Release the buffer only when it is distinct from rb->data. */
1242 if (data
&& data
!= rb
->data
) mem_free(data
);
1244 conn
->from
+= data_len
;
1245 total_data_len
+= data_len
;
1247 kill_buffer_data(rb
, len
);
1250 /* Last chunk has zero length, so this is last
1251 * chunk, we finished decompression just now
1252 * and now we can happily finish reading this
1254 http
->chunk_remaining
= CHUNK_DATA_END
;
1258 if (!http
->chunk_remaining
&& rb
->length
> 0) {
1259 /* Eat newline succeeding each chunk. */
1260 if (rb
->data
[0] == ASCII_LF
) {
1261 kill_buffer_data(rb
, 1);
1263 if (rb
->data
[0] != ASCII_CR
1265 && rb
->data
[1] != ASCII_LF
)) {
1268 if (rb
->length
< 2) break;
1269 kill_buffer_data(rb
, 2);
1271 http
->chunk_remaining
= CHUNK_SIZE
;
1279 return !!total_data_len
;
/* Reader for bodies that are not chunked.  Takes the rb->length bytes
 * currently buffered, passes them through decompress_data(), stores the
 * result with add_fragment() at offset conn->from, advances conn->received
 * by the raw count and conn->from by the decoded count, and finally drops the
 * consumed bytes from rb.
 * When http->length is non-negative and smaller than the buffered amount, the
 * original comment says no more than the remaining length is read.
 * NOTE(review): the statements inside that branch and the return statements
 * are not part of this listing; confirm them against the complete source. */
1282 /* Returns 0 if more data, 1 if done. */
1284 read_normal_http_data(struct connection
*conn
, struct read_buffer
*rb
)
1286 struct http_connection_info
*http
= conn
->info
;
1287 unsigned char *data
;
1289 int len
= rb
->length
;
1291 if (http
->length
>= 0 && http
->length
< len
) {
1292 /* We won't read more than we have to go. */
1296 conn
->received
+= len
;
1298 data
= decompress_data(conn
, rb
->data
, len
, &data_len
);
1300 if (add_fragment(conn
->cached
, conn
->from
, data
, data_len
) == 1)
/* Release the buffer only when it is distinct from rb->data. */
1303 if (data
&& data
!= rb
->data
) mem_free(data
);
1305 conn
->from
+= data_len
;
1307 kill_buffer_data(rb
, len
);
1309 if (!http
->length
&& conn
->socket
->state
== SOCKET_RETRY_ONCLOSE
) {
/* Processes buffered response-body data for the connection owning socket.
 * When the socket state is SOCKET_CLOSED: if a content encoding is active and
 * http->length is -1, decompression is flushed first; read_http_data_done()
 * is also called on this path (presumably as the alternative branch).
 * Otherwise the buffer is handed to read_normal_http_data(), or to
 * read_chunked_http_data() when http->length is LEN_CHUNKED, and the result
 * selects one of: read_more_http_data() with a last argument of 0 or 1,
 * read_http_data_done(), or abort_connection() with S_HTTP_ERROR (that last
 * path asserts the result is -1).
 * NOTE(review): the lines mapping each result value to its action are not
 * part of this listing; confirm which value triggers which call. */
1317 read_http_data(struct socket
*socket
, struct read_buffer
*rb
)
1319 struct connection
*conn
= socket
->conn
;
1320 struct http_connection_info
*http
= conn
->info
;
1323 if (socket
->state
== SOCKET_CLOSED
) {
1324 if (conn
->content_encoding
&& http
->length
== -1) {
1325 /* Flush decompression first. */
1328 read_http_data_done(conn
);
/* Pick the body reader from the transfer mode recorded in http->length. */
1333 if (http
->length
!= LEN_CHUNKED
) {
1334 ret
= read_normal_http_data(conn
, rb
);
1337 ret
= read_chunked_http_data(conn
, rb
);
1342 read_more_http_data(conn
, rb
, 0);
1345 read_more_http_data(conn
, rb
, 1);
1348 read_http_data_done(conn
);
1351 assertm(ret
== -1, "Unexpected return value: %d", ret
);
1352 abort_connection(conn
, connection_state(S_HTTP_ERROR
));
/* Scans rb for the end of the HTTP response header block.
 * A buffer longer than four bytes that does not begin with "HTTP/"
 * (compared case-insensitively) takes the early branch at the top; per the
 * comments below this is the HTTP/0.9 guess (its return statement is not part
 * of this listing).
 * The loop then looks for a blank line: LF LF, or CR LF CR LF.  While at least
 * three more bytes follow a CR, a CR followed by something other than CR or
 * LF, or CR LF CR followed by a non-LF byte, returns -1. */
1356 /* Returns offset of the header end, zero if more data is needed, -1 when
1357 * incorrect data was received, -2 if this is HTTP/0.9 and no header is to
1360 get_header(struct read_buffer
*rb
)
1364 /* XXX: We will have to do some guess about whether an HTTP header is
1365 * coming or not, in order to support HTTP/0.9 reply correctly. This
1366 * means a little code duplication with get_http_code(). --pasky */
1367 if (rb
->length
> 4 && strncasecmp(rb
->data
, "HTTP/", 5))
1370 for (i
= 0; i
< rb
->length
; i
++) {
1371 unsigned char a0
= rb
->data
[i
];
/* NOTE(review): a1 is read from rb->data[i + 1] before any bound check on
 * i + 1; on the last iteration this is index rb->length.  Confirm the buffer
 * always has at least one readable byte past rb->length. */
1372 unsigned char a1
= rb
->data
[i
+ 1];
1378 if (a0
== ASCII_LF
&& a1
== ASCII_LF
1379 && i
< rb
->length
- 1)
1381 if (a0
== ASCII_CR
&& i
< rb
->length
- 3) {
1382 if (a1
== ASCII_CR
) continue;
1383 if (a1
!= ASCII_LF
) return -1;
1384 if (rb
->data
[i
+ 2] == ASCII_CR
) {
1385 if (rb
->data
[i
+ 3] != ASCII_LF
) return -1;
/* Processes the authentication challenges carried by header_field within
 * header.  parse_header() is called first on header and again on the
 * continuation pointer str, so several challenges can be handled.
 * The scheme is matched with a case-insensitive prefix comparison:
 *   "Basic"  - add_auth_entry() with the realm only and a final argument 0.
 *   "Digest" - add_auth_entry() with realm, nonce and opaque and a final
 *              argument 1.
 *   With CONFIG_GSSAPI, challenges matching HTTPNEG_GSS_STR or
 *   HTTPNEG_NEG_STR are passed to http_negotiate_input().
 * NOTE(review): the return statements and the loop header are not part of
 * this listing; confirm them against the complete source. */
1394 /* Returns 1 if the connection needs to be retried (for negotiate-auth only). */
1396 check_http_authentication(struct connection
*conn
, struct uri
*uri
,
1397 unsigned char *header
, unsigned char *header_field
)
1399 unsigned char *str
, *d
;
1402 d
= parse_header(header
, header_field
, &str
);
1404 if (!strncasecmp(d
, "Basic", 5)) {
1405 unsigned char *realm
= get_header_param(d
, "realm");
1408 add_auth_entry(uri
, realm
, NULL
, NULL
, 0);
1413 } else if (!strncasecmp(d
, "Digest", 6)) {
1414 unsigned char *realm
= get_header_param(d
, "realm");
1415 unsigned char *nonce
= get_header_param(d
, "nonce");
1416 unsigned char *opaque
= get_header_param(d
, "opaque");
1418 add_auth_entry(uri
, realm
, nonce
, opaque
, 1);
1422 mem_free_if(opaque
);
1426 #ifdef CONFIG_GSSAPI
1427 else if (!strncasecmp(d
, HTTPNEG_GSS_STR
, HTTPNEG_GSS_STRLEN
)) {
1428 if (http_negotiate_input(conn
, uri
, HTTPNEG_GSS
, str
)==0)
1433 else if (!strncasecmp(d
, HTTPNEG_NEG_STR
, HTTPNEG_NEG_STRLEN
)) {
1434 if (http_negotiate_input(conn
, uri
, HTTPNEG_NEG
, str
)==0)
/* Advance to the next occurrence of header_field, continuing from str. */
1441 d
= parse_header(str
, header_field
, &str
);
1448 http_got_header(struct socket
*socket
, struct read_buffer
*rb
)
1450 struct connection
*conn
= socket
->conn
;
1451 struct http_connection_info
*http
= conn
->info
;
1452 unsigned char *head
;
1453 #ifdef CONFIG_COOKIES
1454 unsigned char *cookie
, *ch
;
1457 struct uri
*uri
= conn
->proxied_uri
; /* Set to the real uri */
1458 struct http_version version
= { 0, 9 };
1459 struct connection_state state
= (!is_in_state(conn
->state
, S_PROC
)
1460 ? connection_state(S_GETH
)
1461 : connection_state(S_PROC
));
1465 if (socket
->state
== SOCKET_CLOSED
) {
1466 if (!conn
->tries
&& uri
->host
) {
1467 if (http
->bl_flags
& SERVER_BLACKLIST_NO_CHARSET
) {
1468 del_blacklist_entry(uri
, SERVER_BLACKLIST_NO_CHARSET
);
1470 add_blacklist_entry(uri
, SERVER_BLACKLIST_NO_CHARSET
);
1474 retry_connection(conn
, connection_state(S_CANT_READ
));
1477 socket
->state
= SOCKET_RETRY_ONCLOSE
;
1482 abort_connection(conn
, connection_state(S_HTTP_ERROR
));
1486 read_from_socket(conn
->socket
, rb
, state
, http_got_header
);
1489 /* a == -2 from get_header means HTTP/0.9. In that case, skip
1490 * the get_http_code call; @h and @version have already been
1491 * initialized with the right values. */
1493 if ((a
&& get_http_code(rb
, &h
, &version
))
1495 abort_connection(conn
, connection_state(S_HTTP_ERROR
));
1499 /* When no header, HTTP/0.9 document. That's always text/html,
1501 * http://www.w3.org/Protocols/HTTP/AsImplemented.html. */
1502 /* FIXME: This usage of fake protocol headers for setting up the
1503 * content type has been obsoleted by the @content_type member of
1504 * {struct cache_entry}. */
1505 head
= (a
? memacpy(rb
->data
, a
)
1506 : stracpy("\r\nContent-Type: text/html\r\n"));
1508 abort_connection(conn
, connection_state(S_OUT_OF_MEM
));
1512 if (check_http_server_bugs(uri
, http
, head
)) {
1514 retry_connection(conn
, connection_state(S_RESTART
));
1519 if (uri
->protocol
== PROTOCOL_FILE
) {
1520 /* ``Status'' is not a standard HTTP header field although some
1521 * HTTP servers like www.php.net uses it for some reason. It should
1522 * only be used for CGI scripts so that it does not interfere
1523 * with status code depended handling for ``normal'' HTTP like
1525 d
= parse_header(head
, "Status", NULL
);
1530 if (h2
>= 100 && h2
< 600) h
= h2
;
1533 abort_connection(conn
, connection_state(S_HTTP_ERROR
));
1540 #ifdef CONFIG_COOKIES
1542 while ((cookie
= parse_header(ch
, "Set-Cookie", &ch
))) {
1543 set_cookie(uri
, cookie
);
1551 state
= connection_state(S_PROC
);
1552 kill_buffer_data(rb
, a
);
1557 abort_connection(conn
, connection_state(S_HTTP_ERROR
));
1562 http_end_request(conn
, connection_state(S_OK
), 1);
1567 http_end_request(conn
, connection_state(S_HTTP_204
), 0);
1570 if (h
== 200 && connection_is_https_proxy(conn
) && !conn
->socket
->ssl
) {
1573 socket
->need_ssl
= 1;
1574 complete_connect_socket(socket
, uri
, http_send_header
);
1576 abort_connection(conn
, connection_state(S_SSL_ERROR
));
1581 conn
->cached
= get_cache_entry(conn
->uri
);
1582 if (!conn
->cached
) {
1584 abort_connection(conn
, connection_state(S_OUT_OF_MEM
));
1587 conn
->cached
->cgi
= conn
->cgi
;
1588 mem_free_set(&conn
->cached
->head
, head
);
1590 if (!get_opt_bool("document.cache.ignore_cache_control")) {
1591 struct cache_entry
*cached
= conn
->cached
;
1593 /* I am not entirely sure in what order we should process these
1594 * headers and if we should still process Cache-Control max-age
1595 * if we already set max age to date mentioned in Expires.
1597 /* Ensure that when ever cached->max_age is set, cached->expired
1598 * is also set, so the cache management knows max_age contains a
1599 * valid time. If on the other hand no caching is requested
1600 * cached->expire should be set to zero. */
1601 if ((d
= parse_header(cached
->head
, "Expires", NULL
))) {
1602 /* Convert date to seconds. */
1603 time_t expires
= parse_date(&d
, NULL
, 0, 1);
1607 if (expires
&& cached
->cache_mode
!= CACHE_MODE_NEVER
) {
1608 timeval_from_seconds(&cached
->max_age
, expires
);
1613 if ((d
= parse_header(cached
->head
, "Pragma", NULL
))) {
1614 if (strstr(d
, "no-cache")) {
1615 cached
->cache_mode
= CACHE_MODE_NEVER
;
1621 if (cached
->cache_mode
!= CACHE_MODE_NEVER
1622 && (d
= parse_header(cached
->head
, "Cache-Control", NULL
))) {
1623 if (strstr(d
, "no-cache") || strstr(d
, "must-revalidate")) {
1624 cached
->cache_mode
= CACHE_MODE_NEVER
;
1628 unsigned char *pos
= strstr(d
, "max-age=");
1630 assert(cached
->cache_mode
!= CACHE_MODE_NEVER
);
1633 /* Grab the number of seconds. */
1636 timeval_from_seconds(&max_age
, atol(pos
+ 8));
1637 timeval_now(&cached
->max_age
);
1638 timeval_add_interval(&cached
->max_age
, &max_age
);
1648 /* XXX: Is there some reason why NOT to follow the Location header
1649 * for any status? If the server didn't mean it, it wouldn't send
1650 * it, after all...? --pasky */
1651 if (h
== 201 || h
== 301 || h
== 302 || h
== 303 || h
== 307) {
1652 d
= parse_header(conn
->cached
->head
, "Location", NULL
);
1654 int use_get_method
= (h
== 303);
1656 /* A note from RFC 2616 section 10.3.3:
1657 * RFC 1945 and RFC 2068 specify that the client is not
1658 * allowed to change the method on the redirected
1659 * request. However, most existing user agent
1660 * implementations treat 302 as if it were a 303
1661 * response, performing a GET on the Location
1662 * field-value regardless of the original request
1664 /* So POST must not be redirected to GET, but some
1665 * BUGGY message boards rely on it :-( */
1667 && get_opt_bool("protocol.http.bugs.broken_302_redirect"))
1670 redirect_cache(conn
->cached
, d
, use_get_method
, -1);
1676 if (check_http_authentication(conn
, uri
,
1677 conn
->cached
->head
, "WWW-Authenticate")) {
1678 retry_connection(conn
, connection_state(S_RESTART
));
1686 d
= parse_header(conn
->cached
->head
, "Proxy-Authenticate", &str
);
1688 if (!strncasecmp(d
, "Basic", 5)) {
1689 unsigned char *realm
= get_header_param(d
, "realm");
1692 mem_free_set(&proxy_auth
.realm
, realm
);
1693 proxy_auth
.digest
= 0;
1698 } else if (!strncasecmp(d
, "Digest", 6)) {
1699 unsigned char *realm
= get_header_param(d
, "realm");
1700 unsigned char *nonce
= get_header_param(d
, "nonce");
1701 unsigned char *opaque
= get_header_param(d
, "opaque");
1703 mem_free_set(&proxy_auth
.realm
, realm
);
1704 mem_free_set(&proxy_auth
.nonce
, nonce
);
1705 mem_free_set(&proxy_auth
.opaque
, opaque
);
1706 proxy_auth
.digest
= 1;
1713 d
= parse_header(str
, "Proxy-Authenticate", &str
);
1717 kill_buffer_data(rb
, a
);
1720 http
->recv_version
= version
;
1722 if ((d
= parse_header(conn
->cached
->head
, "Connection", NULL
))
1723 || (d
= parse_header(conn
->cached
->head
, "Proxy-Connection", NULL
))) {
1724 if (!strcasecmp(d
, "close")) http
->close
= 1;
1726 } else if (PRE_HTTP_1_1(version
)) {
1732 d
= parse_header(conn
->cached
->head
, "Content-Range", NULL
);
1734 if (strlen(d
) > 6) {
1736 if (isdigit(d
[6]) && !strcasecmp(d
, "bytes")) {
1740 f
= strtol(d
+ 6, NULL
, 10);
1742 if (!errno
&& f
>= 0) conn
->from
= f
;
1747 if (cf
&& !conn
->from
&& !conn
->unrestartable
) conn
->unrestartable
= 1;
1748 if ((conn
->progress
->start
<= 0 && conn
->from
> cf
) || conn
->from
< 0) {
1749 /* We don't want this if conn->progress.start because then conn->from will
1750 * be probably value of conn->progress.start, while cf is 0. */
1751 abort_connection(conn
, connection_state(S_HTTP_ERROR
));
1758 foreach (s
, conn
->downloads
) {
1759 fprintf(stderr
, "conn %p status %p pri %d st %d er %d :: ce %s",
1760 conn
, s
, s
->pri
, s
->state
, s
->prev_error
,
1761 s
->cached
? s
->cached
->url
: (unsigned char *) "N-U-L-L");
1766 if (conn
->progress
->start
>= 0) {
1767 /* Update to the real value which we've got from Content-Range. */
1768 conn
->progress
->seek
= conn
->from
;
1770 conn
->progress
->start
= conn
->from
;
1772 d
= parse_header(conn
->cached
->head
, "Content-Length", NULL
);
1778 l
= strtol(d
, (char **) &ep
, 10);
1780 if (!errno
&& !*ep
&& l
>= 0) {
1781 if (!http
->close
|| POST_HTTP_1_0(version
))
1783 conn
->est_length
= conn
->from
+ l
;
1788 if (!conn
->unrestartable
) {
1789 d
= parse_header(conn
->cached
->head
, "Accept-Ranges", NULL
);
1792 if (!strcasecmp(d
, "none"))
1793 conn
->unrestartable
= 1;
1797 conn
->unrestartable
= 1;
1801 d
= parse_header(conn
->cached
->head
, "Transfer-Encoding", NULL
);
1803 if (!strcasecmp(d
, "chunked")) {
1804 http
->length
= LEN_CHUNKED
;
1805 http
->chunk_remaining
= CHUNK_SIZE
;
1809 if (!http
->close
&& http
->length
== -1) http
->close
= 1;
1811 d
= parse_header(conn
->cached
->head
, "Last-Modified", NULL
);
1813 if (conn
->cached
->last_modified
&& strcasecmp(conn
->cached
->last_modified
, d
)) {
1814 delete_entry_content(conn
->cached
);
1818 retry_connection(conn
, connection_state(S_MODIFIED
));
1822 if (!conn
->cached
->last_modified
) conn
->cached
->last_modified
= d
;
1825 if (!conn
->cached
->last_modified
) {
1826 d
= parse_header(conn
->cached
->head
, "Date", NULL
);
1827 if (d
) conn
->cached
->last_modified
= d
;
1830 /* FIXME: Parse only if HTTP/1.1 or later? --Zas */
1831 d
= parse_header(conn
->cached
->head
, "ETag", NULL
);
1833 if (conn
->cached
->etag
) {
1834 unsigned char *old_tag
= conn
->cached
->etag
;
1835 unsigned char *new_tag
= d
;
1837 /* http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19 */
1839 if (new_tag
[0] == 'W' && new_tag
[1] == '/')
1842 if (old_tag
[0] == 'W' && old_tag
[1] == '/')
1845 if (strcmp(new_tag
, old_tag
)) {
1846 delete_entry_content(conn
->cached
);
1850 retry_connection(conn
, connection_state(S_MODIFIED
));
1856 if (!conn
->cached
->etag
)
1857 conn
->cached
->etag
= d
;
1862 d
= parse_header(conn
->cached
->head
, "Content-Encoding", NULL
);
1864 unsigned char *extension
= get_extension_from_uri(uri
);
1865 enum stream_encoding file_encoding
;
1867 file_encoding
= extension
? guess_encoding(extension
) : ENCODING_NONE
;
1868 mem_free_if(extension
);
1870 /* If the content is encoded, we want to preserve the encoding
1871 * if it is implied by the extension, so that saving the URI
1872 * will leave the saved file with the correct encoding. */
1874 if (file_encoding
!= ENCODING_GZIP
1875 && (!strcasecmp(d
, "gzip") || !strcasecmp(d
, "x-gzip")))
1876 conn
->content_encoding
= ENCODING_GZIP
;
1877 if (!strcasecmp(d
, "deflate") || !strcasecmp(d
, "x-deflate"))
1878 conn
->content_encoding
= ENCODING_DEFLATE
;
1882 if (file_encoding
!= ENCODING_BZIP2
1883 && (!strcasecmp(d
, "bzip2") || !strcasecmp(d
, "x-bzip2")))
1884 conn
->content_encoding
= ENCODING_BZIP2
;
1888 if (file_encoding
!= ENCODING_LZMA
1889 && (!strcasecmp(d
, "lzma") || !strcasecmp(d
, "x-lzma")))
1890 conn
->content_encoding
= ENCODING_LZMA
;
1895 if (conn
->content_encoding
!= ENCODING_NONE
) {
1896 mem_free_if(conn
->cached
->encoding_info
);
1897 conn
->cached
->encoding_info
= stracpy(get_encoding_name(conn
->content_encoding
));
1900 if (http
->length
== -1 || http
->close
)
1901 socket
->state
= SOCKET_END_ONCLOSE
;
1903 read_http_data(socket
, rb
);