1 /* Internal "http" protocol implementation */
15 #include <fcntl.h> /* OS/2 needs this after sys/types.h */
23 #include "cache/cache.h"
24 #include "config/options.h"
25 #include "cookies/cookies.h"
26 #include "intl/charsets.h"
27 #include "intl/gettext/libintl.h"
28 #include "main/module.h"
29 #include "network/connection.h"
30 #include "network/progress.h"
31 #include "network/socket.h"
32 #include "osdep/ascii.h"
33 #include "osdep/osdep.h"
34 #include "osdep/sysname.h"
35 #include "protocol/auth/auth.h"
36 #include "protocol/auth/digest.h"
37 #include "protocol/date.h"
38 #include "protocol/header.h"
39 #include "protocol/http/blacklist.h"
40 #include "protocol/http/codes.h"
41 #include "protocol/http/http.h"
42 #include "protocol/uri.h"
43 #include "session/session.h"
44 #include "terminal/terminal.h"
45 #include "util/base64.h"
46 #include "util/conv.h"
47 #include "util/memory.h"
48 #include "util/string.h"
51 #include "http_negotiate.h"
/* HTTP version predicates. Each takes @x, an object (not a pointer) that has
 * integer `major` and `minor` members, and yields non-zero when the version
 * matches. @x is expanded more than once, so pass an expression without side
 * effects. */
/* Exactly HTTP/0.9, HTTP/1.0 and HTTP/1.1, respectively. */
59 #define HTTP_0_9(x) ((x).major == 0 && (x).minor == 9)
60 #define HTTP_1_0(x) ((x).major == 1 && (x).minor == 0)
61 #define HTTP_1_1(x) ((x).major == 1 && (x).minor == 1)
/* Any version whose major number is below 1. */
62 #define PRE_HTTP_1_0(x) ((x).major < 1)
/* Anything older than HTTP/1.1: either pre-1.0 or exactly 1.0. */
63 #define PRE_HTTP_1_1(x) (PRE_HTTP_1_0(x) || HTTP_1_0(x))
/* Strictly newer than HTTP/1.0 and strictly newer than HTTP/1.1, respectively. */
64 #define POST_HTTP_1_0(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 0))
65 #define POST_HTTP_1_1(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 1))
68 struct http_connection_info
{
69 enum blacklist_flags bl_flags
;
70 struct http_version recv_version
;
71 struct http_version sent_version
;
75 #define LEN_CHUNKED -2 /* == we get data in unknown number of chunks */
76 #define LEN_FINISHED 0
79 /* Either the number of bytes still to come in this chunk, or a "parser state". */
80 #define CHUNK_DATA_END -3
81 #define CHUNK_ZERO_SIZE -2
89 static struct auth_entry proxy_auth
;
91 static unsigned char *accept_charset
= NULL
;
94 static struct option_info http_options
[] = {
95 INIT_OPT_TREE("protocol", N_("HTTP"),
97 N_("HTTP-specific options.")),
100 INIT_OPT_TREE("protocol.http", N_("Server bug workarounds"),
102 N_("Server-side HTTP bugs workarounds.")),
104 INIT_OPT_BOOL("protocol.http.bugs", N_("Do not send Accept-Charset"),
105 "accept_charset", 0, 1,
106 N_("The Accept-Charset header is quite long and sending it can trigger\n"
107 "bugs in some rarely found servers.")),
109 INIT_OPT_BOOL("protocol.http.bugs", N_("Allow blacklisting"),
110 "allow_blacklist", 0, 1,
111 N_("Allow blacklisting of buggy servers.")),
113 INIT_OPT_BOOL("protocol.http.bugs", N_("Broken 302 redirects"),
114 "broken_302_redirect", 0, 1,
115 N_("Broken 302 redirect (violates RFC but compatible with Netscape).\n"
116 "This is a problem for a lot of web discussion boards and the like.\n"
117 "If they will do strange things to you, try to play with this.")),
119 INIT_OPT_BOOL("protocol.http.bugs", N_("No keepalive after POST requests"),
120 "post_no_keepalive", 0, 0,
121 N_("Disable keepalive connection after POST request.")),
123 INIT_OPT_BOOL("protocol.http.bugs", N_("Use HTTP/1.0"),
125 N_("Use HTTP/1.0 protocol instead of HTTP/1.1.")),
127 INIT_OPT_TREE("protocol.http", N_("Proxy configuration"),
129 N_("HTTP proxy configuration.")),
131 INIT_OPT_STRING("protocol.http.proxy", N_("Host and port-number"),
133 N_("Host and port-number (host:port) of the HTTP proxy, or blank.\n"
134 "If it's blank, HTTP_PROXY environment variable is checked as well.")),
136 INIT_OPT_STRING("protocol.http.proxy", N_("Username"),
138 N_("Proxy authentication username.")),
140 INIT_OPT_STRING("protocol.http.proxy", N_("Password"),
142 N_("Proxy authentication password.")),
145 INIT_OPT_TREE("protocol.http", N_("Referer sending"),
147 N_("HTTP referer sending options. HTTP referer is a special header\n"
148 "sent in the HTTP requests, which is supposed to contain the previous\n"
149 "page visited by the browser. This way, the server can know what link\n"
150 "did you follow when accessing that page. However, this behaviour\n"
151 "can unfortunately considerably affect privacy and can lead even to a\n"
152 "security problem on some badly designed web pages.")),
154 INIT_OPT_INT("protocol.http.referer", N_("Policy"),
156 REFERER_NONE
, REFERER_TRUE
, REFERER_TRUE
,
157 N_("Mode of sending HTTP referer:\n"
158 "0 is send no referer\n"
159 "1 is send current URL as referer\n"
160 "2 is send fixed fake referer\n"
161 "3 is send previous URL as referer (correct, but insecure)")),
163 INIT_OPT_STRING("protocol.http.referer", N_("Fake referer URL"),
165 N_("Fake referer to be sent when policy is 2.")),
168 INIT_OPT_STRING("protocol.http", N_("Send Accept-Language header"),
169 "accept_language", 0, "",
170 N_("Send Accept-Language header.")),
172 INIT_OPT_BOOL("protocol.http", N_("Use UI language as Accept-Language"),
173 "accept_ui_language", 0, 1,
174 N_("Request localised versions of documents from web-servers (using the\n"
175 "Accept-Language header) using the language you have configured for\n"
176 "ELinks' user-interface (this also affects navigator.language ECMAScript\n"
177 "value available to scripts). Note that some see this as a potential\n"
178 "security risk because it tells web-masters and the FBI sniffers about\n"
179 "your language preference.")),
181 INIT_OPT_BOOL("protocol.http", N_("Activate HTTP TRACE debugging"),
183 N_("If active, all HTTP requests are sent with TRACE as their method\n"
184 "rather than GET or POST. This is useful for debugging of both ELinks\n"
185 "and various server-side scripts --- the server only returns the client's\n"
186 "request back to the client verbatim. Note that this type of request may\n"
187 "not be enabled on all servers.")),
189 /* OSNews.com is supposed to be relying on the textmode token, at least. */
190 INIT_OPT_STRING("protocol.http", N_("User-agent identification"),
191 "user_agent", 0, "ELinks/%v (textmode; %s; %t-%b)",
192 N_("Change the User Agent ID. That means identification string, which\n"
193 "is sent to HTTP server when a document is requested. The 'textmode'\n"
194 "token in the first field is our silent attempt to establish this as\n"
195 "a standard for new textmode user agents, so that the webmasters can\n"
196 "have just a single uniform test for these if they are e.g. pushing\n"
197 "some lite version to them automagically.\n"
198 "%v in the string means ELinks version\n"
199 "%s in the string means system identification\n"
200 "%t in the string means size of the terminal\n"
201 "%b in the string means number of bars displayed by ELinks\n"
202 "Use \" \" if you don't want any User-Agent header to be sent at all.")),
205 INIT_OPT_TREE("protocol", N_("HTTPS"),
207 N_("HTTPS-specific options.")),
209 INIT_OPT_TREE("protocol.https", N_("Proxy configuration"),
211 N_("HTTPS proxy configuration.")),
213 INIT_OPT_STRING("protocol.https.proxy", N_("Host and port-number"),
215 N_("Host and port-number (host:port) of the HTTPS CONNECT proxy, or blank.\n"
216 "If it's blank, HTTPS_PROXY environment variable is checked as well.")),
220 static void done_http();
222 struct module http_protocol_module
= struct_module(
223 /* name: */ N_("HTTP"),
224 /* options: */ http_options
,
226 /* submodules: */ NULL
,
229 /* done: */ done_http
236 mem_free_if(proxy_auth
.realm
);
237 mem_free_if(proxy_auth
.nonce
);
238 mem_free_if(proxy_auth
.opaque
);
243 mem_free(accept_charset
);
247 init_accept_charset(void)
253 if (!init_string(&ac
)) return;
255 for (i
= 0; (cs
= get_cp_mime_name(i
)); i
++) {
257 add_to_string(&ac
, ", ");
259 add_to_string(&ac
, "Accept-Charset: ");
261 add_to_string(&ac
, cs
);
265 add_crlf_to_string(&ac
);
268 accept_charset
= squeezastring(&ac
);
275 subst_user_agent(unsigned char *fmt
, unsigned char *version
,
276 unsigned char *sysname
, unsigned char *termsize
)
280 if (!init_string(&agent
)) return NULL
;
285 for (p
= 0; fmt
[p
] && fmt
[p
] != '%'; p
++);
287 add_bytes_to_string(&agent
, fmt
, p
);
290 if (*fmt
!= '%') continue;
295 if (!list_empty(sessions
)) {
296 unsigned char bs
[4] = "";
298 struct session
*ses
= sessions
.prev
;
299 int bars
= ses
->status
.show_status_bar
300 + ses
->status
.show_tabs_bar
301 + ses
->status
.show_title_bar
;
303 ulongcat(bs
, &blen
, bars
, 2, 0);
304 add_to_string(&agent
, bs
);
308 add_to_string(&agent
, version
);
311 add_to_string(&agent
, sysname
);
315 add_to_string(&agent
, termsize
);
318 add_bytes_to_string(&agent
, fmt
- 1, 2);
328 add_url_to_http_string(struct string
*header
, struct uri
*uri
, int components
)
330 /* This block substitutes spaces in the URL with %20. This is
331 * certainly not the right place to do it, but now the
332 * behaviour is at least improved compared to what we had
333 * before. We should probably encode all URLs as early as
334 * possible, and possibly decode them back in protocol
335 * backends. --pasky */
336 unsigned char *string
= get_uri_string(uri
, components
);
337 unsigned char *data
= string
;
342 int len
= strcspn(data
, " \t\r\n\\");
344 add_bytes_to_string(header
, data
, len
);
346 if (!data
[len
]) break;
348 if (data
[len
++] == '\\')
349 add_char_to_string(header
, '/');
351 add_to_string(header
, "%20");
359 /* Parse from @end - 1 to @start and set *@value to integer found.
360 * It returns -1 if not a number, 0 otherwise.
361 * @end should be > @start. */
363 revstr2num(unsigned char *start
, unsigned char *end
, int *value
)
369 if (!isdigit(*end
)) return -1; /* NaN */
370 val
+= (*end
- '0') * q
;
372 } while (end
> start
);
378 /* This function extracts code, major and minor version from string
379 * "\s*HTTP/\d+.\d+\s+\d\d\d..."
380 * It returns a negative value on error, 0 on success.
383 get_http_code(struct read_buffer
*rb
, int *code
, struct http_version
*version
)
385 unsigned char *head
= rb
->data
;
386 unsigned char *start
;
393 while (*head
== ' ') head
++;
396 if (toupper(*head
) != 'H' || toupper(*++head
) != 'T' ||
397 toupper(*++head
) != 'T' || toupper(*++head
) != 'P'
404 while (*head
&& *head
!= '.') head
++;
406 if (!*head
|| !(head
- start
)
407 || (head
- start
) > 4
408 || !isdigit(*(head
+ 1)))
411 /* Extract major version number. */
412 if (revstr2num(start
, head
, &version
->major
)) return -3; /* NaN */
417 while (*head
&& *head
!= ' ') head
++;
419 if (!*head
|| !(head
- start
) || (head
- start
) > 4) return -4;
421 /* Extract minor version number. */
422 if (revstr2num(start
, head
, &version
->minor
)) return -5; /* NaN */
425 while (*head
== ' ') head
++;
427 /* Sanity check for code. */
428 if (head
[0] < '1' || head
[0] > '9' ||
431 return -6; /* Invalid code. */
434 *code
= (head
[0] - '0') * 100 + (head
[1] - '0') * 10 + head
[2] - '0';
440 check_http_server_bugs(struct uri
*uri
, struct http_connection_info
*http
,
443 unsigned char *server
;
444 const unsigned char *const *s
;
445 static const unsigned char *const buggy_servers
[] = {
448 "Netscape-Enterprise",
452 if (!get_opt_bool("protocol.http.bugs.allow_blacklist")
453 || HTTP_1_0(http
->sent_version
))
456 server
= parse_header(head
, "Server", NULL
);
460 for (s
= buggy_servers
; *s
; s
++) {
461 if (strstr(server
, *s
)) {
462 add_blacklist_entry(uri
, SERVER_BLACKLIST_HTTP10
);
472 http_end_request(struct connection
*conn
, enum connection_state state
,
475 shutdown_connection_stream(conn
);
477 if (conn
->info
&& !((struct http_connection_info
*) conn
->info
)->close
478 && (!conn
->socket
->ssl
) /* We won't keep alive ssl connections */
479 && (!get_opt_bool("protocol.http.bugs.post_no_keepalive")
480 || !conn
->uri
->post
)) {
481 if (state
== S_OK
&& conn
->cached
)
482 normalize_cache_entry(conn
->cached
, !notrunc
? conn
->from
: -1);
483 set_connection_state(conn
, state
);
484 add_keepalive_connection(conn
, HTTP_KEEPALIVE_TIMEOUT
, NULL
);
486 abort_connection(conn
, state
);
490 static void http_send_header(struct socket
*);
493 http_protocol_handler(struct connection
*conn
)
495 /* setcstate(conn, S_CONN); */
497 if (!has_keepalive_connection(conn
)) {
498 make_connection(conn
->socket
, conn
->uri
, http_send_header
,
499 conn
->cache_mode
>= CACHE_MODE_FORCE_RELOAD
);
501 http_send_header(conn
->socket
);
506 proxy_protocol_handler(struct connection
*conn
)
508 http_protocol_handler(conn
);
/* True when @x, a pointer to something with a `protocol` member, has its
 * protocol set to PROTOCOL_PROXY. */
511 #define IS_PROXY_URI(x) ((x)->protocol == PROTOCOL_PROXY)
/* True when @conn->uri is a proxy URI (see IS_PROXY_URI) and the URI being
 * proxied, @conn->proxied_uri, uses PROTOCOL_HTTPS. @conn is expanded twice,
 * so pass an expression without side effects. */
513 #define connection_is_https_proxy(conn) \
514 (IS_PROXY_URI((conn)->uri) && (conn)->proxied_uri->protocol == PROTOCOL_HTTPS)
516 struct http_connection_info
*
517 init_http_connection_info(struct connection
*conn
, int major
, int minor
, int close
)
519 struct http_connection_info
*http
;
521 http
= mem_calloc(1, sizeof(*http
));
523 http_end_request(conn
, S_OUT_OF_MEM
, 0);
527 http
->sent_version
.major
= major
;
528 http
->sent_version
.minor
= minor
;
531 /* The CGI code uses this too and blacklisting expects a host name. */
532 if (conn
->proxied_uri
->protocol
!= PROTOCOL_FILE
)
533 http
->bl_flags
= get_blacklist_flags(conn
->proxied_uri
);
535 if (http
->bl_flags
& SERVER_BLACKLIST_HTTP10
536 || get_opt_bool("protocol.http.bugs.http10")) {
537 http
->sent_version
.major
= 1;
538 http
->sent_version
.minor
= 0;
541 /* If called from HTTPS proxy connection the connection info might have
542 * already been allocated. */
543 mem_free_set(&conn
->info
, http
);
549 http_send_header(struct socket
*socket
)
551 struct connection
*conn
= socket
->conn
;
552 struct http_connection_info
*http
;
553 int trace
= get_opt_bool("protocol.http.trace");
554 struct string header
;
555 unsigned char *post_data
= NULL
;
556 struct auth_entry
*entry
= NULL
;
557 struct uri
*uri
= conn
->proxied_uri
; /* Set to the real uri */
558 unsigned char *optstr
;
559 int use_connect
, talking_to_proxy
;
561 /* Sanity check for a host */
562 if (!uri
|| !uri
->host
|| !*uri
->host
|| !uri
->hostlen
) {
563 http_end_request(conn
, S_BAD_URL
, 0);
567 http
= init_http_connection_info(conn
, 1, 1, 0);
570 if (!init_string(&header
)) {
571 http_end_request(conn
, S_OUT_OF_MEM
, 0);
575 if (!conn
->cached
) conn
->cached
= find_in_cache(uri
);
577 talking_to_proxy
= IS_PROXY_URI(conn
->uri
) && !conn
->socket
->ssl
;
578 use_connect
= connection_is_https_proxy(conn
) && !conn
->socket
->ssl
;
581 add_to_string(&header
, "TRACE ");
582 } else if (use_connect
) {
583 add_to_string(&header
, "CONNECT ");
584 /* In CONNECT requests, we send only a subset of the
585 * headers to the proxy. See the "CONNECT:" comments
586 * below. After the CONNECT request succeeds, we
587 * negotiate TLS with the real server and make a new
588 * HTTP request that includes all the headers. */
589 } else if (uri
->post
) {
590 add_to_string(&header
, "POST ");
591 conn
->unrestartable
= 1;
593 add_to_string(&header
, "GET ");
596 if (!talking_to_proxy
) {
597 add_char_to_string(&header
, '/');
601 /* Add port if it was specified or the default port */
602 add_uri_to_string(&header
, uri
, URI_HTTP_CONNECT
);
604 if (connection_is_https_proxy(conn
) && conn
->socket
->ssl
) {
605 add_url_to_http_string(&header
, uri
, URI_DATA
);
607 } else if (talking_to_proxy
) {
608 add_url_to_http_string(&header
, uri
, URI_PROXY
);
611 add_url_to_http_string(&header
, conn
->uri
, URI_DATA
);
615 add_to_string(&header
, " HTTP/");
616 add_long_to_string(&header
, http
->sent_version
.major
);
617 add_char_to_string(&header
, '.');
618 add_long_to_string(&header
, http
->sent_version
.minor
);
619 add_crlf_to_string(&header
);
621 /* CONNECT: Sending a Host header seems pointless as the same
622 * information is already in the CONNECT line. It's harmless
623 * though and Mozilla does it too. */
624 add_to_string(&header
, "Host: ");
625 add_uri_to_string(&header
, uri
, URI_HTTP_HOST
);
626 add_crlf_to_string(&header
);
628 /* CONNECT: Proxy-Authorization is intended to be seen by the proxy. */
629 if (talking_to_proxy
) {
630 unsigned char *user
= get_opt_str("protocol.http.proxy.user");
631 unsigned char *passwd
= get_opt_str("protocol.http.proxy.passwd");
633 if (proxy_auth
.digest
) {
634 unsigned char *response
;
635 int userlen
= int_min(strlen(user
), AUTH_USER_MAXLEN
- 1);
636 int passwordlen
= int_min(strlen(passwd
), AUTH_PASSWORD_MAXLEN
- 1);
639 memcpy(proxy_auth
.user
, user
, userlen
);
640 proxy_auth
.user
[userlen
] = '\0';
642 memcpy(proxy_auth
.password
, passwd
, passwordlen
);
643 proxy_auth
.password
[passwordlen
] = '\0';
645 /* FIXME: @uri is the proxied URI. Maybe the passed URI
646 * should be the proxy URI aka conn->uri. --jonas */
647 response
= get_http_auth_digest_response(&proxy_auth
, uri
);
649 add_to_string(&header
, "Proxy-Authorization: Digest ");
650 add_to_string(&header
, response
);
651 add_crlf_to_string(&header
);
658 unsigned char *proxy_data
;
660 proxy_data
= straconcat(user
, ":", passwd
, (unsigned char *) NULL
);
662 unsigned char *proxy_64
= base64_encode(proxy_data
);
665 add_to_string(&header
, "Proxy-Authorization: Basic ");
666 add_to_string(&header
, proxy_64
);
667 add_crlf_to_string(&header
);
670 mem_free(proxy_data
);
676 /* CONNECT: User-Agent does not reveal anything about the
677 * resource we're fetching, and it may help the proxy return
678 * better error messages. */
679 optstr
= get_opt_str("protocol.http.user_agent");
680 if (*optstr
&& strcmp(optstr
, " ")) {
681 unsigned char *ustr
, ts
[64] = "";
683 add_to_string(&header
, "User-Agent: ");
685 if (!list_empty(terminals
)) {
686 unsigned int tslen
= 0;
687 struct terminal
*term
= terminals
.prev
;
689 ulongcat(ts
, &tslen
, term
->width
, 3, 0);
691 ulongcat(ts
, &tslen
, term
->height
, 3, 0);
693 ustr
= subst_user_agent(optstr
, VERSION_STRING
, system_name
,
697 add_to_string(&header
, ustr
);
701 add_crlf_to_string(&header
);
704 /* CONNECT: Referer probably is a secret page in the HTTPS
705 * server, so don't reveal it to the proxy. */
707 switch (get_opt_int("protocol.http.referer.policy")) {
713 optstr
= get_opt_str("protocol.http.referer.fake");
714 if (!optstr
[0]) break;
715 add_to_string(&header
, "Referer: ");
716 add_to_string(&header
, optstr
);
717 add_crlf_to_string(&header
);
721 if (!conn
->referrer
) break;
722 add_to_string(&header
, "Referer: ");
723 add_url_to_http_string(&header
, conn
->referrer
, URI_HTTP_REFERRER
);
724 add_crlf_to_string(&header
);
727 case REFERER_SAME_URL
:
728 add_to_string(&header
, "Referer: ");
729 add_url_to_http_string(&header
, uri
, URI_HTTP_REFERRER
);
730 add_crlf_to_string(&header
);
735 /* CONNECT: Do send all Accept* headers to the CONNECT proxy,
736 * because they do not reveal anything about the resource
737 * we're going to request via TLS, and they may affect the
738 * error message if the CONNECT request fails.
740 * If ELinks is ever changed to vary its Accept headers based
741 * on what it intends to do with the returned resource, e.g.
742 * sending "Accept: text/css" when it wants an external
743 * stylesheet, then it should do that only in the inner GET
744 * and not in the outer CONNECT. */
745 add_to_string(&header
, "Accept: */*");
746 add_crlf_to_string(&header
);
748 /* TODO: Make this encoding.c function. */
749 #if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2)
750 add_to_string(&header
, "Accept-Encoding: ");
753 add_to_string(&header
, "bzip2");
759 add_to_string(&header
, ", ");
762 add_to_string(&header
, "gzip");
764 add_crlf_to_string(&header
);
767 if (!accept_charset
) {
768 init_accept_charset();
771 if (!(http
->bl_flags
& SERVER_BLACKLIST_NO_CHARSET
)
772 && !get_opt_bool("protocol.http.bugs.accept_charset")
774 add_to_string(&header
, accept_charset
);
777 optstr
= get_opt_str("protocol.http.accept_language");
779 add_to_string(&header
, "Accept-Language: ");
780 add_to_string(&header
, optstr
);
781 add_crlf_to_string(&header
);
784 else if (get_opt_bool("protocol.http.accept_ui_language")) {
785 unsigned char *code
= language_to_iso639(current_language
);
788 add_to_string(&header
, "Accept-Language: ");
789 add_to_string(&header
, code
);
790 add_crlf_to_string(&header
);
795 /* CONNECT: Proxy-Connection is intended to be seen by the
796 * proxy. If the CONNECT request succeeds, then the proxy
797 * will forward the remainder of the TCP connection to the
798 * origin server, and Proxy-Connection does not matter; but
799 * if the request fails, then Proxy-Connection may matter. */
800 /* FIXME: What about post-HTTP/1.1?? --Zas */
801 if (HTTP_1_1(http
->sent_version
)) {
802 if (!IS_PROXY_URI(conn
->uri
)) {
803 add_to_string(&header
, "Connection: ");
805 add_to_string(&header
, "Proxy-Connection: ");
808 if (!uri
->post
|| !get_opt_bool("protocol.http.bugs.post_no_keepalive")) {
809 add_to_string(&header
, "Keep-Alive");
811 add_to_string(&header
, "close");
813 add_crlf_to_string(&header
);
816 /* CONNECT: Do not tell the proxy anything we have cached
817 * about the resource. */
818 if (!use_connect
&& conn
->cached
) {
819 if (!conn
->cached
->incomplete
&& conn
->cached
->head
820 && conn
->cache_mode
<= CACHE_MODE_CHECK_IF_MODIFIED
) {
821 if (conn
->cached
->last_modified
) {
822 add_to_string(&header
, "If-Modified-Since: ");
823 add_to_string(&header
, conn
->cached
->last_modified
);
824 add_crlf_to_string(&header
);
826 if (conn
->cached
->etag
) {
827 add_to_string(&header
, "If-None-Match: ");
828 add_to_string(&header
, conn
->cached
->etag
);
829 add_crlf_to_string(&header
);
834 /* CONNECT: Let's send cache control headers to the proxy too;
835 * they may affect DNS caching. */
836 if (conn
->cache_mode
>= CACHE_MODE_FORCE_RELOAD
) {
837 add_to_string(&header
, "Pragma: no-cache");
838 add_crlf_to_string(&header
);
839 add_to_string(&header
, "Cache-Control: no-cache");
840 add_crlf_to_string(&header
);
843 /* CONNECT: Do not reveal byte ranges to the proxy. It can't
844 * do anything good with that information anyway. */
845 if (!use_connect
&& (conn
->from
|| conn
->progress
->start
> 0)) {
846 /* conn->from takes precedence. conn->progress->start is set only the first
847 * time, then conn->from gets updated and in case of any retries
848 * etc we have everything interesting in conn->from already. */
849 add_to_string(&header
, "Range: bytes=");
850 add_long_to_string(&header
, conn
->from
? conn
->from
: conn
->progress
->start
);
851 add_char_to_string(&header
, '-');
852 add_crlf_to_string(&header
);
855 /* CONNECT: The Authorization header is for the origin server only. */
858 if (http_negotiate_output(uri
, &header
) != 0)
860 entry
= find_auth(uri
);
865 unsigned char *response
;
867 response
= get_http_auth_digest_response(entry
, uri
);
869 add_to_string(&header
, "Authorization: Digest ");
870 add_to_string(&header
, response
);
871 add_crlf_to_string(&header
);
877 /* RFC2617 section 2 [Basic Authentication Scheme]
879 * To receive authorization, the client sends the userid
880 * and password, separated by a single colon (":")
881 * character, within a base64 [7] encoded string in the
885 /* Create base64 encoded string. */
886 id
= straconcat(entry
->user
, ":", entry
->password
,
887 (unsigned char *) NULL
);
889 unsigned char *base64
= base64_encode(id
);
891 mem_free_set(&id
, base64
);
895 add_to_string(&header
, "Authorization: Basic ");
896 add_to_string(&header
, id
);
897 add_crlf_to_string(&header
);
903 /* CONNECT: Any POST data is for the origin server only. */
904 if (!use_connect
&& uri
->post
) {
905 /* We search for first '\n' in uri->post to get content type
906 * as set by get_form_uri(). This '\n' is dropped if any
907 * and replaced by correct '\r\n' termination here. */
908 unsigned char *postend
= strchr(uri
->post
, '\n');
911 add_to_string(&header
, "Content-Type: ");
912 add_bytes_to_string(&header
, uri
->post
, postend
- uri
->post
);
913 add_crlf_to_string(&header
);
916 post_data
= postend
? postend
+ 1 : uri
->post
;
917 add_to_string(&header
, "Content-Length: ");
918 add_long_to_string(&header
, strlen(post_data
) / 2);
919 add_crlf_to_string(&header
);
922 #ifdef CONFIG_COOKIES
923 /* CONNECT: Cookies are for the origin server only. */
925 struct string
*cookies
= send_cookies(uri
);
928 add_to_string(&header
, "Cookie: ");
929 add_string_to_string(&header
, cookies
);
930 add_crlf_to_string(&header
);
931 done_string(cookies
);
936 add_crlf_to_string(&header
);
938 /* CONNECT: Any POST data is for the origin server only.
939 * This was already checked above and post_data is NULL
940 * in that case. Verified with an assertion below. */
942 #define POST_BUFFER_SIZE 4096
943 unsigned char *post
= post_data
;
944 unsigned char buffer
[POST_BUFFER_SIZE
];
947 assert(!use_connect
); /* see comment above */
949 while (post
[0] && post
[1]) {
953 assertm(h1
>= 0 && h1
< 16, "h1 in the POST buffer is %d (%d/%c)", h1
, post
[0], post
[0]);
954 if_assert_failed h1
= 0;
957 assertm(h2
>= 0 && h2
< 16, "h2 in the POST buffer is %d (%d/%c)", h2
, post
[1], post
[1]);
958 if_assert_failed h2
= 0;
960 buffer
[n
++] = (h1
<<4) + h2
;
962 if (n
== POST_BUFFER_SIZE
) {
963 add_bytes_to_string(&header
, buffer
, n
);
969 add_bytes_to_string(&header
, buffer
, n
);
970 #undef POST_BUFFER_SIZE
973 request_from_socket(socket
, header
.source
, header
.length
, S_SENT
,
974 SOCKET_END_ONCLOSE
, http_got_header
);
975 done_string(&header
);
979 /* This function decompresses the data block given in @data (if it was
980 * compressed), which is @len bytes long. The decompressed data block is given
981 * back to the world as the return value and its length is stored into
984 * In this function, value of either http->chunk_remaining or http->length is
985 * being changed (it depends on if chunked mode is used or not).
987 * Note that the function is still a little esoteric for me. Don't take it
988 * lightly and don't mess with it without grave reason! If you dare to touch
989 * this without testing the changes on slashdot, freshmeat and cvsweb
990 * (including revision history), don't dare to send me any patches! ;) --pasky
992 * This function gotta die. */
993 static unsigned char *
994 decompress_data(struct connection
*conn
, unsigned char *data
, int len
,
997 struct http_connection_info
*http
= conn
->info
;
998 /* to_read is number of bytes to be read from the decoder. It is 65536
999 * (then we are just emptying the decoder buffer as we finished the walk
1000 * through the incoming stream already) or PIPE_BUF / 2 (when we are
1001 * still walking through the stream - then we write PIPE_BUF / 2 to the
1002 * pipe and read it back to the decoder ASAP; the point is that we can't
1003 * write more than PIPE_BUF to the pipe at once, but we also have to
1004 * never let read_encoded() (gzread(), in fact) empty the pipe - that
1005 * causes further malfunction of zlib :[ ... so we will make sure that
1006 * we will always have at least PIPE_BUF / 2 + 1 in the pipe (returning
1007 * early otherwise)). */
1008 enum { NORMAL
, FINISHING
} state
= NORMAL
;
1010 int *length_of_block
;
1011 unsigned char *output
= NULL
;
1013 length_of_block
= (http
->length
== LEN_CHUNKED
? &http
->chunk_remaining
1016 #define BIG_READ 65536
1017 if (!*length_of_block
) {
1018 /* Going to finish this decoding business. */
1022 if (conn
->content_encoding
== ENCODING_NONE
) {
1024 if (*length_of_block
> 0) *length_of_block
-= len
;
1028 *new_len
= 0; /* new_len must be zero if we would ever return NULL */
1030 if (conn
->stream_pipes
[0] == -1
1031 && (c_pipe(conn
->stream_pipes
) < 0
1032 || set_nonblocking_fd(conn
->stream_pipes
[0]) < 0
1033 || set_nonblocking_fd(conn
->stream_pipes
[1]) < 0)) {
1038 /* The initial value is used only when state == NORMAL.
1039 * Unconditional initialization avoids a GCC warning. */
1040 int to_read
= PIPE_BUF
/ 2;
1042 if (state
== NORMAL
) {
1043 /* ... we aren't finishing yet. */
1046 written
= safe_write(conn
->stream_pipes
[1], data
,
1047 len
> to_read
? to_read
: len
);
1053 /* In non-keep-alive connections http->length == -1, so the test below */
1054 if (*length_of_block
> 0)
1055 *length_of_block
-= written
;
1056 /* http->length is 0 at the end of block for all modes: keep-alive,
1057 * non-keep-alive and chunked */
1058 if (!http
->length
) {
1059 /* That's all, folks - let's finish this. */
1062 /* We're done for this round (but not done
1063 * completely). Thus we will get out with
1064 * what we have and leave what we wrote to
1065 * the next round - we have to do that since
1066 * we MUST NOT ever empty the pipe completely
1067 * - this would cause a disaster for
1068 * read_encoded(), which would simply not
1069 * work right then. */
1075 if (!conn
->stream
) {
1076 conn
->stream
= open_encoded(conn
->stream_pipes
[0],
1077 conn
->content_encoding
);
1078 if (!conn
->stream
) return NULL
;
1081 output
= (unsigned char *) mem_realloc(output
, *new_len
+ BIG_READ
);
1084 did_read
= read_encoded(conn
->stream
, output
+ *new_len
, BIG_READ
);
1086 if (did_read
> 0) *new_len
+= did_read
;
1087 else if (did_read
== -1) {
1088 mem_free_set(&output
, NULL
);
1090 break; /* Loop prevention (bug 517), is this correct ? --Zas */
1092 } while (len
|| did_read
== BIG_READ
);
1094 shutdown_connection_stream(conn
);
1099 is_line_in_buffer(struct read_buffer
*rb
)
1103 for (l
= 0; l
< rb
->length
; l
++) {
1104 unsigned char a0
= rb
->data
[l
];
1108 if (a0
== ASCII_CR
) {
1109 if (rb
->data
[l
+ 1] == ASCII_LF
1110 && l
< rb
->length
- 1)
1112 if (l
== rb
->length
- 1)
1121 static void read_http_data(struct socket
*socket
, struct read_buffer
*rb
);
1124 read_more_http_data(struct connection
*conn
, struct read_buffer
*rb
,
1125 int already_got_anything
)
1127 enum connection_state state
= already_got_anything
? S_TRANS
: conn
->state
;
1129 read_from_socket(conn
->socket
, rb
, state
, read_http_data
);
1133 read_http_data_done(struct connection
*conn
)
1135 struct http_connection_info
*http
= conn
->info
;
1137 /* There's no content but an error so just print
1138 * that instead of nothing. */
1140 if (http
->code
>= 400) {
1141 http_error_document(conn
, http
->code
);
1144 /* This is not an error, thus fine. No need to generate any
1145 * document, as this may be empty and it's not a problem.
1146 * In case of 3xx, we're probably just getting kicked to
1147 * another page anyway. And in case of 2xx, the document
1148 * may indeed be empty and thus the user should see it so. */
1152 http_end_request(conn
, S_OK
, 0);
/* Consume chunked body data held in @rb (the response announced
 * "Transfer-Encoding: chunked"). http->chunk_remaining doubles as the parser
 * state: CHUNK_SIZE while a chunk-size line is expected, CHUNK_ZERO_SIZE once
 * a zero-sized chunk was announced, CHUNK_DATA_END after that last chunk has
 * been processed, and otherwise the value parsed from the chunk-size line.
 * Decoded data is passed through decompress_data() and add_fragment(), and
 * conn->received and conn->from are advanced accordingly.
 * The visible return statement yields nonzero when any data was stored by
 * this call. NOTE(review): other return paths are not visible in this
 * excerpt; confirm them against the complete function. */
1161 read_chunked_http_data(struct connection
*conn
, struct read_buffer
*rb
)
1163 struct http_connection_info
*http
= conn
->info
;
1164 int total_data_len
= 0;
1167 /* Chunked. Good luck! */
1168 /* See RFC2616, section 3.6.1. Basically, it looks like:
1169 * 1234 ; a = b ; c = d\r\n
1170 * aklkjadslkfjalkfjlkajkljfdkljdsfkljdf*1234\r\n
1173 if (http
->chunk_remaining
== CHUNK_DATA_END
) {
1174 int l
= is_line_in_buffer(rb
);
1178 /* Invalid character in buffer. */
1182 /* Remove everything to the EOLN. */
1183 kill_buffer_data(rb
, l
);
1191 } else if (http
->chunk_remaining
== CHUNK_SIZE
) {
1192 int l
= is_line_in_buffer(rb
);
/* The chunk size is parsed as a hexadecimal number (base 16); de receives
 * the position where parsing stopped. */
1200 n
= strtol(rb
->data
, (char **) &de
, 16);
1201 if (errno
|| !*de
) {
1206 if (l
== -1 || de
== rb
->data
) {
1210 /* Remove everything to the EOLN. */
1211 kill_buffer_data(rb
, l
);
1212 http
->chunk_remaining
= n
;
1213 if (!http
->chunk_remaining
)
1214 http
->chunk_remaining
= CHUNK_ZERO_SIZE
;
1219 unsigned char *data
;
/* Remember that this is the zero-sized chunk before the CHUNK_ZERO_SIZE
 * sentinel is replaced by a plain length of 0 below. */
1222 int zero
= (http
->chunk_remaining
== CHUNK_ZERO_SIZE
);
1224 if (zero
) http
->chunk_remaining
= 0;
1225 len
= http
->chunk_remaining
;
1227 /* Maybe everything necessary didn't come yet.. */
1228 int_upper_bound(&len
, rb
->length
);
1229 conn
->received
+= len
;
1231 data
= decompress_data(conn
, rb
->data
, len
, &data_len
);
1233 if (add_fragment(conn
->cached
, conn
->from
,
1234 data
, data_len
) == 1)
1237 if (data
&& data
!= rb
->data
) mem_free(data
);
1239 conn
->from
+= data_len
;
1240 total_data_len
+= data_len
;
1242 kill_buffer_data(rb
, len
);
1245 /* Last chunk has zero length, so this is last
1246 * chunk, we finished decompression just now
1247 * and now we can happily finish reading this
1249 http
->chunk_remaining
= CHUNK_DATA_END
;
1253 if (!http
->chunk_remaining
&& rb
->length
> 0) {
1254 /* Eat newline succeeding each chunk. */
1255 if (rb
->data
[0] == ASCII_LF
) {
1256 kill_buffer_data(rb
, 1);
1258 if (rb
->data
[0] != ASCII_CR
1260 && rb
->data
[1] != ASCII_LF
)) {
1263 if (rb
->length
< 2) break;
1264 kill_buffer_data(rb
, 2);
1266 http
->chunk_remaining
= CHUNK_SIZE
;
1274 return !!total_data_len
;
/* Consume non-chunked body data from @rb. The amount taken starts out as
 * rb->length; it is run through decompress_data(), the result is handed to
 * add_fragment() for conn->cached at offset conn->from, and the consumed
 * bytes are dropped from the read buffer with kill_buffer_data().
 * conn->received grows by the raw length, conn->from by the decoded length. */
1277 /* Returns 0 if more data, 1 if done. */
1279 read_normal_http_data(struct connection
*conn
, struct read_buffer
*rb
)
1281 struct http_connection_info
*http
= conn
->info
;
1282 unsigned char *data
;
1284 int len
= rb
->length
;
1286 if (http
->length
>= 0 && http
->length
< len
) {
1287 /* We won't read more than we have to go. */
1291 conn
->received
+= len
;
1293 data
= decompress_data(conn
, rb
->data
, len
, &data_len
);
1295 if (add_fragment(conn
->cached
, conn
->from
, data
, data_len
) == 1)
1298 if (data
&& data
!= rb
->data
) mem_free(data
);
1300 conn
->from
+= data_len
;
1302 kill_buffer_data(rb
, len
);
1304 if (!http
->length
&& conn
->socket
->state
== SOCKET_RETRY_ONCLOSE
) {
/* Process response body data available in @rb for @socket's connection.
 * A closed socket leads to read_http_data_done(). Otherwise the buffer is
 * given to read_normal_http_data(), or to read_chunked_http_data() when
 * http->length is LEN_CHUNKED, and the result ret decides what follows:
 * read_more_http_data() (last argument 0 or 1), read_http_data_done(), or
 * abort_connection() with S_HTTP_ERROR. The assertm() shows that -1 is the
 * only value expected on the abort path.
 * NOTE(review): the dispatch on ret itself is not visible in this excerpt. */
1312 read_http_data(struct socket
*socket
, struct read_buffer
*rb
)
1314 struct connection
*conn
= socket
->conn
;
1315 struct http_connection_info
*http
= conn
->info
;
1318 if (socket
->state
== SOCKET_CLOSED
) {
1319 if (conn
->content_encoding
&& http
->length
== -1) {
1320 /* Flush decompression first. */
1323 read_http_data_done(conn
);
1328 if (http
->length
!= LEN_CHUNKED
) {
1329 ret
= read_normal_http_data(conn
, rb
);
1332 ret
= read_chunked_http_data(conn
, rb
);
1337 read_more_http_data(conn
, rb
, 0);
1340 read_more_http_data(conn
, rb
, 1);
1343 read_http_data_done(conn
);
1346 assertm(ret
== -1, "Unexpected return value: %d", ret
);
1347 abort_connection(conn
, S_HTTP_ERROR
);
/* Scan @rb for the blank line that terminates the response header. A buffer
 * longer than four bytes that does not begin with "HTTP/" (compared
 * case-insensitively) is dealt with before the scan. The scan itself looks
 * for LF LF and for CR LF CR LF; a CR followed by anything other than CR or
 * LF, or CR LF CR not followed by LF, yields -1.
 * NOTE(review): a1 is loaded from rb->data[i + 1] before the
 * i < rb->length - 1 bound is tested, so on the last iteration it reads one
 * byte past rb->length; confirm the buffer always has that byte allocated. */
1351 /* Returns offset of the header end, zero if more data is needed, -1 when
1352 * incorrect data was received, -2 if this is HTTP/0.9 and no header is to
1355 get_header(struct read_buffer
*rb
)
1359 /* XXX: We will have to do some guess about whether an HTTP header is
1360 * coming or not, in order to support HTTP/0.9 reply correctly. This
1361 * means a little code duplication with get_http_code(). --pasky */
1362 if (rb
->length
> 4 && strncasecmp(rb
->data
, "HTTP/", 5))
1365 for (i
= 0; i
< rb
->length
; i
++) {
1366 unsigned char a0
= rb
->data
[i
];
1367 unsigned char a1
= rb
->data
[i
+ 1];
1373 if (a0
== ASCII_LF
&& a1
== ASCII_LF
1374 && i
< rb
->length
- 1)
1376 if (a0
== ASCII_CR
&& i
< rb
->length
- 3) {
1377 if (a1
== ASCII_CR
) continue;
1378 if (a1
!= ASCII_LF
) return -1;
1379 if (rb
->data
[i
+ 2] == ASCII_CR
) {
1380 if (rb
->data
[i
+ 3] != ASCII_LF
) return -1;
/* Go through the @header_field entries found in @header; each
 * parse_header() call resumes from str, which the previous call updated.
 * The scheme is matched case-insensitively by prefix:
 *  - "Basic": the realm parameter is passed to add_auth_entry() with NULL
 *    nonce and opaque and a final argument of 0;
 *  - "Digest": realm, nonce and opaque are passed to add_auth_entry() with
 *    a final argument of 1;
 *  - with CONFIG_GSSAPI, HTTPNEG_GSS_STR and HTTPNEG_NEG_STR challenges are
 *    forwarded to http_negotiate_input(). */
1389 /* returns 1 if we need retry the connection (for negotiate-auth only) */
1391 check_http_authentication(struct connection
*conn
, struct uri
*uri
,
1392 unsigned char *header
, unsigned char *header_field
)
1394 unsigned char *str
, *d
;
1397 d
= parse_header(header
, header_field
, &str
);
1399 if (!strncasecmp(d
, "Basic", 5)) {
1400 unsigned char *realm
= get_header_param(d
, "realm");
1403 add_auth_entry(uri
, realm
, NULL
, NULL
, 0);
1408 } else if (!strncasecmp(d
, "Digest", 6)) {
1409 unsigned char *realm
= get_header_param(d
, "realm");
1410 unsigned char *nonce
= get_header_param(d
, "nonce");
1411 unsigned char *opaque
= get_header_param(d
, "opaque");
1413 add_auth_entry(uri
, realm
, nonce
, opaque
, 1);
1417 mem_free_if(opaque
);
1421 #ifdef CONFIG_GSSAPI
1422 else if (!strncasecmp(d
, HTTPNEG_GSS_STR
, HTTPNEG_GSS_STRLEN
)) {
1423 if (http_negotiate_input(conn
, uri
, HTTPNEG_GSS
, str
)==0)
1428 else if (!strncasecmp(d
, HTTPNEG_NEG_STR
, HTTPNEG_NEG_STRLEN
)) {
1429 if (http_negotiate_input(conn
, uri
, HTTPNEG_NEG
, str
)==0)
1436 d
= parse_header(str
, header_field
, &str
);
/* Process the response header accumulated in @rb for @socket's connection.
 * Steps visible in this excerpt, in order:
 *  - on a closed socket, when conn->tries is 0 and the URI has a host, the
 *    SERVER_BLACKLIST_NO_CHARSET blacklist entry is deleted or added
 *    depending on http->bl_flags, and the connection is retried with
 *    S_CANT_READ;
 *  - read_from_socket() is called with this function as its callback;
 *  - head becomes a copy of the first a bytes of the buffer, or a fake
 *    "Content-Type: text/html" header when a is 0 (HTTP/0.9);
 *  - check_http_server_bugs() can trigger a retry with S_RESTART;
 *  - for PROTOCOL_FILE URIs a "Status" header is looked up;
 *  - with CONFIG_COOKIES every Set-Cookie header goes to set_cookie();
 *  - a 200 reply on an https-proxy connection without SSL sets
 *    socket->need_ssl and calls complete_connect_socket() with
 *    http_send_header;
 *  - head is stored in the cache entry; Expires, Pragma and Cache-Control
 *    are evaluated unless document.cache.ignore_cache_control is set;
 *  - Location is followed through redirect_cache() for 201, 301, 302, 303
 *    and 307; WWW-Authenticate and Proxy-Authenticate are handled;
 *  - Connection or Proxy-Connection, Content-Range, Content-Length,
 *    Accept-Ranges, Transfer-Encoding, Last-Modified, Date, ETag and
 *    Content-Encoding are parsed, and read_http_data() takes over.
 * NOTE(review): a number of lines of this function are not visible here, so
 * the exact conditions guarding some of these steps need confirming. */
1443 http_got_header(struct socket
*socket
, struct read_buffer
*rb
)
1445 struct connection
*conn
= socket
->conn
;
1446 struct http_connection_info
*http
= conn
->info
;
1447 unsigned char *head
;
1448 #ifdef CONFIG_COOKIES
1449 unsigned char *cookie
, *ch
;
1452 struct uri
*uri
= conn
->proxied_uri
; /* Set to the real uri */
1453 struct http_version version
;
1454 enum connection_state state
= (conn
->state
!= S_PROC
? S_GETH
: S_PROC
);
1458 if (socket
->state
== SOCKET_CLOSED
) {
1459 if (!conn
->tries
&& uri
->host
) {
1460 if (http
->bl_flags
& SERVER_BLACKLIST_NO_CHARSET
) {
1461 del_blacklist_entry(uri
, SERVER_BLACKLIST_NO_CHARSET
);
1463 add_blacklist_entry(uri
, SERVER_BLACKLIST_NO_CHARSET
);
1467 retry_connection(conn
, S_CANT_READ
);
1470 socket
->state
= SOCKET_RETRY_ONCLOSE
;
1475 abort_connection(conn
, S_HTTP_ERROR
);
1479 read_from_socket(conn
->socket
, rb
, state
, http_got_header
);
1483 if ((a
&& get_http_code(rb
, &h
, &version
))
1485 abort_connection(conn
, S_HTTP_ERROR
);
1489 /* When no header, HTTP/0.9 document. That's always text/html,
1491 * http://www.w3.org/Protocols/HTTP/AsImplemented.html. */
1492 /* FIXME: This usage of fake protocol headers for setting up the
1493 * content type has been obsoleted by the @content_type member of
1494 * {struct cache_entry}. */
1495 head
= (a
? memacpy(rb
->data
, a
)
1496 : stracpy("\r\nContent-Type: text/html\r\n"));
1498 abort_connection(conn
, S_OUT_OF_MEM
);
1502 if (check_http_server_bugs(uri
, http
, head
)) {
1504 retry_connection(conn
, S_RESTART
);
1509 if (uri
->protocol
== PROTOCOL_FILE
) {
1510 /* ``Status'' is not a standard HTTP header field although some
1511 * HTTP servers like www.php.net uses it for some reason. It should
1512 * only be used for CGI scripts so that it does not interfere
1513 * with status code depended handling for ``normal'' HTTP like
1515 d
= parse_header(head
, "Status", NULL
);
1520 if (h2
>= 100 && h2
< 600) h
= h2
;
1523 abort_connection(conn
, S_HTTP_ERROR
);
1530 #ifdef CONFIG_COOKIES
1532 while ((cookie
= parse_header(ch
, "Set-Cookie", &ch
))) {
1533 set_cookie(uri
, cookie
);
1542 kill_buffer_data(rb
, a
);
1547 abort_connection(conn
, S_HTTP_ERROR
);
1552 http_end_request(conn
, S_OK
, 1);
1557 http_end_request(conn
, S_HTTP_204
, 0);
1560 if (h
== 200 && connection_is_https_proxy(conn
) && !conn
->socket
->ssl
) {
1563 socket
->need_ssl
= 1;
1564 complete_connect_socket(socket
, uri
, http_send_header
);
1566 abort_connection(conn
, S_SSL_ERROR
);
1571 conn
->cached
= get_cache_entry(conn
->uri
);
1572 if (!conn
->cached
) {
1574 abort_connection(conn
, S_OUT_OF_MEM
);
1577 mem_free_set(&conn
->cached
->head
, head
);
1579 if (!get_opt_bool("document.cache.ignore_cache_control")) {
1580 struct cache_entry
*cached
= conn
->cached
;
1582 /* I am not entirely sure in what order we should process these
1583 * headers and if we should still process Cache-Control max-age
1584 * if we already set max age to date mentioned in Expires.
1586 /* Ensure that when ever cached->max_age is set, cached->expired
1587 * is also set, so the cache management knows max_age contains a
1588 * valid time. If on the other hand no caching is requested
1589 * cached->expire should be set to zero. */
1590 if ((d
= parse_header(cached
->head
, "Expires", NULL
))) {
1591 /* Convert date to seconds. */
1592 time_t expires
= parse_date(&d
, NULL
, 0, 1);
1596 if (expires
&& cached
->cache_mode
!= CACHE_MODE_NEVER
) {
1597 timeval_from_seconds(&cached
->max_age
, expires
);
1602 if ((d
= parse_header(cached
->head
, "Pragma", NULL
))) {
1603 if (strstr(d
, "no-cache")) {
1604 cached
->cache_mode
= CACHE_MODE_NEVER
;
1610 if (cached
->cache_mode
!= CACHE_MODE_NEVER
1611 && (d
= parse_header(cached
->head
, "Cache-Control", NULL
))) {
1612 if (strstr(d
, "no-cache") || strstr(d
, "must-revalidate")) {
1613 cached
->cache_mode
= CACHE_MODE_NEVER
;
1617 unsigned char *pos
= strstr(d
, "max-age=");
1619 assert(cached
->cache_mode
!= CACHE_MODE_NEVER
);
1622 /* Grab the number of seconds. */
/* pos + 8 skips the eight characters of "max-age="; the interval is then
 * added to the current time to form cached->max_age. */
1625 timeval_from_seconds(&max_age
, atol(pos
+ 8));
1626 timeval_now(&cached
->max_age
);
1627 timeval_add_interval(&cached
->max_age
, &max_age
);
1637 /* XXX: Is there some reason why NOT to follow the Location header
1638 * for any status? If the server didn't mean it, it wouldn't send
1639 * it, after all...? --pasky */
1640 if (h
== 201 || h
== 301 || h
== 302 || h
== 303 || h
== 307) {
1641 d
= parse_header(conn
->cached
->head
, "Location", NULL
);
1643 int use_get_method
= (h
== 303);
1645 /* A note from RFC 2616 section 10.3.3:
1646 * RFC 1945 and RFC 2068 specify that the client is not
1647 * allowed to change the method on the redirected
1648 * request. However, most existing user agent
1649 * implementations treat 302 as if it were a 303
1650 * response, performing a GET on the Location
1651 * field-value regardless of the original request
1653 /* So POST must not be redirected to GET, but some
1654 * BUGGY message boards rely on it :-( */
1656 && get_opt_bool("protocol.http.bugs.broken_302_redirect"))
1659 redirect_cache(conn
->cached
, d
, use_get_method
, -1);
1665 if (check_http_authentication(conn
, uri
,
1666 conn
->cached
->head
, "WWW-Authenticate")) {
1667 retry_connection(conn
, S_RESTART
);
1675 d
= parse_header(conn
->cached
->head
, "Proxy-Authenticate", &str
);
1677 if (!strncasecmp(d
, "Basic", 5)) {
1678 unsigned char *realm
= get_header_param(d
, "realm");
1681 mem_free_set(&proxy_auth
.realm
, realm
);
1682 proxy_auth
.digest
= 0;
1687 } else if (!strncasecmp(d
, "Digest", 6)) {
1688 unsigned char *realm
= get_header_param(d
, "realm");
1689 unsigned char *nonce
= get_header_param(d
, "nonce");
1690 unsigned char *opaque
= get_header_param(d
, "opaque");
1692 mem_free_set(&proxy_auth
.realm
, realm
);
1693 mem_free_set(&proxy_auth
.nonce
, nonce
);
1694 mem_free_set(&proxy_auth
.opaque
, opaque
);
1695 proxy_auth
.digest
= 1;
1702 d
= parse_header(str
, "Proxy-Authenticate", &str
);
1706 kill_buffer_data(rb
, a
);
1709 http
->recv_version
= version
;
1711 if ((d
= parse_header(conn
->cached
->head
, "Connection", NULL
))
1712 || (d
= parse_header(conn
->cached
->head
, "Proxy-Connection", NULL
))) {
1713 if (!strcasecmp(d
, "close")) http
->close
= 1;
1715 } else if (PRE_HTTP_1_1(version
)) {
1721 d
= parse_header(conn
->cached
->head
, "Content-Range", NULL
);
1723 if (strlen(d
) > 6) {
1725 if (isdigit(d
[6]) && !strcasecmp(d
, "bytes")) {
1729 f
= strtol(d
+ 6, NULL
, 10);
1731 if (!errno
&& f
>= 0) conn
->from
= f
;
1736 if (cf
&& !conn
->from
&& !conn
->unrestartable
) conn
->unrestartable
= 1;
1737 if ((conn
->progress
->start
<= 0 && conn
->from
> cf
) || conn
->from
< 0) {
1738 /* We don't want this if conn->progress.start because then conn->from will
1739 * be probably value of conn->progress.start, while cf is 0. */
1740 abort_connection(conn
, S_HTTP_ERROR
);
1747 foreach (s
, conn
->downloads
) {
1748 fprintf(stderr
, "conn %p status %p pri %d st %d er %d :: ce %s",
1749 conn
, s
, s
->pri
, s
->state
, s
->prev_error
,
1750 s
->cached
? s
->cached
->url
: (unsigned char *) "N-U-L-L");
1755 if (conn
->progress
->start
>= 0) {
1756 /* Update to the real value which we've got from Content-Range. */
1757 conn
->progress
->seek
= conn
->from
;
1759 conn
->progress
->start
= conn
->from
;
1761 d
= parse_header(conn
->cached
->head
, "Content-Length", NULL
);
1767 l
= strtol(d
, (char **) &ep
, 10);
1769 if (!errno
&& !*ep
&& l
>= 0) {
1770 if (!http
->close
|| POST_HTTP_1_0(version
))
1772 conn
->est_length
= conn
->from
+ l
;
1777 if (!conn
->unrestartable
) {
1778 d
= parse_header(conn
->cached
->head
, "Accept-Ranges", NULL
);
1781 if (!strcasecmp(d
, "none"))
1782 conn
->unrestartable
= 1;
1786 conn
->unrestartable
= 1;
1790 d
= parse_header(conn
->cached
->head
, "Transfer-Encoding", NULL
);
1792 if (!strcasecmp(d
, "chunked")) {
1793 http
->length
= LEN_CHUNKED
;
1794 http
->chunk_remaining
= CHUNK_SIZE
;
1798 if (!http
->close
&& http
->length
== -1) http
->close
= 1;
1800 d
= parse_header(conn
->cached
->head
, "Last-Modified", NULL
);
1802 if (conn
->cached
->last_modified
&& strcasecmp(conn
->cached
->last_modified
, d
)) {
1803 delete_entry_content(conn
->cached
);
1807 retry_connection(conn
, S_MODIFIED
);
1811 if (!conn
->cached
->last_modified
) conn
->cached
->last_modified
= d
;
1814 if (!conn
->cached
->last_modified
) {
1815 d
= parse_header(conn
->cached
->head
, "Date", NULL
);
1816 if (d
) conn
->cached
->last_modified
= d
;
1819 /* FIXME: Parse only if HTTP/1.1 or later? --Zas */
1820 d
= parse_header(conn
->cached
->head
, "ETag", NULL
);
1822 if (conn
->cached
->etag
) {
1823 unsigned char *old_tag
= conn
->cached
->etag
;
1824 unsigned char *new_tag
= d
;
1826 /* http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19 */
1828 if (new_tag
[0] == 'W' && new_tag
[1] == '/')
1831 if (old_tag
[0] == 'W' && old_tag
[1] == '/')
1834 if (strcmp(new_tag
, old_tag
)) {
1835 delete_entry_content(conn
->cached
);
1839 retry_connection(conn
, S_MODIFIED
);
1845 if (!conn
->cached
->etag
)
1846 conn
->cached
->etag
= d
;
1851 d
= parse_header(conn
->cached
->head
, "Content-Encoding", NULL
);
1853 unsigned char *extension
= get_extension_from_uri(uri
);
1854 enum stream_encoding file_encoding
;
1856 file_encoding
= extension
? guess_encoding(extension
) : ENCODING_NONE
;
1857 mem_free_if(extension
);
1859 /* If the content is encoded, we want to preserve the encoding
1860 * if it is implied by the extension, so that saving the URI
1861 * will leave the saved file with the correct encoding. */
1863 if (file_encoding
!= ENCODING_GZIP
1864 && (!strcasecmp(d
, "gzip") || !strcasecmp(d
, "x-gzip")))
1865 conn
->content_encoding
= ENCODING_GZIP
;
1869 if (file_encoding
!= ENCODING_BZIP2
1870 && (!strcasecmp(d
, "bzip2") || !strcasecmp(d
, "x-bzip2")))
1871 conn
->content_encoding
= ENCODING_BZIP2
;
1877 if (conn
->content_encoding
!= ENCODING_NONE
) {
1878 mem_free_if(conn
->cached
->encoding_info
);
1879 conn
->cached
->encoding_info
= stracpy(get_encoding_name(conn
->content_encoding
));
/* With an unknown length (-1), or a pre-HTTP/1.1 reply that asked for the
 * connection to be closed, the socket is switched to SOCKET_END_ONCLOSE
 * before the body is read. */
1882 if (http
->length
== -1
1883 || (PRE_HTTP_1_1(http
->recv_version
) && http
->close
))
1884 socket
->state
= SOCKET_END_ONCLOSE
;
1886 read_http_data(socket
, rb
);