1 /* Internal "http" protocol implementation */
17 #include "cache/cache.h"
18 #include "config/options.h"
19 #include "cookies/cookies.h"
20 #include "intl/charsets.h"
21 #include "intl/gettext/libintl.h"
22 #include "main/module.h"
23 #include "network/connection.h"
24 #include "network/progress.h"
25 #include "network/socket.h"
26 #include "osdep/ascii.h"
27 #include "osdep/osdep.h"
28 #include "osdep/sysname.h"
29 #include "protocol/auth/auth.h"
30 #include "protocol/auth/digest.h"
31 #include "protocol/date.h"
32 #include "protocol/header.h"
33 #include "protocol/http/blacklist.h"
34 #include "protocol/http/codes.h"
35 #include "protocol/http/http.h"
36 #include "protocol/uri.h"
37 #include "session/session.h"
38 #include "terminal/terminal.h"
39 #include "util/base64.h"
40 #include "util/conv.h"
41 #include "util/memory.h"
42 #include "util/string.h"
45 #include "http_negotiate.h"
48 /* These macros concern the struct http_version defined in the http.h */
49 #define HTTP_0_9(x) ((x).major == 0 && (x).minor == 9)
50 #define HTTP_1_0(x) ((x).major == 1 && (x).minor == 0)
51 #define HTTP_1_1(x) ((x).major == 1 && (x).minor == 1)
52 #define PRE_HTTP_1_0(x) ((x).major < 1)
53 #define PRE_HTTP_1_1(x) (PRE_HTTP_1_0(x) || HTTP_1_0(x))
54 #define POST_HTTP_1_0(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 0))
55 #define POST_HTTP_1_1(x) ((x).major > 1 || ((x).major == 1 && (x).minor > 1))
58 #define LEN_CHUNKED -2 /* == we get data in unknown number of chunks */
59 #define LEN_FINISHED 0
61 /* Either bytes coming in this chunk yet or "parser state". */
62 #define CHUNK_DATA_END -3
63 #define CHUNK_ZERO_SIZE -2
66 static struct auth_entry proxy_auth
;
68 static unsigned char *accept_charset
= NULL
;
71 static struct option_info http_options
[] = {
72 INIT_OPT_TREE("protocol", N_("HTTP"),
74 N_("HTTP-specific options.")),
77 INIT_OPT_TREE("protocol.http", N_("Server bug workarounds"),
79 N_("Server-side HTTP bugs workarounds.")),
81 INIT_OPT_BOOL("protocol.http.bugs", N_("Do not send Accept-Charset"),
82 "accept_charset", 0, 1,
83 N_("The Accept-Charset header is quite long and sending it can trigger\n"
84 "bugs in some rarely found servers.")),
86 INIT_OPT_BOOL("protocol.http.bugs", N_("Allow blacklisting"),
87 "allow_blacklist", 0, 1,
88 N_("Allow blacklisting of buggy servers.")),
90 INIT_OPT_BOOL("protocol.http.bugs", N_("Broken 302 redirects"),
91 "broken_302_redirect", 0, 1,
92 N_("Broken 302 redirect (violates RFC but compatible with Netscape).\n"
93 "This is a problem for a lot of web discussion boards and the like.\n"
94 "If they will do strange things to you, try to play with this.")),
96 INIT_OPT_BOOL("protocol.http.bugs", N_("No keepalive after POST requests"),
97 "post_no_keepalive", 0, 0,
98 N_("Disable keepalive connection after POST request.")),
100 INIT_OPT_BOOL("protocol.http.bugs", N_("Use HTTP/1.0"),
102 N_("Use HTTP/1.0 protocol instead of HTTP/1.1.")),
104 INIT_OPT_TREE("protocol.http", N_("Proxy configuration"),
106 N_("HTTP proxy configuration.")),
108 INIT_OPT_STRING("protocol.http.proxy", N_("Host and port-number"),
110 N_("Host and port-number (host:port) of the HTTP proxy, or blank.\n"
111 "If it's blank, HTTP_PROXY environment variable is checked as well.")),
113 INIT_OPT_STRING("protocol.http.proxy", N_("Username"),
115 N_("Proxy authentication username.")),
117 INIT_OPT_STRING("protocol.http.proxy", N_("Password"),
119 N_("Proxy authentication password.")),
122 INIT_OPT_TREE("protocol.http", N_("Referer sending"),
124 N_("HTTP referer sending options. HTTP referer is a special header\n"
125 "sent in the HTTP requests, which is supposed to contain the previous\n"
126 "page visited by the browser. This way, the server can know what link\n"
127 "did you follow when accessing that page. However, this behaviour\n"
128 "can unfortunately considerably affect privacy and can lead even to a\n"
129 "security problem on some badly designed web pages.")),
131 INIT_OPT_INT("protocol.http.referer", N_("Policy"),
133 REFERER_NONE
, REFERER_TRUE
, REFERER_TRUE
,
134 N_("Mode of sending HTTP referer:\n"
135 "0 is send no referer\n"
136 "1 is send current URL as referer\n"
137 "2 is send fixed fake referer\n"
138 "3 is send previous URL as referer (correct, but insecure)")),
140 INIT_OPT_STRING("protocol.http.referer", N_("Fake referer URL"),
142 N_("Fake referer to be sent when policy is 2.")),
145 INIT_OPT_STRING("protocol.http", N_("Send Accept-Language header"),
146 "accept_language", 0, "",
147 N_("Send Accept-Language header.")),
149 INIT_OPT_BOOL("protocol.http", N_("Use UI language as Accept-Language"),
150 "accept_ui_language", 0, 1,
151 N_("Request localised versions of documents from web-servers (using the\n"
152 "Accept-Language header) using the language you have configured for\n"
153 "ELinks' user-interface (this also affects navigator.language ECMAScript\n"
154 "value available to scripts). Note that some see this as a potential\n"
155 "security risk because it tells web-masters and the FBI sniffers about\n"
156 "your language preference.")),
158 /* After the compression support has been tested enough,
159 * we might wrap this option in #if CFG_DEBUG. */
160 INIT_OPT_BOOL("protocol.http", N_("Enable on-the-fly compression"),
162 N_("If enabled, the capability to receive compressed content (gzip and/or\n"
163 "bzip2) is announced to the server, which usually sends the reply\n"
164 "compressed, thus saving some bandwidth at slight CPU expense.\n"
166 "If ELinks displays a incomplete page or garbage, try disabling this\n"
167 "option. If that helps, there may be a bug in the decompression part\n"
168 "of ELinks. Please report such bugs.\n"
170 "If ELinks has been compiled without compression support, this option\n"
171 "has no effect. To check the supported features, see Help -> About.")),
173 INIT_OPT_BOOL("protocol.http", N_("Activate HTTP TRACE debugging"),
175 N_("If active, all HTTP requests are sent with TRACE as their method\n"
176 "rather than GET or POST. This is useful for debugging of both ELinks\n"
177 "and various server-side scripts --- the server only returns the client's\n"
178 "request back to the client verbatim. Note that this type of request may\n"
179 "not be enabled on all servers.")),
181 /* OSNews.com is supposed to be relying on the textmode token, at least. */
182 INIT_OPT_STRING("protocol.http", N_("User-agent identification"),
183 "user_agent", 0, "ELinks/%v (textmode; %s; %t-%b)",
184 N_("Change the User Agent ID. That means identification string, which\n"
185 "is sent to HTTP server when a document is requested. The 'textmode'\n"
186 "token in the first field is our silent attempt to establish this as\n"
187 "a standard for new textmode user agents, so that the webmasters can\n"
188 "have just a single uniform test for these if they are e.g. pushing\n"
189 "some lite version to them automagically.\n"
190 "Use \" \" if you don't want any User-Agent header to be sent at all.\n"
191 "%v in the string means ELinks version,\n"
192 "%s in the string means system identification,\n"
193 "%t in the string means size of the terminal,\n"
194 "%b in the string means number of bars displayed by ELinks.")),
197 INIT_OPT_TREE("protocol", N_("HTTPS"),
199 N_("HTTPS-specific options.")),
201 INIT_OPT_TREE("protocol.https", N_("Proxy configuration"),
203 N_("HTTPS proxy configuration.")),
205 INIT_OPT_STRING("protocol.https.proxy", N_("Host and port-number"),
207 N_("Host and port-number (host:port) of the HTTPS CONNECT proxy, or blank.\n"
208 "If it's blank, HTTPS_PROXY environment variable is checked as well.")),
212 static void done_http();
214 struct module http_protocol_module
= struct_module(
215 /* name: */ N_("HTTP"),
216 /* options: */ http_options
,
218 /* submodules: */ NULL
,
221 /* done: */ done_http
228 mem_free_if(proxy_auth
.realm
);
229 mem_free_if(proxy_auth
.nonce
);
230 mem_free_if(proxy_auth
.opaque
);
235 mem_free(accept_charset
);
239 init_accept_charset(void)
245 if (!init_string(&ac
)) return;
247 for (i
= 0; (cs
= get_cp_mime_name(i
)); i
++) {
249 add_to_string(&ac
, ", ");
251 add_to_string(&ac
, "Accept-Charset: ");
253 add_to_string(&ac
, cs
);
257 add_crlf_to_string(&ac
);
260 accept_charset
= squeezastring(&ac
);
267 subst_user_agent(unsigned char *fmt
, unsigned char *version
,
268 unsigned char *sysname
, unsigned char *termsize
)
272 if (!init_string(&agent
)) return NULL
;
277 for (p
= 0; fmt
[p
] && fmt
[p
] != '%'; p
++);
279 add_bytes_to_string(&agent
, fmt
, p
);
282 if (*fmt
!= '%') continue;
287 if (!list_empty(sessions
)) {
288 unsigned char bs
[4] = "";
290 struct session
*ses
= sessions
.prev
;
291 int bars
= ses
->status
.show_status_bar
292 + ses
->status
.show_tabs_bar
293 + ses
->status
.show_title_bar
;
295 ulongcat(bs
, &blen
, bars
, 2, 0);
296 add_to_string(&agent
, bs
);
300 add_to_string(&agent
, version
);
303 add_to_string(&agent
, sysname
);
307 add_to_string(&agent
, termsize
);
310 add_bytes_to_string(&agent
, fmt
- 1, 2);
320 add_url_to_http_string(struct string
*header
, struct uri
*uri
, int components
)
322 /* This block substitutes spaces in URL by %20s. This is
323 * certainly not the right place to do it, but now the
324 * behaviour is at least improved compared to what we had
325 * before. We should probably encode all URLs as early as
326 * possible, and possibly decode them back in protocol
327 * backends. --pasky */
328 unsigned char *string
= get_uri_string(uri
, components
);
329 unsigned char *data
= string
;
334 int len
= strcspn(data
, " \t\r\n\\");
336 add_bytes_to_string(header
, data
, len
);
338 if (!data
[len
]) break;
340 if (data
[len
++] == '\\')
341 add_char_to_string(header
, '/');
343 add_to_string(header
, "%20");
351 /* Parse from @end - 1 to @start and set *@value to integer found.
352 * It returns -1 if not a number, 0 otherwise.
353 * @end should be > @start. */
355 revstr2num(unsigned char *start
, unsigned char *end
, int *value
)
361 if (!isdigit(*end
)) return -1; /* NaN */
362 val
+= (*end
- '0') * q
;
364 } while (end
> start
);
370 /* This function extracts code, major and minor version from string
371 * "\s*HTTP/\d+.\d+\s+\d\d\d..."
372 * It returns a negative value on error, 0 on success.
375 get_http_code(struct read_buffer
*rb
, int *code
, struct http_version
*version
)
377 unsigned char *head
= rb
->data
;
378 unsigned char *start
;
385 while (*head
== ' ') head
++;
388 if (c_toupper(*head
) != 'H' || c_toupper(*++head
) != 'T' ||
389 c_toupper(*++head
) != 'T' || c_toupper(*++head
) != 'P'
396 while (*head
&& *head
!= '.') head
++;
398 if (!*head
|| !(head
- start
)
399 || (head
- start
) > 4
400 || !isdigit(*(head
+ 1)))
403 /* Extract major version number. */
404 if (revstr2num(start
, head
, &version
->major
)) return -3; /* NaN */
409 while (*head
&& *head
!= ' ') head
++;
411 if (!*head
|| !(head
- start
) || (head
- start
) > 4) return -4;
413 /* Extract minor version number. */
414 if (revstr2num(start
, head
, &version
->minor
)) return -5; /* NaN */
417 while (*head
== ' ') head
++;
419 /* Sanity check for code. */
420 if (head
[0] < '1' || head
[0] > '9' ||
423 return -6; /* Invalid code. */
426 *code
= (head
[0] - '0') * 100 + (head
[1] - '0') * 10 + head
[2] - '0';
432 check_http_server_bugs(struct uri
*uri
, struct http_connection_info
*http
,
435 unsigned char *server
;
436 const unsigned char *const *s
;
437 static const unsigned char *const buggy_servers
[] = {
440 "Netscape-Enterprise",
444 if (!get_opt_bool("protocol.http.bugs.allow_blacklist", NULL
)
445 || HTTP_1_0(http
->sent_version
))
448 server
= parse_header(head
, "Server", NULL
);
452 for (s
= buggy_servers
; *s
; s
++) {
453 if (strstr(server
, *s
)) {
454 add_blacklist_entry(uri
, SERVER_BLACKLIST_HTTP10
);
464 http_end_request(struct connection
*conn
, struct connection_state state
,
467 struct http_connection_info
*http
;
469 shutdown_connection_stream(conn
);
471 /* shutdown_connection_stream() should not change conn->info,
472 * but in case it does, read conn->info only after the call. */
475 done_http_post(&http
->post
);
477 if (http
&& !http
->close
478 && (!conn
->socket
->ssl
) /* We won't keep alive ssl connections */
479 && (!get_opt_bool("protocol.http.bugs.post_no_keepalive", NULL
)
480 || !conn
->uri
->post
)) {
481 if (is_in_state(state
, S_OK
) && conn
->cached
)
482 normalize_cache_entry(conn
->cached
, !notrunc
? conn
->from
: -1);
483 set_connection_state(conn
, state
);
484 add_keepalive_connection(conn
, HTTP_KEEPALIVE_TIMEOUT
, NULL
);
486 abort_connection(conn
, state
);
490 static void http_send_header(struct socket
*);
493 http_protocol_handler(struct connection
*conn
)
495 /* setcstate(conn, S_CONN); */
497 if (!has_keepalive_connection(conn
)) {
498 make_connection(conn
->socket
, conn
->uri
, http_send_header
,
499 conn
->cache_mode
>= CACHE_MODE_FORCE_RELOAD
);
501 http_send_header(conn
->socket
);
506 proxy_protocol_handler(struct connection
*conn
)
508 http_protocol_handler(conn
);
511 #define IS_PROXY_URI(x) ((x)->protocol == PROTOCOL_PROXY)
513 #define connection_is_https_proxy(conn) \
514 (IS_PROXY_URI((conn)->uri) && (conn)->proxied_uri->protocol == PROTOCOL_HTTPS)
516 /** connection.done points to this function if connection.info points
517 * to a struct http_connection_info. */
519 done_http_connection(struct connection
*conn
)
521 struct http_connection_info
*http
= conn
->info
;
523 done_http_post(&http
->post
);
529 struct http_connection_info
*
530 init_http_connection_info(struct connection
*conn
, int major
, int minor
, int close
)
532 struct http_connection_info
*http
;
534 http
= mem_calloc(1, sizeof(*http
));
536 http_end_request(conn
, connection_state(S_OUT_OF_MEM
), 0);
540 http
->sent_version
.major
= major
;
541 http
->sent_version
.minor
= minor
;
544 init_http_post(&http
->post
);
546 /* The CGI code uses this too and blacklisting expects a host name. */
547 if (conn
->proxied_uri
->protocol
!= PROTOCOL_FILE
)
548 http
->bl_flags
= get_blacklist_flags(conn
->proxied_uri
);
550 if (http
->bl_flags
& SERVER_BLACKLIST_HTTP10
551 || get_opt_bool("protocol.http.bugs.http10", NULL
)) {
552 http
->sent_version
.major
= 1;
553 http
->sent_version
.minor
= 0;
556 /* If called from HTTPS proxy connection the connection info might have
557 * already been allocated. */
562 mem_free_set(&conn
->info
, http
);
563 conn
->done
= done_http_connection
;
569 accept_encoding_header(struct string
*header
)
571 #if defined(CONFIG_GZIP) || defined(CONFIG_BZIP2) || defined(CONFIG_LZMA)
574 add_to_string(header
, "Accept-Encoding: ");
577 add_to_string(header
, "bzip2");
582 if (comma
) add_to_string(header
, ", ");
583 add_to_string(header
, "deflate, gzip");
588 if (comma
) add_to_string(header
, ", ");
589 add_to_string(header
, "lzma");
591 add_crlf_to_string(header
);
595 #define POST_BUFFER_SIZE 16384
596 #define BIG_READ 655360
599 send_more_post_data(struct socket
*socket
)
601 struct connection
*conn
= socket
->conn
;
602 struct http_connection_info
*http
= conn
->info
;
603 unsigned char buffer
[POST_BUFFER_SIZE
];
605 struct connection_state error
;
607 got
= read_http_post(&http
->post
, buffer
, POST_BUFFER_SIZE
, &error
);
609 http_end_request(conn
, error
, 0);
610 } else if (got
> 0) {
611 write_to_socket(socket
, buffer
, got
, connection_state(S_TRANS
),
612 send_more_post_data
);
613 } else { /* got == 0, meaning end of data */
614 /* Can't use request_from_socket() because there's no
615 * more data to write. */
616 struct read_buffer
*rb
= alloc_read_buffer(socket
);
618 socket
->state
= SOCKET_END_ONCLOSE
;
620 read_from_socket(socket
, rb
, connection_state(S_SENT
),
623 http_end_request(conn
, connection_state(S_OUT_OF_MEM
),
631 http_send_header(struct socket
*socket
)
633 struct connection
*conn
= socket
->conn
;
634 struct http_connection_info
*http
;
635 int trace
= get_opt_bool("protocol.http.trace", NULL
);
636 struct string header
;
637 unsigned char *post_data
= NULL
;
638 struct auth_entry
*entry
= NULL
;
639 struct uri
*uri
= conn
->proxied_uri
; /* Set to the real uri */
640 unsigned char *optstr
;
641 int use_connect
, talking_to_proxy
;
643 /* Sanity check for a host */
644 if (!uri
|| !uri
->host
|| !*uri
->host
|| !uri
->hostlen
) {
645 http_end_request(conn
, connection_state(S_BAD_URL
), 0);
649 http
= init_http_connection_info(conn
, 1, 1, 0);
652 if (!init_string(&header
)) {
653 http_end_request(conn
, connection_state(S_OUT_OF_MEM
), 0);
657 if (!conn
->cached
) conn
->cached
= find_in_cache(uri
);
659 talking_to_proxy
= IS_PROXY_URI(conn
->uri
) && !conn
->socket
->ssl
;
660 use_connect
= connection_is_https_proxy(conn
) && !conn
->socket
->ssl
;
663 add_to_string(&header
, "TRACE ");
664 } else if (use_connect
) {
665 add_to_string(&header
, "CONNECT ");
666 /* In CONNECT requests, we send only a subset of the
667 * headers to the proxy. See the "CONNECT:" comments
668 * below. After the CONNECT request succeeds, we
669 * negotiate TLS with the real server and make a new
670 * HTTP request that includes all the headers. */
671 } else if (uri
->post
) {
672 add_to_string(&header
, "POST ");
673 conn
->unrestartable
= 1;
675 add_to_string(&header
, "GET ");
678 if (!talking_to_proxy
) {
679 add_char_to_string(&header
, '/');
683 /* Add port if it was specified or the default port */
684 add_uri_to_string(&header
, uri
, URI_HTTP_CONNECT
);
686 if (connection_is_https_proxy(conn
) && conn
->socket
->ssl
) {
687 add_url_to_http_string(&header
, uri
, URI_DATA
);
689 } else if (talking_to_proxy
) {
690 add_url_to_http_string(&header
, uri
, URI_PROXY
);
693 add_url_to_http_string(&header
, conn
->uri
, URI_DATA
);
697 add_to_string(&header
, " HTTP/");
698 add_long_to_string(&header
, http
->sent_version
.major
);
699 add_char_to_string(&header
, '.');
700 add_long_to_string(&header
, http
->sent_version
.minor
);
701 add_crlf_to_string(&header
);
703 /* CONNECT: Sending a Host header seems pointless as the same
704 * information is already in the CONNECT line. It's harmless
705 * though and Mozilla does it too. */
706 add_to_string(&header
, "Host: ");
707 add_uri_to_string(&header
, uri
, URI_HTTP_HOST
);
708 add_crlf_to_string(&header
);
710 /* CONNECT: Proxy-Authorization is intended to be seen by the proxy. */
711 if (talking_to_proxy
) {
712 unsigned char *user
= get_opt_str("protocol.http.proxy.user", NULL
);
713 unsigned char *passwd
= get_opt_str("protocol.http.proxy.passwd", NULL
);
715 if (proxy_auth
.digest
) {
716 unsigned char *response
;
717 int userlen
= int_min(strlen(user
), AUTH_USER_MAXLEN
- 1);
718 int passwordlen
= int_min(strlen(passwd
), AUTH_PASSWORD_MAXLEN
- 1);
721 memcpy(proxy_auth
.user
, user
, userlen
);
722 proxy_auth
.user
[userlen
] = '\0';
724 memcpy(proxy_auth
.password
, passwd
, passwordlen
);
725 proxy_auth
.password
[passwordlen
] = '\0';
727 /* FIXME: @uri is the proxied URI. Maybe the passed URI
728 * should be the proxy URI aka conn->uri. --jonas */
729 response
= get_http_auth_digest_response(&proxy_auth
, uri
);
731 add_to_string(&header
, "Proxy-Authorization: Digest ");
732 add_to_string(&header
, response
);
733 add_crlf_to_string(&header
);
740 unsigned char *proxy_data
;
742 proxy_data
= straconcat(user
, ":", passwd
, (unsigned char *) NULL
);
744 unsigned char *proxy_64
= base64_encode(proxy_data
);
747 add_to_string(&header
, "Proxy-Authorization: Basic ");
748 add_to_string(&header
, proxy_64
);
749 add_crlf_to_string(&header
);
752 mem_free(proxy_data
);
758 /* CONNECT: User-Agent does not reveal anything about the
759 * resource we're fetching, and it may help the proxy return
760 * better error messages. */
761 optstr
= get_opt_str("protocol.http.user_agent", NULL
);
762 if (*optstr
&& strcmp(optstr
, " ")) {
763 unsigned char *ustr
, ts
[64] = "";
764 /* TODO: Somehow get the terminal in which the
765 * document will actually be displayed. */
766 struct terminal
*term
= get_default_terminal();
768 add_to_string(&header
, "User-Agent: ");
771 unsigned int tslen
= 0;
773 ulongcat(ts
, &tslen
, term
->width
, 3, 0);
775 ulongcat(ts
, &tslen
, term
->height
, 3, 0);
777 ustr
= subst_user_agent(optstr
, VERSION_STRING
, system_name
,
781 add_to_string(&header
, ustr
);
785 add_crlf_to_string(&header
);
788 /* CONNECT: Referer probably is a secret page in the HTTPS
789 * server, so don't reveal it to the proxy. */
791 switch (get_opt_int("protocol.http.referer.policy", NULL
)) {
797 optstr
= get_opt_str("protocol.http.referer.fake", NULL
);
798 if (!optstr
[0]) break;
799 add_to_string(&header
, "Referer: ");
800 add_to_string(&header
, optstr
);
801 add_crlf_to_string(&header
);
805 if (!conn
->referrer
) break;
806 add_to_string(&header
, "Referer: ");
807 add_url_to_http_string(&header
, conn
->referrer
, URI_HTTP_REFERRER
);
808 add_crlf_to_string(&header
);
811 case REFERER_SAME_URL
:
812 add_to_string(&header
, "Referer: ");
813 add_url_to_http_string(&header
, uri
, URI_HTTP_REFERRER
);
814 add_crlf_to_string(&header
);
819 /* CONNECT: Do send all Accept* headers to the CONNECT proxy,
820 * because they do not reveal anything about the resource
821 * we're going to request via TLS, and they may affect the
822 * error message if the CONNECT request fails.
824 * If ELinks is ever changed to vary its Accept headers based
825 * on what it intends to do with the returned resource, e.g.
826 * sending "Accept: text/css" when it wants an external
827 * stylesheet, then it should do that only in the inner GET
828 * and not in the outer CONNECT. */
829 add_to_string(&header
, "Accept: */*");
830 add_crlf_to_string(&header
);
832 if (get_opt_bool("protocol.http.compression", NULL
))
833 accept_encoding_header(&header
);
835 if (!accept_charset
) {
836 init_accept_charset();
839 if (!(http
->bl_flags
& SERVER_BLACKLIST_NO_CHARSET
)
840 && !get_opt_bool("protocol.http.bugs.accept_charset", NULL
)
842 add_to_string(&header
, accept_charset
);
845 optstr
= get_opt_str("protocol.http.accept_language", NULL
);
847 add_to_string(&header
, "Accept-Language: ");
848 add_to_string(&header
, optstr
);
849 add_crlf_to_string(&header
);
852 else if (get_opt_bool("protocol.http.accept_ui_language", NULL
)) {
853 unsigned char *code
= language_to_iso639(current_language
);
856 add_to_string(&header
, "Accept-Language: ");
857 add_to_string(&header
, code
);
858 add_crlf_to_string(&header
);
863 /* CONNECT: Proxy-Connection is intended to be seen by the
864 * proxy. If the CONNECT request succeeds, then the proxy
865 * will forward the remainder of the TCP connection to the
866 * origin server, and Proxy-Connection does not matter; but
867 * if the request fails, then Proxy-Connection may matter. */
868 /* FIXME: What about post-HTTP/1.1?? --Zas */
869 if (HTTP_1_1(http
->sent_version
)) {
870 if (!IS_PROXY_URI(conn
->uri
)) {
871 add_to_string(&header
, "Connection: ");
873 add_to_string(&header
, "Proxy-Connection: ");
876 if (!uri
->post
|| !get_opt_bool("protocol.http.bugs.post_no_keepalive", NULL
)) {
877 add_to_string(&header
, "Keep-Alive");
879 add_to_string(&header
, "close");
881 add_crlf_to_string(&header
);
884 /* CONNECT: Do not tell the proxy anything we have cached
885 * about the resource. */
886 if (!use_connect
&& conn
->cached
) {
887 if (!conn
->cached
->incomplete
&& conn
->cached
->head
888 && conn
->cache_mode
<= CACHE_MODE_CHECK_IF_MODIFIED
) {
889 if (conn
->cached
->last_modified
) {
890 add_to_string(&header
, "If-Modified-Since: ");
891 add_to_string(&header
, conn
->cached
->last_modified
);
892 add_crlf_to_string(&header
);
894 if (conn
->cached
->etag
) {
895 add_to_string(&header
, "If-None-Match: ");
896 add_to_string(&header
, conn
->cached
->etag
);
897 add_crlf_to_string(&header
);
902 /* CONNECT: Let's send cache control headers to the proxy too;
903 * they may affect DNS caching. */
904 if (conn
->cache_mode
>= CACHE_MODE_FORCE_RELOAD
) {
905 add_to_string(&header
, "Pragma: no-cache");
906 add_crlf_to_string(&header
);
907 add_to_string(&header
, "Cache-Control: no-cache");
908 add_crlf_to_string(&header
);
911 /* CONNECT: Do not reveal byte ranges to the proxy. It can't
912 * do anything good with that information anyway. */
913 if (!use_connect
&& (conn
->from
|| conn
->progress
->start
> 0)) {
914 /* conn->from takes precedence. conn->progress.start is set only the first
915 * time, then conn->from gets updated and in case of any retries
916 * etc we have everything interesting in conn->from already. */
917 add_to_string(&header
, "Range: bytes=");
918 add_long_to_string(&header
, conn
->from
? conn
->from
: conn
->progress
->start
);
919 add_char_to_string(&header
, '-');
920 add_crlf_to_string(&header
);
923 /* CONNECT: The Authorization header is for the origin server only. */
926 if (http_negotiate_output(uri
, &header
) != 0)
928 entry
= find_auth(uri
);
933 unsigned char *response
;
935 response
= get_http_auth_digest_response(entry
, uri
);
937 add_to_string(&header
, "Authorization: Digest ");
938 add_to_string(&header
, response
);
939 add_crlf_to_string(&header
);
945 /* RFC2617 section 2 [Basic Authentication Scheme]
947 * To receive authorization, the client sends the userid
948 * and password, separated by a single colon (":")
949 * character, within a base64 [7] encoded string in the
953 /* Create base64 encoded string. */
954 id
= straconcat(entry
->user
, ":", entry
->password
,
955 (unsigned char *) NULL
);
957 unsigned char *base64
= base64_encode(id
);
959 mem_free_set(&id
, base64
);
963 add_to_string(&header
, "Authorization: Basic ");
964 add_to_string(&header
, id
);
965 add_crlf_to_string(&header
);
971 /* CONNECT: Any POST data is for the origin server only. */
972 if (!use_connect
&& uri
->post
) {
973 /* We search for first '\n' in uri->post to get content type
974 * as set by get_form_uri(). This '\n' is dropped if any
975 * and replaced by correct '\r\n' termination here. */
976 unsigned char *postend
= strchr(uri
->post
, '\n');
977 struct connection_state error
;
980 add_to_string(&header
, "Content-Type: ");
981 add_bytes_to_string(&header
, uri
->post
, postend
- uri
->post
);
982 add_crlf_to_string(&header
);
985 post_data
= postend
? postend
+ 1 : uri
->post
;
986 if (!open_http_post(&http
->post
, post_data
, &error
)) {
987 http_end_request(conn
, error
, 0);
988 done_string(&header
);
991 add_format_to_string(&header
, "Content-Length: "
992 "%" OFF_PRINT_FORMAT
"\x0D\x0A",
994 http
->post
.total_upload_length
);
997 #ifdef CONFIG_COOKIES
998 /* CONNECT: Cookies are for the origin server only. */
1000 struct string
*cookies
= send_cookies(uri
);
1003 add_to_string(&header
, "Cookie: ");
1004 add_string_to_string(&header
, cookies
);
1005 add_crlf_to_string(&header
);
1006 done_string(cookies
);
1011 add_crlf_to_string(&header
);
1013 /* CONNECT: Any POST data is for the origin server only.
1014 * This was already checked above and post_data is NULL
1015 * in that case. Verified with an assertion below. */
1017 assert(!use_connect
); /* see comment above */
1019 socket
->state
= SOCKET_END_ONCLOSE
;
1020 if (!conn
->http_upload_progress
&& http
->post
.file_count
)
1021 conn
->http_upload_progress
= init_progress(0);
1022 write_to_socket(socket
, header
.source
, header
.length
,
1023 connection_state(S_TRANS
),
1024 send_more_post_data
);
1026 request_from_socket(socket
, header
.source
, header
.length
,
1027 connection_state(S_SENT
),
1028 SOCKET_END_ONCLOSE
, http_got_header
);
1029 done_string(&header
);
1032 #undef POST_BUFFER_SIZE
1035 /* This function decompresses the data block given in @data (if it was
1036 * compressed), which is @len bytes long. The decompressed data block is given
1037 * back to the world as the return value and its length is stored into
1038 * @new_len. After this function returns, the caller will discard all the @len
1039 * input bytes, so this function must use all of them unless an error occurs.
1041 * In this function, value of either http->chunk_remaining or http->length is
1042 * being changed (it depends on if chunked mode is used or not).
1044 * Note that the function is still a little esoteric for me. Don't take it
1045 * lightly and don't mess with it without grave reason! If you dare to touch
1046 * this without testing the changes on slashdot, freshmeat and cvsweb
1047 * (including revision history), don't dare to send me any patches! ;) --pasky
1049 * This function gotta die. */
1050 static unsigned char *
1051 decompress_data(struct connection
*conn
, unsigned char *data
, int len
,
1054 struct http_connection_info
*http
= conn
->info
;
1055 enum { NORMAL
, FINISHING
} state
= NORMAL
;
1057 int *length_of_block
;
1058 unsigned char *output
= NULL
;
1061 if (http
->length
== LEN_CHUNKED
) {
1062 if (http
->chunk_remaining
== CHUNK_ZERO_SIZE
)
1064 length_of_block
= &http
->chunk_remaining
;
1066 length_of_block
= &http
->length
;
1067 if (!*length_of_block
) {
1068 /* Going to finish this decoding business. */
1073 if (conn
->content_encoding
== ENCODING_NONE
) {
1075 if (*length_of_block
> 0) *length_of_block
-= len
;
1079 *new_len
= 0; /* new_len must be zero if we would ever return NULL */
1081 if (conn
->stream_pipes
[0] == -1
1082 && (c_pipe(conn
->stream_pipes
) < 0
1083 || set_nonblocking_fd(conn
->stream_pipes
[0]) < 0
1084 || set_nonblocking_fd(conn
->stream_pipes
[1]) < 0)) {
1091 if (state
== NORMAL
) {
1092 /* ... we aren't finishing yet. */
1093 int written
= safe_write(conn
->stream_pipes
[1], data
, len
);
1099 /* In non-keep-alive connections http->length == -1, so the test below */
1100 if (*length_of_block
> 0)
1101 *length_of_block
-= written
;
1102 /* http->length is 0 at the end of block for all modes: keep-alive,
1103 * non-keep-alive and chunked */
1104 if (!http
->length
) {
1105 /* That's all, folks - let's finish this. */
1108 /* We're done for this round (but not done
1109 * completely). Thus we will get out with
1110 * what we have and leave what we wrote to
1111 * the next round - we have to do that since
1112 * we MUST NOT ever empty the pipe completely
1113 * - this would cause a disaster for
1114 * read_encoded(), which would simply not
1115 * work right then. */
1121 if (!conn
->stream
) {
1122 conn
->stream
= open_encoded(conn
->stream_pipes
[0],
1123 conn
->content_encoding
);
1124 if (!conn
->stream
) return NULL
;
1127 tmp
= mem_realloc(output
, *new_len
+ BIG_READ
);
1131 did_read
= read_encoded(conn
->stream
, output
+ *new_len
, BIG_READ
);
1133 /* Do not break from the loop if did_read == 0. It
1134 * means no decoded data is available yet, but some may
1135 * become available later. This happens especially with
1136 * the bzip2 decoder, which needs an entire compressed
1137 * block as input before it generates any output. */
1142 *new_len
+= did_read
;
1143 } while (len
|| (did_read
== BIG_READ
));
1145 if (state
== FINISHING
) shutdown_connection_stream(conn
);
1151 is_line_in_buffer(struct read_buffer
*rb
)
1155 for (l
= 0; l
< rb
->length
; l
++) {
1156 unsigned char a0
= rb
->data
[l
];
1160 if (a0
== ASCII_CR
) {
1161 if (rb
->data
[l
+ 1] == ASCII_LF
1162 && l
< rb
->length
- 1)
1164 if (l
== rb
->length
- 1)
1173 static void read_http_data(struct socket
*socket
, struct read_buffer
*rb
);
1176 read_more_http_data(struct connection
*conn
, struct read_buffer
*rb
,
1177 int already_got_anything
)
1179 struct connection_state state
= already_got_anything
1180 ? connection_state(S_TRANS
) : conn
->state
;
1182 read_from_socket(conn
->socket
, rb
, state
, read_http_data
);
1186 read_http_data_done(struct connection
*conn
)
1188 struct http_connection_info
*http
= conn
->info
;
1190 /* There's no content but an error so just print
1191 * that instead of nothing. */
1193 if (http
->code
>= 400) {
1194 http_error_document(conn
, http
->code
);
1197 /* This is not an error, thus fine. No need to generate any
1198 * document, as this may be empty and it's not a problem.
1199 * In case of 3xx, we're probably just getting kicked to
1200 * another page anyway. And in case of 2xx, the document
1201 * may indeed be empty and thus the user should see it so. */
1205 http_end_request(conn
, connection_state(S_OK
), 0);
1214 read_chunked_http_data(struct connection
*conn
, struct read_buffer
*rb
)
1216 struct http_connection_info
*http
= conn
->info
;
1217 int total_data_len
= 0;
1220 /* Chunked. Good luck! */
1221 /* See RFC2616, section 3.6.1. Basically, it looks like:
1222 * 1234 ; a = b ; c = d\r\n
1223 * aklkjadslkfjalkfjlkajkljfdkljdsfkljdf*1234\r\n
1226 if (http
->chunk_remaining
== CHUNK_DATA_END
) {
1227 int l
= is_line_in_buffer(rb
);
1231 /* Invalid character in buffer. */
1235 /* Remove everything to the EOLN. */
1236 kill_buffer_data(rb
, l
);
1244 } else if (http
->chunk_remaining
== CHUNK_SIZE
) {
1245 int l
= is_line_in_buffer(rb
);
1253 n
= strtol(rb
->data
, (char **) &de
, 16);
1254 if (errno
|| !*de
) {
1259 if (l
== -1 || de
== rb
->data
) {
1263 /* Remove everything to the EOLN. */
1264 kill_buffer_data(rb
, l
);
1265 http
->chunk_remaining
= n
;
1266 if (!http
->chunk_remaining
)
1267 http
->chunk_remaining
= CHUNK_ZERO_SIZE
;
1272 unsigned char *data
;
1274 int zero
= (http
->chunk_remaining
== CHUNK_ZERO_SIZE
);
1275 int len
= zero
? 0 : http
->chunk_remaining
;
1277 /* Maybe everything necessary didn't come yet.. */
1278 int_upper_bound(&len
, rb
->length
);
1279 conn
->received
+= len
;
1281 data
= decompress_data(conn
, rb
->data
, len
, &data_len
);
1283 if (add_fragment(conn
->cached
, conn
->from
,
1284 data
, data_len
) == 1)
1287 if (data
&& data
!= rb
->data
) mem_free(data
);
1289 conn
->from
+= data_len
;
1290 total_data_len
+= data_len
;
1292 kill_buffer_data(rb
, len
);
1295 /* Last chunk has zero length, so this is last
1296 * chunk, we finished decompression just now
1297 * and now we can happily finish reading this
1299 http
->chunk_remaining
= CHUNK_DATA_END
;
1303 if (!http
->chunk_remaining
&& rb
->length
> 0) {
1304 /* Eat newline succeeding each chunk. */
1305 if (rb
->data
[0] == ASCII_LF
) {
1306 kill_buffer_data(rb
, 1);
1308 if (rb
->data
[0] != ASCII_CR
1310 && rb
->data
[1] != ASCII_LF
)) {
1313 if (rb
->length
< 2) break;
1314 kill_buffer_data(rb
, 2);
1316 http
->chunk_remaining
= CHUNK_SIZE
;
1324 return !!total_data_len
;
1327 /* Returns 0 if more data, 1 if done. */
1329 read_normal_http_data(struct connection
*conn
, struct read_buffer
*rb
)
1331 struct http_connection_info
*http
= conn
->info
;
1332 unsigned char *data
;
1334 int len
= rb
->length
;
1336 if (http
->length
>= 0 && http
->length
< len
) {
1337 /* We won't read more than we have to go. */
1341 conn
->received
+= len
;
1343 data
= decompress_data(conn
, rb
->data
, len
, &data_len
);
1345 if (add_fragment(conn
->cached
, conn
->from
, data
, data_len
) == 1)
1348 if (data
&& data
!= rb
->data
) mem_free(data
);
1350 conn
->from
+= data_len
;
1352 kill_buffer_data(rb
, len
);
1354 if (!http
->length
&& (conn
->socket
->state
== SOCKET_RETRY_ONCLOSE
1355 || conn
->socket
->state
== SOCKET_CLOSED
)) {
1363 read_http_data(struct socket
*socket
, struct read_buffer
*rb
)
1365 struct connection
*conn
= socket
->conn
;
1366 struct http_connection_info
*http
= conn
->info
;
1369 if (socket
->state
== SOCKET_CLOSED
) {
1370 if (conn
->content_encoding
) {
1371 /* Flush decompression first. */
1374 read_http_data_done(conn
);
1379 if (http
->length
!= LEN_CHUNKED
) {
1380 ret
= read_normal_http_data(conn
, rb
);
1383 ret
= read_chunked_http_data(conn
, rb
);
1388 read_more_http_data(conn
, rb
, 0);
1391 read_more_http_data(conn
, rb
, 1);
1394 read_http_data_done(conn
);
1397 assertm(ret
== -1, "Unexpected return value: %d", ret
);
1398 abort_connection(conn
, connection_state(S_HTTP_ERROR
));
1402 /* Returns offset of the header end, zero if more data is needed, -1 when
1403 * incorrect data was received, -2 if this is HTTP/0.9 and no header is to
1406 get_header(struct read_buffer
*rb
)
1410 /* XXX: We will have to do some guess about whether an HTTP header is
1411 * coming or not, in order to support HTTP/0.9 reply correctly. This
1412 * means a little code duplication with get_http_code(). --pasky */
1413 if (rb
->length
> 4 && c_strncasecmp(rb
->data
, "HTTP/", 5))
1416 for (i
= 0; i
< rb
->length
; i
++) {
1417 unsigned char a0
= rb
->data
[i
];
1418 unsigned char a1
= rb
->data
[i
+ 1];
1424 if (a0
== ASCII_LF
&& a1
== ASCII_LF
1425 && i
< rb
->length
- 1)
1427 if (a0
== ASCII_CR
&& i
< rb
->length
- 3) {
1428 if (a1
== ASCII_CR
) continue;
1429 if (a1
!= ASCII_LF
) return -1;
1430 if (rb
->data
[i
+ 2] == ASCII_CR
) {
1431 if (rb
->data
[i
+ 3] != ASCII_LF
) return -1;
1440 /* returns 1 if we need retry the connection (for negotiate-auth only) */
1442 check_http_authentication(struct connection
*conn
, struct uri
*uri
,
1443 unsigned char *header
, unsigned char *header_field
)
1445 unsigned char *str
, *d
;
1448 d
= parse_header(header
, header_field
, &str
);
1450 if (!c_strncasecmp(d
, "Basic", 5)) {
1451 unsigned char *realm
= get_header_param(d
, "realm");
1454 add_auth_entry(uri
, realm
, NULL
, NULL
, 0);
1459 } else if (!c_strncasecmp(d
, "Digest", 6)) {
1460 unsigned char *realm
= get_header_param(d
, "realm");
1461 unsigned char *nonce
= get_header_param(d
, "nonce");
1462 unsigned char *opaque
= get_header_param(d
, "opaque");
1464 add_auth_entry(uri
, realm
, nonce
, opaque
, 1);
1468 mem_free_if(opaque
);
1472 #ifdef CONFIG_GSSAPI
1473 else if (!c_strncasecmp(d
, HTTPNEG_GSS_STR
, HTTPNEG_GSS_STRLEN
)) {
1474 if (http_negotiate_input(conn
, uri
, HTTPNEG_GSS
, str
)==0)
1479 else if (!c_strncasecmp(d
, HTTPNEG_NEG_STR
, HTTPNEG_NEG_STRLEN
)) {
1480 if (http_negotiate_input(conn
, uri
, HTTPNEG_NEG
, str
)==0)
1487 d
= parse_header(str
, header_field
, &str
);
1494 http_got_header(struct socket
*socket
, struct read_buffer
*rb
)
1496 struct connection
*conn
= socket
->conn
;
1497 struct http_connection_info
*http
= conn
->info
;
1498 unsigned char *head
;
1499 #ifdef CONFIG_COOKIES
1500 unsigned char *cookie
, *ch
;
1503 struct uri
*uri
= conn
->proxied_uri
; /* Set to the real uri */
1504 struct http_version version
= { 0, 9 };
1505 struct connection_state state
= (!is_in_state(conn
->state
, S_PROC
)
1506 ? connection_state(S_GETH
)
1507 : connection_state(S_PROC
));
1511 if (socket
->state
== SOCKET_CLOSED
) {
1512 if (!conn
->tries
&& uri
->host
) {
1513 if (http
->bl_flags
& SERVER_BLACKLIST_NO_CHARSET
) {
1514 del_blacklist_entry(uri
, SERVER_BLACKLIST_NO_CHARSET
);
1516 add_blacklist_entry(uri
, SERVER_BLACKLIST_NO_CHARSET
);
1520 retry_connection(conn
, connection_state(S_CANT_READ
));
1523 socket
->state
= SOCKET_RETRY_ONCLOSE
;
1528 abort_connection(conn
, connection_state(S_HTTP_ERROR
));
1532 read_from_socket(conn
->socket
, rb
, state
, http_got_header
);
1535 /* a == -2 from get_header means HTTP/0.9. In that case, skip
1536 * the get_http_code call; @h and @version have already been
1537 * initialized with the right values. */
1539 if ((a
&& get_http_code(rb
, &h
, &version
))
1541 abort_connection(conn
, connection_state(S_HTTP_ERROR
));
1545 /* When no header, HTTP/0.9 document. That's always text/html,
1547 * http://www.w3.org/Protocols/HTTP/AsImplemented.html. */
1548 /* FIXME: This usage of fake protocol headers for setting up the
1549 * content type has been obsoleted by the @content_type member of
1550 * {struct cache_entry}. */
1551 head
= (a
? memacpy(rb
->data
, a
)
1552 : stracpy("\r\nContent-Type: text/html\r\n"));
1554 abort_connection(conn
, connection_state(S_OUT_OF_MEM
));
1558 if (check_http_server_bugs(uri
, http
, head
)) {
1560 retry_connection(conn
, connection_state(S_RESTART
));
1565 if (uri
->protocol
== PROTOCOL_FILE
) {
1566 /* ``Status'' is not a standard HTTP header field although some
1567 * HTTP servers like www.php.net uses it for some reason. It should
1568 * only be used for CGI scripts so that it does not interfere
1569 * with status code depended handling for ``normal'' HTTP like
1571 d
= parse_header(head
, "Status", NULL
);
1576 if (h2
>= 100 && h2
< 600) h
= h2
;
1579 abort_connection(conn
, connection_state(S_HTTP_ERROR
));
1586 #ifdef CONFIG_COOKIES
1588 while ((cookie
= parse_header(ch
, "Set-Cookie", &ch
))) {
1589 set_cookie(uri
, cookie
);
1597 state
= connection_state(S_PROC
);
1598 kill_buffer_data(rb
, a
);
1603 abort_connection(conn
, connection_state(S_HTTP_ERROR
));
1608 http_end_request(conn
, connection_state(S_OK
), 1);
1613 http_end_request(conn
, connection_state(S_HTTP_204
), 0);
1616 if (h
== 200 && connection_is_https_proxy(conn
) && !conn
->socket
->ssl
) {
1619 socket
->need_ssl
= 1;
1620 complete_connect_socket(socket
, uri
, http_send_header
);
1622 abort_connection(conn
, connection_state(S_SSL_ERROR
));
1627 conn
->cached
= get_cache_entry(conn
->uri
);
1628 if (!conn
->cached
) {
1630 abort_connection(conn
, connection_state(S_OUT_OF_MEM
));
1633 conn
->cached
->cgi
= conn
->cgi
;
1634 mem_free_set(&conn
->cached
->head
, head
);
1636 if (!get_opt_bool("document.cache.ignore_cache_control", NULL
)) {
1637 struct cache_entry
*cached
= conn
->cached
;
1639 /* I am not entirely sure in what order we should process these
1640 * headers and if we should still process Cache-Control max-age
1641 * if we already set max age to date mentioned in Expires.
1643 /* Ensure that when ever cached->max_age is set, cached->expired
1644 * is also set, so the cache management knows max_age contains a
1645 * valid time. If on the other hand no caching is requested
1646 * cached->expire should be set to zero. */
1647 if ((d
= parse_header(cached
->head
, "Expires", NULL
))) {
1648 /* Convert date to seconds. */
1649 time_t expires
= parse_date(&d
, NULL
, 0, 1);
1653 if (expires
&& cached
->cache_mode
!= CACHE_MODE_NEVER
) {
1654 timeval_from_seconds(&cached
->max_age
, expires
);
1659 if ((d
= parse_header(cached
->head
, "Pragma", NULL
))) {
1660 if (strstr(d
, "no-cache")) {
1661 cached
->cache_mode
= CACHE_MODE_NEVER
;
1667 if (cached
->cache_mode
!= CACHE_MODE_NEVER
1668 && (d
= parse_header(cached
->head
, "Cache-Control", NULL
))) {
1669 if (strstr(d
, "no-cache") || strstr(d
, "must-revalidate")) {
1670 cached
->cache_mode
= CACHE_MODE_NEVER
;
1674 unsigned char *pos
= strstr(d
, "max-age=");
1676 assert(cached
->cache_mode
!= CACHE_MODE_NEVER
);
1679 /* Grab the number of seconds. */
1682 timeval_from_seconds(&max_age
, atol(pos
+ 8));
1683 timeval_now(&cached
->max_age
);
1684 timeval_add_interval(&cached
->max_age
, &max_age
);
1694 /* XXX: Is there some reason why NOT to follow the Location header
1695 * for any status? If the server didn't mean it, it wouldn't send
1696 * it, after all...? --pasky */
1697 if (h
== 201 || h
== 301 || h
== 302 || h
== 303 || h
== 307) {
1698 d
= parse_header(conn
->cached
->head
, "Location", NULL
);
1700 int use_get_method
= (h
== 303);
1702 /* A note from RFC 2616 section 10.3.3:
1703 * RFC 1945 and RFC 2068 specify that the client is not
1704 * allowed to change the method on the redirected
1705 * request. However, most existing user agent
1706 * implementations treat 302 as if it were a 303
1707 * response, performing a GET on the Location
1708 * field-value regardless of the original request
1710 /* So POST must not be redirected to GET, but some
1711 * BUGGY message boards rely on it :-( */
1713 && get_opt_bool("protocol.http.bugs.broken_302_redirect", NULL
))
1716 redirect_cache(conn
->cached
, d
, use_get_method
, -1);
1722 if (check_http_authentication(conn
, uri
,
1723 conn
->cached
->head
, "WWW-Authenticate")) {
1724 retry_connection(conn
, connection_state(S_RESTART
));
1732 d
= parse_header(conn
->cached
->head
, "Proxy-Authenticate", &str
);
1734 if (!c_strncasecmp(d
, "Basic", 5)) {
1735 unsigned char *realm
= get_header_param(d
, "realm");
1738 mem_free_set(&proxy_auth
.realm
, realm
);
1739 proxy_auth
.digest
= 0;
1744 } else if (!c_strncasecmp(d
, "Digest", 6)) {
1745 unsigned char *realm
= get_header_param(d
, "realm");
1746 unsigned char *nonce
= get_header_param(d
, "nonce");
1747 unsigned char *opaque
= get_header_param(d
, "opaque");
1749 mem_free_set(&proxy_auth
.realm
, realm
);
1750 mem_free_set(&proxy_auth
.nonce
, nonce
);
1751 mem_free_set(&proxy_auth
.opaque
, opaque
);
1752 proxy_auth
.digest
= 1;
1759 d
= parse_header(str
, "Proxy-Authenticate", &str
);
1763 kill_buffer_data(rb
, a
);
1766 http
->recv_version
= version
;
1768 if ((d
= parse_header(conn
->cached
->head
, "Connection", NULL
))
1769 || (d
= parse_header(conn
->cached
->head
, "Proxy-Connection", NULL
))) {
1770 if (!c_strcasecmp(d
, "close")) http
->close
= 1;
1772 } else if (PRE_HTTP_1_1(version
)) {
1778 d
= parse_header(conn
->cached
->head
, "Content-Range", NULL
);
1780 if (strlen(d
) > 6) {
1782 if (isdigit(d
[6]) && !c_strcasecmp(d
, "bytes")) {
1786 f
= strtol(d
+ 6, NULL
, 10);
1788 if (!errno
&& f
>= 0) conn
->from
= f
;
1793 if (cf
&& !conn
->from
&& !conn
->unrestartable
) conn
->unrestartable
= 1;
1794 if ((conn
->progress
->start
<= 0 && conn
->from
> cf
) || conn
->from
< 0) {
1795 /* We don't want this if conn->progress.start because then conn->from will
1796 * be probably value of conn->progress.start, while cf is 0. */
1797 abort_connection(conn
, connection_state(S_HTTP_ERROR
));
1804 foreach (s
, conn
->downloads
) {
1805 fprintf(stderr
, "conn %p status %p pri %d st %d er %d :: ce %s",
1806 conn
, s
, s
->pri
, s
->state
, s
->prev_error
,
1807 s
->cached
? s
->cached
->url
: (unsigned char *) "N-U-L-L");
1812 if (conn
->progress
->start
>= 0) {
1813 /* Update to the real value which we've got from Content-Range. */
1814 conn
->progress
->seek
= conn
->from
;
1816 conn
->progress
->start
= conn
->from
;
1818 d
= parse_header(conn
->cached
->head
, "Content-Length", NULL
);
1824 l
= strtol(d
, (char **) &ep
, 10);
1826 if (!errno
&& !*ep
&& l
>= 0) {
1827 if (!http
->close
|| POST_HTTP_1_0(version
))
1829 conn
->est_length
= conn
->from
+ l
;
1834 if (!conn
->unrestartable
) {
1835 d
= parse_header(conn
->cached
->head
, "Accept-Ranges", NULL
);
1838 if (!c_strcasecmp(d
, "none"))
1839 conn
->unrestartable
= 1;
1843 conn
->unrestartable
= 1;
1847 d
= parse_header(conn
->cached
->head
, "Transfer-Encoding", NULL
);
1849 if (!c_strcasecmp(d
, "chunked")) {
1850 http
->length
= LEN_CHUNKED
;
1851 http
->chunk_remaining
= CHUNK_SIZE
;
1855 if (!http
->close
&& http
->length
== -1) http
->close
= 1;
1857 d
= parse_header(conn
->cached
->head
, "Last-Modified", NULL
);
1859 if (conn
->cached
->last_modified
&& c_strcasecmp(conn
->cached
->last_modified
, d
)) {
1860 delete_entry_content(conn
->cached
);
1864 retry_connection(conn
, connection_state(S_MODIFIED
));
1868 if (!conn
->cached
->last_modified
) conn
->cached
->last_modified
= d
;
1871 if (!conn
->cached
->last_modified
) {
1872 d
= parse_header(conn
->cached
->head
, "Date", NULL
);
1873 if (d
) conn
->cached
->last_modified
= d
;
1876 /* FIXME: Parse only if HTTP/1.1 or later? --Zas */
1877 d
= parse_header(conn
->cached
->head
, "ETag", NULL
);
1879 if (conn
->cached
->etag
) {
1880 unsigned char *old_tag
= conn
->cached
->etag
;
1881 unsigned char *new_tag
= d
;
1883 /* http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.19 */
1885 if (new_tag
[0] == 'W' && new_tag
[1] == '/')
1888 if (old_tag
[0] == 'W' && old_tag
[1] == '/')
1891 if (strcmp(new_tag
, old_tag
)) {
1892 delete_entry_content(conn
->cached
);
1896 retry_connection(conn
, connection_state(S_MODIFIED
));
1902 if (!conn
->cached
->etag
)
1903 conn
->cached
->etag
= d
;
1908 d
= parse_header(conn
->cached
->head
, "Content-Encoding", NULL
);
1910 unsigned char *extension
= get_extension_from_uri(uri
);
1911 enum stream_encoding file_encoding
;
1913 file_encoding
= extension
? guess_encoding(extension
) : ENCODING_NONE
;
1914 mem_free_if(extension
);
1916 /* If the content is encoded, we want to preserve the encoding
1917 * if it is implied by the extension, so that saving the URI
1918 * will leave the saved file with the correct encoding. */
1920 if (file_encoding
!= ENCODING_GZIP
1921 && (!c_strcasecmp(d
, "gzip") || !c_strcasecmp(d
, "x-gzip")))
1922 conn
->content_encoding
= ENCODING_GZIP
;
1923 if (!c_strcasecmp(d
, "deflate") || !c_strcasecmp(d
, "x-deflate"))
1924 conn
->content_encoding
= ENCODING_DEFLATE
;
1928 if (file_encoding
!= ENCODING_BZIP2
1929 && (!c_strcasecmp(d
, "bzip2") || !c_strcasecmp(d
, "x-bzip2")))
1930 conn
->content_encoding
= ENCODING_BZIP2
;
1934 if (file_encoding
!= ENCODING_LZMA
1935 && (!c_strcasecmp(d
, "lzma") || !c_strcasecmp(d
, "x-lzma")))
1936 conn
->content_encoding
= ENCODING_LZMA
;
1941 if (conn
->content_encoding
!= ENCODING_NONE
) {
1942 mem_free_if(conn
->cached
->encoding_info
);
1943 conn
->cached
->encoding_info
= stracpy(get_encoding_name(conn
->content_encoding
));
1946 if (http
->length
== -1 || http
->close
)
1947 socket
->state
= SOCKET_END_ONCLOSE
;
1949 read_http_data(socket
, rb
);