drop md4 i?86 specific asm implementations
[mldonkey.git] / src / utils / net / http_client.ml
blob98f430e3cb010505c43c7c5d80f02772de807f1a
1 (* Copyright 2002 b8_bavard, b8_fee_carabine, INRIA *)
2 (*
3 This file is part of mldonkey.
5 mldonkey is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 mldonkey is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mldonkey; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 (* HTTP Requests:
22 GET, POST, HEAD, PUT, DELETE, TRACE, OPTIONS, CONNECT
25 open Printf2
26 open BasicSocket
27 open Url
28 open TcpBufferedSocket
29 open Int64ops
32 type http_request =
33 GET
34 | POST
35 | HEAD
36 | PUT
37 | DELETE
38 | TRACE
40 type error = [ `HTTP of int | `RST of BasicSocket.close_reason | `DNS | `Block of Ip.t ]
42 let show_error = function
43 | `HTTP code -> Printf.sprintf "HTTP error code %d" code
44 | `RST reason -> Printf.sprintf "Connection closed : %s" (BasicSocket.string_of_reason reason)
45 | `DNS -> Printf.sprintf "DNS resolution failed"
46 | `Block ip -> Printf.sprintf "Blocked connection to %s" (Ip.to_string ip)
48 let verbose = ref false
50 type request = {
51 req_headers : ( string * string ) list;
52 req_user_agent : string;
53 req_accept : string;
54 req_proxy : (string * int * (string * string) option) option; (* (host,port,(login,password)) *)
55 mutable req_url : url;
56 mutable req_gzip : bool;
57 mutable req_save_to_file_time : float;
58 req_request : http_request;
59 req_referer : Url.url option;
60 req_retry : int;
61 req_max_retry : int;
62 req_save : bool;
63 req_max_total_time : float;
64 req_filter_ip : (Ip.t -> bool);
67 type content_handler =
68 int64 -> (string * string) list -> TcpBufferedSocket.t -> int -> unit
70 let log_prefix = "[HTTPcl]"
72 let lprintf_nl fmt =
73 lprintf_nl2 log_prefix fmt
75 let basic_request = {
76 req_url = Url.of_string "http://mldonkey.sf.net/";
77 req_referer = None;
78 req_save_to_file_time = 0.;
79 req_request = GET;
80 req_gzip = false;
81 req_proxy = None;
82 req_headers = [];
83 req_user_agent = "Wget 1.4";
84 req_accept = "*/*";
85 req_retry = 0;
86 req_max_retry = 0;
87 req_save = false;
88 req_max_total_time = infinite_timeout;
89 req_filter_ip = (fun _ -> true);
92 let make_full_request r =
93 let url = r.req_url in
94 let args = url.args in
95 let res = Buffer.create 80 in
96 let is_real_post = r.req_request = POST && args <> [] in
97 if is_real_post
98 then Buffer.add_string res "POST "
99 else
100 Buffer.add_string res (if r.req_request = HEAD then "HEAD " else "GET ");
101 Buffer.add_string res (
102 let url =
103 if r.req_proxy <> None
104 then Url.to_string_no_args url
105 else url.short_file
107 (* I get a lot more bittorrent urls with this line: *)
108 let url = (Str.global_replace (Str.regexp " ") "%20" url) in
109 let url = if is_real_post then url else
110 Url.put_args url args
112 url);
113 Printf.bprintf res " HTTP/1.0\r\nHost: %s%s\r\n" url.server (if url.port != 80 then Printf.sprintf ":%d" url.port else "");
114 List.iter (fun (a,b) ->
115 Printf.bprintf res "%s: %s\r\n" a b
116 ) r.req_headers;
117 Printf.bprintf res "Accept-Encoding: gzip\r\n";
118 Printf.bprintf res "User-Agent: %s\r\n" r.req_user_agent;
119 Printf.bprintf res "Accept: %s\r\n" r.req_accept;
120 Printf.bprintf res "Connection: close\r\n";
121 begin match r.req_referer with
122 | None -> ()
123 | Some url -> Printf.bprintf res "Referer: %s\r\n" (Url.to_string_no_args url)
124 end;
125 begin match r.req_proxy with
126 | Some (_,_,Some (login,password)) ->
127 Printf.bprintf res "Proxy-Authorization: Basic %s\n" (Base64.encode (login ^ ":" ^ password))
128 | _ -> ()
129 end;
130 if url.user <> "" then begin
131 let userpass = Printf.sprintf "%s:%s" url.user url.passwd in
132 Printf.bprintf res "Authorization: Basic %s\r\n" (Base64.encode userpass)
133 end;
134 if is_real_post then begin
135 let post = Buffer.create 80 in
136 let rec make_post = function
137 | [] -> assert false
138 | [a, b] ->
139 Printf.bprintf post "%s%c%s" (Url.encode a) '=' (Url.encode b)
140 | (a,b)::l ->
141 Printf.bprintf post "%s%c%s%c"
142 (Url.encode a) '=' (Url.encode b) '&';
143 make_post l in
144 make_post args;
145 Printf.bprintf res "Content-Type: application/x-www-form-urlencoded\r\nContent-Length: %d\r\n\r\n%s"
146 (Buffer.length post) (Buffer.contents post)
147 end else
148 Buffer.add_string res "\r\n";
149 let s = Buffer.contents res in
150 if !verbose then
151 lprintf_nl "make_full_request on URL: %s" (String.escaped s);
154 let split_head s =
155 let rec iter pos1 res =
157 let pos3 = String.index_from s pos1 '\n' in
158 let pos2 = if pos3 > 0 && s.[pos3 - 1] = '\r' then pos3 - 1 else pos3 in
159 let line = String.sub s pos1 (pos2 - pos1) in
160 if line = "" then List.rev res else
161 iter (pos3+1) (line :: res)
162 with _ ->
163 let last_line = String.sub s pos1 (String.length s - pos1) in
164 List.rev (if last_line = "" then res else last_line :: res)
166 iter 0 []
168 let parse_header headers_handler sock header =
169 let headers = split_head header in
170 match headers with
171 [] -> failwith "Ill formed reply"
172 | ans :: headers ->
173 if !verbose then lprintf_nl "parse_header: ANSWER %s" ans;
174 let ans_code = int_of_string (String.sub ans 9 3) in
175 let headers = List.map (fun s ->
176 let sep = String.index s ':' in
177 (* TODO: we should lowercase the names here!
178 The header-names are case-insensitive,
179 therefore we only use lowercased names. *)
180 let name_head = String.sub s 0 sep in
181 let size = String.length s in
182 let content_head = String.sub s (sep+2) (size-sep-2) in
183 (name_head, content_head)
184 ) headers in
186 headers_handler sock ans_code headers;
187 with _ ->
188 TcpBufferedSocket.close sock (Closed_for_error "bad header")
190 let read_header header_handler sock nread =
191 let b = TcpBufferedSocket.buf sock in
192 let end_pos = b.pos + b.len in
193 let new_pos = end_pos - nread in
194 let new_pos = max 0 (new_pos - 1) in
196 lprintf "received [%s]" (String.escaped
197 (String.sub b.buf new_pos nread));
199 let rec iter i =
200 let end_pos = b.pos + b.len in
201 if i < end_pos then
202 if Bytes.get b.buf i = '\n' && i <= end_pos - 2 then
203 let c = (Bytes.get b.buf (i+1)) in
204 if c = '\n' then
205 let len = i + 2 - b.pos in
206 let header = Bytes.sub_string b.buf b.pos len in
207 buf_used b len;
208 header_handler sock header
209 else
210 if c = '\r' && i <= end_pos - 3 && (Bytes.get b.buf (i+2)) = '\n' then
211 let len = i + 3 - b.pos in
212 let header = Bytes.sub_string b.buf b.pos len in
213 buf_used b len;
214 header_handler sock header
215 else
216 iter (i+1)
217 else
218 iter (i+1)
219 else
222 iter new_pos
224 let http_reply_handler nr headers_handler sock nread =
225 (* lprintf "http_reply_handler\n"; *)
226 nr := true;
227 read_header (parse_header headers_handler) sock nread
230 let def_ferr = (fun _ -> ())
232 let rec get_page r content_handler f ferr =
233 let ok = ref false in
234 let ferr =
235 let err_done = ref false in (* call not more than once *)
236 fun c -> if not !err_done then begin err_done := true; ferr c; end
238 let rec get_url level r =
240 let url = r.req_url in
241 let level = r.req_retry in
242 let request = make_full_request r in
243 let server, port =
244 match r.req_proxy with
245 | None -> url.server, url.port
246 | Some (s, p, _) -> s, p
248 (* lprintf "async_ip ...\n"; *)
249 Ip.async_ip server (fun ip ->
250 match r.req_filter_ip ip with
251 | false -> ferr (`Block ip)
252 | true ->
253 (* lprintf "IP done %s:%d\n" (Ip.to_string ip) port;*)
254 let token = create_token unlimited_connection_manager in
255 let sock = TcpBufferedSocket.connect token "http client connecting"
256 (try Ip.to_inet_addr ip with e -> raise Not_found) port
257 (fun sock e ->
258 (* if !verbose then lprintf_nl "Event %s" (string_of_event e); *)
259 match e with (* FIXME content-length check *)
260 | BASIC_EVENT (CLOSED (Closed_by_user | Closed_by_peer)) when !ok -> f ()
261 | BASIC_EVENT (CLOSED reason) -> ferr (`RST reason)
262 | BASIC_EVENT LTIMEOUT -> close sock Closed_for_lifetime
263 | _ -> ())
266 let nread = ref false in
267 if !verbose then
268 lprintf_nl "get_page: %s" (String.escaped request);
269 TcpBufferedSocket.write_string sock request;
270 TcpBufferedSocket.set_reader sock (http_reply_handler nread
271 (default_headers_handler url level));
272 set_rtimeout sock 5.;
273 set_lifetime sock r.req_max_total_time;
275 (fun () -> ferr `DNS);
276 with e ->
277 lprintf_nl "error in get_url";
278 raise Not_found
280 and default_headers_handler old_url level sock ans_code headers =
281 let print_headers () =
282 List.iter (fun (name, value) ->
283 lprintf_nl "[%s]=[%s]" name value;
284 ) headers;
286 if !verbose then print_headers ();
287 match ans_code with
288 | 200 ->
289 ok := true;
290 let content_length = ref (-1L) in
291 List.iter (fun (name, content) ->
292 match String.lowercase name with
293 | "content-length" ->
294 (try
295 content_length := Int64.of_string content
296 with _ ->
297 lprintf_nl "bad content length [%s]" content)
298 | "content-encoding" ->
299 if String.lowercase content = "gzip" then r.req_gzip <- true
300 | _ -> ()
301 ) headers;
302 let location = "Location", Url.to_string old_url in
303 let content_handler = content_handler !content_length (location::headers) in
304 set_reader sock content_handler;
305 let buf = TcpBufferedSocket.buf sock in
306 if buf.len > 0 then
307 content_handler sock buf.len
309 | 301 | 302 | 304 ->
310 if !verbose then lprintf_nl "%d: Redirect" ans_code;
311 let retrynum = r.req_retry in
312 if retrynum < r.req_max_retry then begin
314 let url = ref "" in
315 List.iter (fun (name, content) ->
316 if String.lowercase name = "location" then
317 url := content;
318 ) headers;
319 if !verbose then print_headers ();
320 let url =
321 if String2.check_prefix !url "." then url := String2.after !url 1;
322 if String.length !url > 0 && !url.[0] <> '/'
323 then !url
324 else Printf.sprintf "http://%s%s%s"
325 old_url.Url.server
326 (if old_url.Url.port = 80 then "" else Printf.sprintf ":%d" old_url.Url.port)
327 !url
330 if !verbose then lprintf_nl "Redirected to %s" url;
331 r.req_url <- (Url.of_string url);
332 let r = { r with
333 req_url = Url.of_string url;
334 req_retry = retrynum+1
335 } in
336 get_page r content_handler f ferr
338 with e ->
339 lprintf_nl "error understanding redirect response %d" ans_code;
340 print_headers ();
341 raise Not_found
344 else begin
345 lprintf_nl "more than %d redirections, aborting." r.req_max_retry;
346 raise Not_found
349 | 400 when r.req_request = HEAD ->
350 lprintf_nl "Error 400 received for HEAD %s, re-try GET" (Url.to_string_no_args r.req_url);
351 let r2 = {
352 r with
353 req_request = GET;
354 } in
355 get_page r2 content_handler f ferr
357 | 404 ->
358 lprintf_nl "404: Not found for: %s" (Url.to_string_no_args r.req_url);
359 close sock (Closed_for_error "bad reply");
360 ferr (`HTTP ans_code);
361 raise Not_found
363 | 502 | 503 | 504 ->
364 if !verbose then lprintf_nl "%d: Unavailable" ans_code;
365 let retrynum = r.req_retry in
366 if retrynum < r.req_max_retry then begin
367 if !verbose then print_headers ();
368 let seconds = (retrynum+1)*10 in
369 lprintf_nl "retry %d/%d in %d seconds for %s"
370 (retrynum+1) r.req_max_retry seconds (Url.to_string_no_args r.req_url);
371 let r = { r with
372 req_retry = retrynum+1
373 } in
374 add_timer (float(seconds)) (fun t -> get_page r content_handler f ferr)
376 else begin
377 lprintf_nl "more than %d retries, aborting." r.req_max_retry;
378 ferr (`HTTP ans_code);
379 raise Not_found
382 | _ ->
383 lprintf_nl "%d: bad reply for: %s"
384 ans_code (Url.to_string_no_args r.req_url);
385 close sock (Closed_for_error "bad reply");
386 ferr (`HTTP ans_code);
387 raise Not_found
389 get_url 0 r
391 (** Copy all data from [input] to [output] *)
392 let io_copy input output =
394 let size = 16 * 1024 in
395 let s = String.create size in
396 while true do
397 let n = IO.input input s 0 size in
398 if n = 0 then raise IO.No_more_input;
399 ignore (IO.really_output output s 0 n)
400 done
401 with IO.No_more_input -> ()
403 let wget r f =
405 let file_buf = Buffer.create 1000 in
406 let file_size = ref 0L in
409 get_page r (fun maxlen headers sock nread ->
410 (* lprintf "received %d\n" nread; *)
411 let buf = TcpBufferedSocket.buf sock in
413 if nread > 0 then begin
414 let left =
415 if maxlen >= 0L then
416 min (Int64.to_int (maxlen -- !file_size)) nread
417 else nread
419 Buffer.add_bytes file_buf (Bytes.sub buf.buf buf.pos left);
420 buf_used buf left;
421 file_size := !file_size ++ (Int64.of_int left);
422 if nread > left then
423 TcpBufferedSocket.close sock Closed_by_user
426 (fun _ ->
427 let s = Buffer.contents file_buf in
428 if s = "" then begin
429 lprintf_nl "Empty content for url %s"
430 (Url.to_string r.req_url);
431 end;
433 let webinfos_dir = "web_infos" in
434 Unix2.safe_mkdir webinfos_dir;
435 Unix2.can_write_to_directory webinfos_dir;
437 let base = Filename.basename r.req_url.Url.short_file in
438 (* Base could be "." for http://site.com/ *)
439 let base = if base = "."
440 then begin
441 let prng = Random.State.make_self_init () in
442 let rnd = (Random.State.bits prng) land 0xFFFFFF in
443 Printf.sprintf "http_%06x.tmp" rnd
444 end else base
447 let filename = Filename.concat webinfos_dir base in
448 if !verbose then lprintf_nl "Filename: %s" filename;
449 if r.req_gzip then
450 begin
452 Unix2.tryopen_write_bin filename begin fun oc ->
453 let gz = Gzip.input_io (IO.input_string s) in
454 io_copy gz (IO.output_channel oc)
456 with e ->
457 lprintf_nl "Exception %s while uncompressing content from %s" (Printexc2.to_string e) (Url.to_string r.req_url);
458 Sys.remove filename;
459 raise Not_found
461 else
462 Unix2.tryopen_write_bin filename (fun oc -> output_string oc s);
463 if r.req_save_to_file_time <> 0. then
464 Unix.utimes filename r.req_save_to_file_time r.req_save_to_file_time;
466 (f filename : unit);
467 if not r.req_save then Sys.remove filename
468 with e ->
469 lprintf_nl "Exception %s in loading downloaded file %s" (Printexc2.to_string e) filename;
470 Sys.remove filename;
471 raise Not_found
472 ) def_ferr
473 with e ->
474 lprintf_nl "Exception %s in wget" (Printexc2.to_string e);
475 raise Not_found
477 let whead2 r f ferr =
478 get_page r
479 (fun maxlen headers ->
480 (try f headers with _ -> ());
481 fun sock nread ->
482 close sock Closed_by_user
484 (fun _ -> ())
485 ferr
487 let whead r f = whead2 r f def_ferr
489 let wget_string r f ?(ferr=def_ferr) progress =
491 let file_buf = Buffer.create 1000 in
492 let file_size = ref 0L in
494 get_page r
495 (fun maxlen headers sock nread ->
496 let buf = TcpBufferedSocket.buf sock in
498 if nread > 0 then begin
499 let left =
500 if maxlen >= 0L then
501 min (Int64.to_int (maxlen -- !file_size)) nread
502 else nread
504 Buffer.add_bytes file_buf (Bytes.sub buf.buf buf.pos left);
505 progress left maxlen;
506 buf_used buf left;
507 file_size := !file_size ++ (Int64.of_int left);
508 if nread > left then
509 TcpBufferedSocket.close sock Closed_by_user
510 end)
511 (fun _ ->
512 let content =
513 if r.req_gzip then
515 let io = Gzip.input_io (IO.input_string (Buffer.contents file_buf)) in
516 IO.read_all io
517 with e ->
518 lprintf_nl "Exception %s while uncompressing content from %s" (Printexc2.to_string e) (Url.to_string r.req_url);
519 raise Not_found
520 else
521 Buffer.contents file_buf
523 f content
524 ) ferr
527 let split_header header =
528 let len = String.length header in
529 let header_bytes = Bytes.of_string header in
530 for i = 0 to len - 1 do
531 if Bytes.get header_bytes i = '\r' then
532 Bytes.set header_bytes i '\n'
533 done;
534 for i = len - 1 downto 1 do
535 if Bytes.get header_bytes (i - 1) = '\n' then
536 if Bytes.get header_bytes i = ' ' then (
537 Bytes.set header_bytes i ',';
538 Bytes.set header_bytes (i - 1) ','
539 ) else if Bytes.get header_bytes i = ',' then
540 Bytes.set header_bytes (i - 1) ','
541 done;
542 String2.split_simplify (Bytes.unsafe_to_string header_bytes) '\n'
544 let cut_headers headers =
546 List.map (fun s ->
547 let pos = String.index s ':' in
548 let len = String.length s in
549 let key = String.sub s 0 pos in
550 String.lowercase key, if pos+1 < len && s.[pos+1] = ' ' then
551 String.sub s (pos+2) (len-pos-2), key
552 else
553 String.sub s (pos+1) (len-pos-1), key
554 ) headers
555 with e ->
556 lprintf_nl "Exception in cut_headers: %s" (Printexc2.to_string e);
557 raise e