patch #7573
[mldonkey.git] / src / utils / net / http_client.ml
blob22a4f2c73a0c3472defbd887b19b1637d3dde902
1 (* Copyright 2002 b8_bavard, b8_fee_carabine, INRIA *)
2 (*
3 This file is part of mldonkey.
5 mldonkey is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 mldonkey is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mldonkey; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 (* HTTP Requests:
22 GET, POST, HEAD, PUT, DELETE, TRACE, OPTIONS, CONNECT
25 open Printf2
26 open BasicSocket
27 open Unix
28 open Url
29 open TcpBufferedSocket
30 open Int64ops
33 type http_request =
34 GET
35 | POST
36 | HEAD
37 | PUT
38 | DELETE
39 | TRACE
41 type error = [ `HTTP of int | `RST of BasicSocket.close_reason | `DNS ]
43 let show_error = function
44 | `HTTP code -> Printf.sprintf "HTTP error code %d" code
45 | `RST reason -> Printf.sprintf "Connection closed : %s" (BasicSocket.string_of_reason reason)
46 | `DNS -> Printf.sprintf "DNS resolution failed"
48 let verbose = ref false
50 type request = {
51 req_headers : ( string * string ) list;
52 req_user_agent : string;
53 req_accept : string;
54 req_proxy : (string * int * (string * string) option) option; (* (host,port,(login,password)) *)
55 mutable req_url : url;
56 mutable req_gzip : bool;
57 mutable req_save_to_file_time : float;
58 req_request : http_request;
59 req_referer : Url.url option;
60 req_retry : int;
61 req_max_retry : int;
62 req_save : bool;
63 req_max_total_time : float;
66 type content_handler =
67 int64 -> (string * string) list -> TcpBufferedSocket.t -> int -> unit
69 let log_prefix = "[HTTPcl]"
71 let lprintf_nl fmt =
72 lprintf_nl2 log_prefix fmt
74 let basic_request = {
75 req_url = Url.of_string "http://www.mldonkey.org/";
76 req_referer = None;
77 req_save_to_file_time = 0.;
78 req_request = GET;
79 req_gzip = false;
80 req_proxy = None;
81 req_headers = [];
82 req_user_agent = "Wget 1.4";
83 req_accept = "*/*";
84 req_retry = 0;
85 req_max_retry = 0;
86 req_save = false;
87 req_max_total_time = infinite_timeout;
90 let make_full_request r =
91 let url = r.req_url in
92 let args = url.args in
93 let res = Buffer.create 80 in
94 let is_real_post = r.req_request = POST && args <> [] in
95 if is_real_post
96 then Buffer.add_string res "POST "
97 else
98 Buffer.add_string res (if r.req_request = HEAD then "HEAD " else "GET ");
99 Buffer.add_string res (
100 let url =
101 if r.req_proxy <> None
102 then Url.to_string_no_args url
103 else url.short_file
105 (* I get a lot more bittorrent urls with this line: *)
106 let url = (Str.global_replace (Str.regexp " ") "%20" url) in
107 let url = if is_real_post then url else
108 Url.put_args url args
110 url);
111 Printf.bprintf res " HTTP/1.0\r\nHost: %s%s\r\n" url.server (if url.port != 80 then Printf.sprintf ":%d" url.port else "");
112 List.iter (fun (a,b) ->
113 Printf.bprintf res "%s: %s\r\n" a b
114 ) r.req_headers;
115 Printf.bprintf res "Accept-Encoding: gzip\r\n";
116 Printf.bprintf res "User-Agent: %s\r\n" r.req_user_agent;
117 Printf.bprintf res "Accept: %s\r\n" r.req_accept;
118 Printf.bprintf res "Connection: close\r\n";
119 begin match r.req_referer with
120 | None -> ()
121 | Some url -> Printf.bprintf res "Referer: %s\r\n" (Url.to_string_no_args url)
122 end;
123 begin match r.req_proxy with
124 | Some (_,_,Some (login,password)) ->
125 Printf.bprintf res "Proxy-Authorization: Basic %s\n" (Base64.encode (login ^ ":" ^ password))
126 | _ -> ()
127 end;
128 if url.user <> "" then begin
129 let userpass = Printf.sprintf "%s:%s" url.user url.passwd in
130 Printf.bprintf res "Authorization: Basic %s\r\n" (Base64.encode userpass)
131 end;
132 if is_real_post then begin
133 let post = Buffer.create 80 in
134 let rec make_post = function
135 | [] -> assert false
136 | [a, b] ->
137 Printf.bprintf post "%s%c%s" (Url.encode a) '=' (Url.encode b)
138 | (a,b)::l ->
139 Printf.bprintf post "%s%c%s%c"
140 (Url.encode a) '=' (Url.encode b) '&';
141 make_post l in
142 make_post args;
143 Printf.bprintf res "Content-Type: application/x-www-form-urlencoded\r\nContent-Length: %d\r\n\r\n%s"
144 (Buffer.length post) (Buffer.contents post)
145 end else
146 Buffer.add_string res "\r\n";
147 let s = Buffer.contents res in
148 if !verbose then
149 lprintf_nl "make_full_request on URL: %s" (String.escaped s);
152 let split_head s =
153 let rec iter pos1 res =
155 let pos3 = String.index_from s pos1 '\n' in
156 let pos2 = if pos3 > 0 && s.[pos3 - 1] = '\r' then pos3 - 1 else pos3 in
157 let line = String.sub s pos1 (pos2 - pos1) in
158 if line = "" then List.rev res else
159 iter (pos3+1) (line :: res)
160 with _ ->
161 let last_line = String.sub s pos1 (String.length s - pos1) in
162 List.rev (if last_line = "" then res else last_line :: res)
164 iter 0 []
166 let parse_header headers_handler sock header =
167 let headers = split_head header in
168 match headers with
169 [] -> failwith "Ill formed reply"
170 | ans :: headers ->
171 if !verbose then lprintf_nl "parse_header: ANSWER %s" ans;
172 let ans_code = int_of_string (String.sub ans 9 3) in
173 let headers = List.map (fun s ->
174 let sep = String.index s ':' in
175 (* TODO: we should lowercase the names here!
176 The header-names are case-insensitive,
177 therefore we only use lowercased names. *)
178 let name_head = String.sub s 0 sep in
179 let size = String.length s in
180 let content_head = String.sub s (sep+2) (size-sep-2) in
181 (name_head, content_head)
182 ) headers in
184 headers_handler sock ans_code headers;
185 with _ ->
186 TcpBufferedSocket.close sock (Closed_for_error "bad header")
188 let read_header header_handler sock nread =
189 let b = TcpBufferedSocket.buf sock in
190 let end_pos = b.pos + b.len in
191 let new_pos = end_pos - nread in
192 let new_pos = max 0 (new_pos - 1) in
194 lprintf "received [%s]" (String.escaped
195 (String.sub b.buf new_pos nread));
197 let rec iter i =
198 let end_pos = b.pos + b.len in
199 if i < end_pos then
200 if b.buf.[i] = '\n' && i <= end_pos - 2 then
201 let c = b.buf.[i+1] in
202 if c = '\n' then
203 let len = i + 2 - b.pos in
204 let header = String.sub b.buf b.pos len in
205 buf_used b len;
206 header_handler sock header
207 else
208 if c = '\r' && i <= end_pos - 3 && b.buf.[i+2] = '\n' then
209 let len = i + 3 - b.pos in
210 let header = String.sub b.buf b.pos len in
211 buf_used b len;
212 header_handler sock header
213 else
214 iter (i+1)
215 else
216 iter (i+1)
217 else
220 iter new_pos
222 let http_reply_handler nr headers_handler sock nread =
223 (* lprintf "http_reply_handler\n"; *)
224 nr := true;
225 read_header (parse_header headers_handler) sock nread
228 let def_ferr = (fun _ -> ())
230 let rec get_page r content_handler f ferr =
231 let ok = ref false in
232 let ferr =
233 let err_done = ref false in (* call not more than once *)
234 fun c -> if not !err_done then begin err_done := true; ferr c; end
236 let rec get_url level r =
238 let url = r.req_url in
239 let level = r.req_retry in
240 let request = make_full_request r in
241 let server, port =
242 match r.req_proxy with
243 | None -> url.server, url.port
244 | Some (s, p, _) -> s, p
246 (* lprintf "async_ip ...\n"; *)
247 Ip.async_ip server (fun ip ->
248 (* lprintf "IP done %s:%d\n" (Ip.to_string ip) port;*)
249 let token = create_token unlimited_connection_manager in
250 let sock = TcpBufferedSocket.connect token "http client connecting"
251 (try Ip.to_inet_addr ip with e -> raise Not_found) port
252 (fun sock e ->
253 (* if !verbose then lprintf_nl "Event %s" (string_of_event e); *)
254 match e with (* FIXME content-length check *)
255 | BASIC_EVENT (CLOSED (Closed_by_user | Closed_by_peer)) when !ok -> f ()
256 | BASIC_EVENT (CLOSED reason) -> ferr (`RST reason)
257 | BASIC_EVENT LTIMEOUT -> close sock Closed_for_lifetime
258 | _ -> ())
261 let nread = ref false in
262 if !verbose then
263 lprintf_nl "get_page: %s" (String.escaped request);
264 TcpBufferedSocket.write_string sock request;
265 TcpBufferedSocket.set_reader sock (http_reply_handler nread
266 (default_headers_handler url level));
267 set_rtimeout sock 5.;
268 set_lifetime sock r.req_max_total_time;
270 (fun () -> ferr `DNS);
271 with e ->
272 lprintf_nl "error in get_url";
273 raise Not_found
275 and default_headers_handler old_url level sock ans_code headers =
276 let print_headers () =
277 List.iter (fun (name, value) ->
278 lprintf_nl "[%s]=[%s]" name value;
279 ) headers;
281 if !verbose then print_headers ();
282 match ans_code with
283 | 200 ->
284 ok := true;
285 let content_length = ref (-1L) in
286 List.iter (fun (name, content) ->
287 match String.lowercase name with
288 | "content-length" ->
289 (try
290 content_length := Int64.of_string content
291 with _ ->
292 lprintf_nl "bad content length [%s]" content)
293 | "content-encoding" ->
294 if String.lowercase content = "gzip" then r.req_gzip <- true
295 | _ -> ()
296 ) headers;
297 let location = "Location", Url.to_string old_url in
298 let content_handler = content_handler !content_length (location::headers) in
299 set_reader sock content_handler;
300 let buf = TcpBufferedSocket.buf sock in
301 if buf.len > 0 then
302 content_handler sock buf.len
304 | 301 | 302 | 304 ->
305 if !verbose then lprintf_nl "%d: Redirect" ans_code;
306 let retrynum = r.req_retry in
307 if retrynum < r.req_max_retry then begin
309 let url = ref "" in
310 List.iter (fun (name, content) ->
311 if String.lowercase name = "location" then
312 url := content;
313 ) headers;
314 if !verbose then print_headers ();
315 let url =
316 if String2.check_prefix !url "." then url := String2.after !url 1;
317 if String.length !url > 0 && !url.[0] <> '/'
318 then !url
319 else Printf.sprintf "http://%s%s%s"
320 old_url.Url.server
321 (if old_url.Url.port = 80 then "" else Printf.sprintf ":%d" old_url.Url.port)
322 !url
325 if !verbose then lprintf_nl "Redirected to %s" url;
326 r.req_url <- (Url.of_string url);
327 let r = { r with
328 req_url = Url.of_string url;
329 req_retry = retrynum+1
330 } in
331 get_page r content_handler f ferr
333 with e ->
334 lprintf_nl "error understanding redirect response %d" ans_code;
335 print_headers ();
336 raise Not_found
339 else begin
340 lprintf_nl "more than %d redirections, aborting." r.req_max_retry;
341 raise Not_found
344 | 400 when r.req_request = HEAD ->
345 lprintf_nl "Error 400 received for HEAD %s, re-try GET" (Url.to_string_no_args r.req_url);
346 let r2 = {
347 r with
348 req_request = GET;
349 } in
350 get_page r2 content_handler f ferr
352 | 404 ->
353 lprintf_nl "404: Not found for: %s" (Url.to_string_no_args r.req_url);
354 close sock (Closed_for_error "bad reply");
355 ferr (`HTTP ans_code);
356 raise Not_found
358 | 502 | 503 | 504 ->
359 if !verbose then lprintf_nl "%d: Unavailable" ans_code;
360 let retrynum = r.req_retry in
361 if retrynum < r.req_max_retry then begin
362 if !verbose then print_headers ();
363 let seconds = (retrynum+1)*10 in
364 lprintf_nl "retry %d/%d in %d seconds for %s"
365 (retrynum+1) r.req_max_retry seconds (Url.to_string_no_args r.req_url);
366 let r = { r with
367 req_retry = retrynum+1
368 } in
369 add_timer (float(seconds)) (fun t -> get_page r content_handler f ferr)
371 else begin
372 lprintf_nl "more than %d retries, aborting." r.req_max_retry;
373 ferr (`HTTP ans_code);
374 raise Not_found
377 | _ ->
378 lprintf_nl "%d: bad reply for: %s"
379 ans_code (Url.to_string_no_args r.req_url);
380 close sock (Closed_for_error "bad reply");
381 ferr (`HTTP ans_code);
382 raise Not_found
384 get_url 0 r
386 (** Copy all data from [input] to [output] *)
387 let io_copy input output =
389 let size = 16 * 1024 in
390 let s = String.create size in
391 while true do
392 let n = IO.input input s 0 size in
393 if n = 0 then raise IO.No_more_input;
394 ignore (IO.really_output output s 0 n)
395 done
396 with IO.No_more_input -> ()
398 let wget r f =
400 let file_buf = Buffer.create 1000 in
401 let file_size = ref 0L in
404 get_page r (fun maxlen headers sock nread ->
405 (* lprintf "received %d\n" nread; *)
406 let buf = TcpBufferedSocket.buf sock in
408 if nread > 0 then begin
409 let left =
410 if maxlen >= 0L then
411 min (Int64.to_int (maxlen -- !file_size)) nread
412 else nread
414 Buffer.add_string file_buf (String.sub buf.buf buf.pos left);
415 buf_used buf left;
416 file_size := !file_size ++ (Int64.of_int left);
417 if nread > left then
418 TcpBufferedSocket.close sock Closed_by_user
421 (fun _ ->
422 let s = Buffer.contents file_buf in
423 if s = "" then begin
424 lprintf_nl "Empty content for url %s"
425 (Url.to_string r.req_url);
426 end;
428 let webinfos_dir = "web_infos" in
429 Unix2.safe_mkdir webinfos_dir;
430 Unix2.can_write_to_directory webinfos_dir;
432 let base = Filename.basename r.req_url.Url.short_file in
433 (* Base could be "." for http://site.com/ *)
434 let base = if base = "."
435 then begin
436 let prng = Random.State.make_self_init () in
437 let rnd = (Random.State.bits prng) land 0xFFFFFF in
438 Printf.sprintf "http_%06x.tmp" rnd
439 end else base
442 let filename = Filename.concat webinfos_dir base in
443 if !verbose then lprintf_nl "Filename: %s" filename;
444 if r.req_gzip then
445 begin
447 Unix2.tryopen_write_bin filename begin fun oc ->
448 let gz = Gzip.input_io (IO.input_string s) in
449 io_copy gz (IO.output_channel oc)
451 with e ->
452 lprintf_nl "Exception %s while uncompressing content from %s" (Printexc2.to_string e) (Url.to_string r.req_url);
453 Sys.remove filename;
454 raise Not_found
456 else
457 Unix2.tryopen_write_bin filename (fun oc -> output_string oc s);
458 if r.req_save_to_file_time <> 0. then
459 Unix.utimes filename r.req_save_to_file_time r.req_save_to_file_time;
461 (f filename : unit);
462 if not r.req_save then Sys.remove filename
463 with e ->
464 lprintf_nl "Exception %s in loading downloaded file %s" (Printexc2.to_string e) filename;
465 Sys.remove filename;
466 raise Not_found
467 ) def_ferr
468 with e ->
469 lprintf_nl "Exception %s in wget" (Printexc2.to_string e);
470 raise Not_found
472 let whead2 r f ferr =
473 get_page r
474 (fun maxlen headers ->
475 (try f headers with _ -> ());
476 fun sock nread ->
477 close sock Closed_by_user
479 (fun _ -> ())
480 ferr
482 let whead r f = whead2 r f def_ferr
484 let wget_string r f ?(ferr=def_ferr) progress =
486 let file_buf = Buffer.create 1000 in
487 let file_size = ref 0L in
489 get_page r
490 (fun maxlen headers sock nread ->
491 let buf = TcpBufferedSocket.buf sock in
493 if nread > 0 then begin
494 let left =
495 if maxlen >= 0L then
496 min (Int64.to_int (maxlen -- !file_size)) nread
497 else nread
499 Buffer.add_string file_buf (String.sub buf.buf buf.pos left);
500 progress left maxlen;
501 buf_used buf left;
502 file_size := !file_size ++ (Int64.of_int left);
503 if nread > left then
504 TcpBufferedSocket.close sock Closed_by_user
505 end)
506 (fun _ ->
507 let content =
508 if r.req_gzip then
510 let io = Gzip.input_io (IO.input_string (Buffer.contents file_buf)) in
511 IO.read_all io
512 with e ->
513 lprintf_nl "Exception %s while uncompressing content from %s" (Printexc2.to_string e) (Url.to_string r.req_url);
514 raise Not_found
515 else
516 Buffer.contents file_buf
518 f content
519 ) ferr
522 let split_header header =
523 for i = 0 to String.length header - 1 do
524 if header.[i] = '\r' then header.[i] <- '\n';
525 done;
526 for i = String.length header - 1 downto 1 do
527 if header.[i-1] = '\n' then
528 if header.[i] = ' ' then (header.[i] <- ','; header.[i-1] <- ',')
529 else
530 if header.[i] = ',' then header.[i-1] <- ',';
531 done;
532 String2.split_simplify header '\n'
534 let cut_headers headers =
536 List.map (fun s ->
537 let pos = String.index s ':' in
538 let len = String.length s in
539 let key = String.sub s 0 pos in
540 String.lowercase key, if pos+1 < len && s.[pos+1] = ' ' then
541 String.sub s (pos+2) (len-pos-2), key
542 else
543 String.sub s (pos+1) (len-pos-1), key
544 ) headers
545 with e ->
546 lprintf_nl "Exception in cut_headers: %s" (Printexc2.to_string e);
547 raise e