patch #7262
[mldonkey.git] / src / utils / net / http_client.ml
blob135ccc8df382d481585aa178b9b71804aa0c2851
1 (* Copyright 2002 b8_bavard, b8_fee_carabine, INRIA *)
2 (*
3 This file is part of mldonkey.
5 mldonkey is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 mldonkey is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mldonkey; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 (* HTTP Requests:
22 GET, POST, HEAD, PUT, DELETE, TRACE, OPTIONS, CONNECT
25 open Printf2
26 open BasicSocket
27 open Unix
28 open Url
29 open TcpBufferedSocket
30 open Int64ops
33 type http_request =
34 GET
35 | POST
36 | HEAD
37 | PUT
38 | DELETE
39 | TRACE
41 let verbose = ref false
43 type request = {
44 req_headers : ( string * string ) list;
45 req_user_agent : string;
46 req_accept : string;
47 req_proxy : (string * int) option;
48 mutable req_url : url;
49 mutable req_save_to_file_time : float;
50 req_request : http_request;
51 req_referer : Url.url option;
52 req_retry : int;
53 req_max_retry : int;
54 req_save : bool;
55 req_max_total_time : float;
58 type content_handler =
59 int64 -> (string * string) list -> TcpBufferedSocket.t -> int -> unit
61 let log_prefix = "[HTTPcl]"
63 let lprintf_nl fmt =
64 lprintf_nl2 log_prefix fmt
66 let basic_request = {
67 req_url = Url.of_string "http://www.mldonkey.org/";
68 req_referer = None;
69 req_save_to_file_time = 0.;
70 req_request = GET;
71 req_proxy = None;
72 req_headers = [];
73 req_user_agent = "Wget 1.4";
74 req_accept = "*/*";
75 req_retry = 0;
76 req_max_retry = 0;
77 req_save = false;
78 req_max_total_time = infinite_timeout;
81 let make_full_request r =
82 let url = r.req_url in
83 let args = url.args in
84 let res = Buffer.create 80 in
85 let is_real_post = r.req_request = POST && args <> [] in
86 if is_real_post
87 then Buffer.add_string res "POST "
88 else
89 Buffer.add_string res (if r.req_request = HEAD then "HEAD " else "GET ");
90 Buffer.add_string res (
91 let url =
92 if r.req_proxy <> None
93 then Url.to_string_no_args url
94 else url.short_file
96 (* I get a lot more bittorrent urls with this line: *)
97 let url = (Str.global_replace (Str.regexp " ") "%20" url) in
98 let url = if is_real_post then url else
99 Url.put_args url args
101 url);
102 Printf.bprintf res " HTTP/1.0\r\nHost: %s%s\r\n" url.server (if url.port != 80 then Printf.sprintf ":%d" url.port else "");
103 List.iter (fun (a,b) ->
104 Printf.bprintf res "%s: %s\r\n" a b
105 ) r.req_headers;
106 Printf.bprintf res "User-Agent: %s\r\n" r.req_user_agent;
107 Printf.bprintf res "Accept: %s\r\n" r.req_accept;
108 Printf.bprintf res "Connection: close\r\n";
109 (match r.req_referer with None -> ()
110 | Some url ->
111 Printf.bprintf res "Referer: %s\r\n" (Url.to_string_no_args url));
112 if url.user <> "" then begin
113 let userpass = Printf.sprintf "%s:%s" url.user url.passwd in
114 Printf.bprintf res "Authorization: Basic %s\r\n" (Base64.encode userpass)
115 end;
116 if is_real_post then begin
117 let post = Buffer.create 80 in
118 let rec make_post = function
119 | [] -> assert false
120 | [a, b] ->
121 Printf.bprintf post "%s%c%s" (Url.encode a) '=' (Url.encode b)
122 | (a,b)::l ->
123 Printf.bprintf post "%s%c%s%c"
124 (Url.encode a) '=' (Url.encode b) '&';
125 make_post l in
126 make_post args;
127 Printf.bprintf res "Content-Type: application/x-www-form-urlencoded\r\nContent-Length: %d\r\n\r\n%s"
128 (Buffer.length post) (Buffer.contents post)
129 end else
130 Buffer.add_string res "\r\n";
131 let s = Buffer.contents res in
132 if !verbose then
133 lprintf_nl "make_full_request on URL: %s" (String.escaped s);
136 let split_head s =
137 let rec iter pos1 res =
139 let pos3 = String.index_from s pos1 '\n' in
140 let pos2 = if pos3 > 0 && s.[pos3 - 1] = '\r' then pos3 - 1 else pos3 in
141 let line = String.sub s pos1 (pos2 - pos1) in
142 if line = "" then List.rev res else
143 iter (pos3+1) (line :: res)
144 with _ ->
145 let last_line = String.sub s pos1 (String.length s - pos1) in
146 List.rev (if last_line = "" then res else last_line :: res)
148 iter 0 []
150 let parse_header headers_handler sock header =
151 let headers = split_head header in
152 match headers with
153 [] -> failwith "Ill formed reply"
154 | ans :: headers ->
155 if !verbose then lprintf_nl "parse_header: ANSWER %s" ans;
156 let ans_code = int_of_string (String.sub ans 9 3) in
157 let headers = List.map (fun s ->
158 let sep = String.index s ':' in
159 (* TODO: we should lowercase the names here!
160 The header-names are case-insensitive,
161 therefore we only use lowercased names. *)
162 let name_head = String.sub s 0 sep in
163 let size = String.length s in
164 let content_head = String.sub s (sep+2) (size-sep-2) in
165 (name_head, content_head)
166 ) headers in
168 headers_handler sock ans_code headers;
169 with _ ->
170 TcpBufferedSocket.close sock (Closed_for_error "bad header")
172 let read_header header_handler sock nread =
173 let b = TcpBufferedSocket.buf sock in
174 let end_pos = b.pos + b.len in
175 let new_pos = end_pos - nread in
176 let new_pos = max 0 (new_pos - 1) in
178 lprintf "received [%s]" (String.escaped
179 (String.sub b.buf new_pos nread));
181 let rec iter i =
182 let end_pos = b.pos + b.len in
183 if i < end_pos then
184 if b.buf.[i] = '\n' && i <= end_pos - 2 then
185 let c = b.buf.[i+1] in
186 if c = '\n' then
187 let len = i + 2 - b.pos in
188 let header = String.sub b.buf b.pos len in
189 buf_used b len;
190 header_handler sock header
191 else
192 if c = '\r' && i <= end_pos - 3 && b.buf.[i+2] = '\n' then
193 let len = i + 3 - b.pos in
194 let header = String.sub b.buf b.pos len in
195 buf_used b len;
196 header_handler sock header
197 else
198 iter (i+1)
199 else
200 iter (i+1)
201 else
204 iter new_pos
206 let http_reply_handler nr headers_handler sock nread =
207 (* lprintf "http_reply_handler\n"; *)
208 nr := true;
209 read_header (parse_header headers_handler) sock nread
212 let def_ferr = (fun c -> ())
214 let rec get_page r content_handler f ferr =
215 let ok = ref false in
216 let ferr =
217 let err_done = ref false in (* call not more than once *)
218 fun n -> if not !err_done then begin err_done := true; ferr n; end
220 let rec get_url level r =
222 let url = r.req_url in
223 let level = r.req_retry in
224 let request = make_full_request r in
225 let server, port =
226 match r.req_proxy with
227 | None -> url.server, url.port
228 | Some (s, p) -> s, p
230 (* lprintf "async_ip ...\n"; *)
231 Ip.async_ip server (fun ip ->
232 (* lprintf "IP done %s:%d\n" (Ip.to_string ip) port;*)
233 let token = create_token unlimited_connection_manager in
234 let sock = TcpBufferedSocket.connect token "http client connecting"
235 (try Ip.to_inet_addr ip with e -> raise Not_found) port
236 (fun sock e ->
237 (* if !verbose then lprintf_nl "Event %s" (string_of_event e); *)
238 match e with (* FIXME content-length check *)
239 | BASIC_EVENT (CLOSED (Closed_by_user | Closed_by_peer _)) when !ok -> f ()
240 | BASIC_EVENT (CLOSED _) -> ferr 0
241 | BASIC_EVENT LTIMEOUT -> close sock Closed_for_lifetime
242 | _ -> ())
245 let nread = ref false in
246 if !verbose then
247 lprintf_nl "get_page: %s" (String.escaped request);
248 TcpBufferedSocket.write_string sock request;
249 TcpBufferedSocket.set_reader sock (http_reply_handler nread
250 (default_headers_handler url level));
251 set_rtimeout sock 5.;
252 set_lifetime sock r.req_max_total_time;
254 ferr;
255 with e ->
256 lprintf_nl "error in get_url";
257 raise Not_found
259 and default_headers_handler old_url level sock ans_code headers =
260 let print_headers () =
261 List.iter (fun (name, value) ->
262 lprintf_nl "[%s]=[%s]" name value;
263 ) headers;
265 if !verbose then print_headers ();
266 match ans_code with
267 | 200 ->
268 ok := true;
269 let content_length = ref (-1L) in
270 List.iter (fun (name, content) ->
271 if String.lowercase name = "content-length" then
272 try content_length := Int64.of_string content
273 with _ -> lprintf_nl "bad content length [%s]" content;
274 ) headers;
275 let location = "Location", Url.to_string old_url in
276 let content_handler = content_handler !content_length (location::headers) in
277 set_reader sock content_handler;
278 let buf = TcpBufferedSocket.buf sock in
279 if buf.len > 0 then
280 content_handler sock buf.len
282 | 301 | 302 | 304 ->
283 if !verbose then lprintf_nl "%d: Redirect" ans_code;
284 let retrynum = r.req_retry in
285 if retrynum < r.req_max_retry then begin
287 let url = ref "" in
288 List.iter (fun (name, content) ->
289 if String.lowercase name = "location" then
290 url := content;
291 ) headers;
292 if !verbose then print_headers ();
293 let url =
294 if String2.check_prefix !url "." then url := String2.after !url 1;
295 if String.length !url > 0 && !url.[0] <> '/'
296 then !url
297 else Printf.sprintf "http://%s%s%s"
298 old_url.Url.server
299 (if old_url.Url.port = 80 then "" else Printf.sprintf ":%d" old_url.Url.port)
300 !url
303 if !verbose then lprintf_nl "Redirected to %s" url;
304 r.req_url <- (Url.of_string url);
305 let r = { r with
306 req_url = Url.of_string url;
307 req_retry = retrynum+1
308 } in
309 get_page r content_handler f ferr
311 with e ->
312 lprintf_nl "error understanding redirect response %d" ans_code;
313 print_headers ();
314 raise Not_found
317 else begin
318 lprintf_nl "more than %d redirections, aborting." r.req_max_retry;
319 raise Not_found
322 | 400 when r.req_request = HEAD ->
323 lprintf_nl "Error 400 received for HEAD %s, re-try GET" (Url.to_string_no_args r.req_url);
324 let r2 = {
325 r with
326 req_request = GET;
327 } in
328 get_page r2 content_handler f ferr
330 | 404 ->
331 lprintf_nl "404: Not found for: %s" (Url.to_string_no_args r.req_url);
332 close sock (Closed_for_error "bad reply");
333 ferr ans_code;
334 raise Not_found
336 | 502 | 503 | 504 ->
337 if !verbose then lprintf_nl "%d: Unavailable" ans_code;
338 let retrynum = r.req_retry in
339 if retrynum < r.req_max_retry then begin
340 if !verbose then print_headers ();
341 let seconds = (retrynum+1)*10 in
342 lprintf_nl "retry %d/%d in %d seconds for %s"
343 (retrynum+1) r.req_max_retry seconds (Url.to_string_no_args r.req_url);
344 let r = { r with
345 req_retry = retrynum+1
346 } in
347 add_timer (float(seconds)) (fun t -> get_page r content_handler f ferr)
349 else begin
350 lprintf_nl "more than %d retries, aborting." r.req_max_retry;
351 ferr ans_code;
352 raise Not_found
355 | _ ->
356 lprintf_nl "%d: bad reply for: %s"
357 ans_code (Url.to_string_no_args r.req_url);
358 close sock (Closed_for_error "bad reply");
359 ferr ans_code;
360 raise Not_found
362 get_url 0 r
364 let wget r f =
366 let file_buf = Buffer.create 1000 in
367 let file_size = ref 0L in
370 get_page r (fun maxlen headers sock nread ->
371 (* lprintf "received %d\n" nread; *)
372 let buf = TcpBufferedSocket.buf sock in
374 if nread > 0 then begin
375 let left =
376 if maxlen >= 0L then
377 min (Int64.to_int (maxlen -- !file_size)) nread
378 else nread
380 Buffer.add_string file_buf (String.sub buf.buf buf.pos left);
381 buf_used buf left;
382 file_size := !file_size ++ (Int64.of_int left);
383 if nread > left then
384 TcpBufferedSocket.close sock Closed_by_user
387 (fun _ ->
388 let s = Buffer.contents file_buf in
389 if s = "" then begin
390 lprintf_nl "Empty content for url %s"
391 (Url.to_string r.req_url);
392 end;
394 let webinfos_dir = "web_infos" in
395 Unix2.safe_mkdir webinfos_dir;
396 Unix2.can_write_to_directory webinfos_dir;
398 let base = Filename.basename r.req_url.Url.short_file in
399 (* Base could be "." for http://site.com/ *)
400 let base = if base = "."
401 then begin
402 let prng = Random.State.make_self_init () in
403 let rnd = (Random.State.bits prng) land 0xFFFFFF in
404 Printf.sprintf "http_%06x.tmp" rnd
405 end else base
408 let filename = Filename.concat webinfos_dir base in
409 if !verbose then lprintf_nl "Filename: %s" filename;
410 Unix2.tryopen_write_bin filename (fun oc -> output_string oc s);
411 if r.req_save_to_file_time <> 0. then
412 Unix.utimes filename r.req_save_to_file_time r.req_save_to_file_time;
414 (f filename : unit);
415 if not r.req_save then Sys.remove filename
416 with e ->
417 lprintf_nl "Exception %s in loading downloaded file %s" (Printexc2.to_string e) filename;
418 Sys.remove filename;
419 raise Not_found
420 ) def_ferr
421 with e ->
422 lprintf_nl "Exception %s in wget" (Printexc2.to_string e);
423 raise Not_found
425 let whead2 r f ferr =
426 get_page r
427 (fun maxlen headers ->
428 (try f headers with _ -> ());
429 fun sock nread ->
430 close sock Closed_by_user
432 (fun _ -> ())
433 ferr
435 let whead r f = whead2 r f def_ferr
437 let wget_string r f ?(ferr=def_ferr) progress =
439 let file_buf = Buffer.create 1000 in
440 let file_size = ref 0L in
442 get_page r
443 (fun maxlen headers sock nread ->
444 let buf = TcpBufferedSocket.buf sock in
446 if nread > 0 then begin
447 let left =
448 if maxlen >= 0L then
449 min (Int64.to_int (maxlen -- !file_size)) nread
450 else nread
452 Buffer.add_string file_buf (String.sub buf.buf buf.pos left);
453 progress left maxlen;
454 buf_used buf left;
455 file_size := !file_size ++ (Int64.of_int left);
456 if nread > left then
457 TcpBufferedSocket.close sock Closed_by_user
458 end)
459 (fun _ ->
460 f (Buffer.contents file_buf)
461 ) ferr
464 let split_header header =
465 for i = 0 to String.length header - 1 do
466 if header.[i] = '\r' then header.[i] <- '\n';
467 done;
468 for i = String.length header - 1 downto 1 do
469 if header.[i-1] = '\n' then
470 if header.[i] = ' ' then (header.[i] <- ','; header.[i-1] <- ',')
471 else
472 if header.[i] = ',' then header.[i-1] <- ',';
473 done;
474 String2.split_simplify header '\n'
476 let cut_headers headers =
478 List.map (fun s ->
479 let pos = String.index s ':' in
480 let len = String.length s in
481 let key = String.sub s 0 pos in
482 String.lowercase key, if pos+1 < len && s.[pos+1] = ' ' then
483 String.sub s (pos+2) (len-pos-2), key
484 else
485 String.sub s (pos+1) (len-pos-1), key
486 ) headers
487 with e ->
488 lprintf_nl "Exception in cut_headers: %s" (Printexc2.to_string e);
489 raise e