enable proxy authentication for http client too
[mldonkey.git] / src / utils / net / http_client.ml
blob7034858662b3995149d067011095e802b6c68290
1 (* Copyright 2002 b8_bavard, b8_fee_carabine, INRIA *)
2 (*
3 This file is part of mldonkey.
5 mldonkey is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 mldonkey is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mldonkey; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 (* HTTP Requests:
22 GET, POST, HEAD, PUT, DELETE, TRACE, OPTIONS, CONNECT
25 open Printf2
26 open BasicSocket
27 open Unix
28 open Url
29 open TcpBufferedSocket
30 open Int64ops
33 type http_request =
34 GET
35 | POST
36 | HEAD
37 | PUT
38 | DELETE
39 | TRACE
41 let verbose = ref false
43 type request = {
44 req_headers : ( string * string ) list;
45 req_user_agent : string;
46 req_accept : string;
47 req_proxy : (string * int * (string * string) option) option; (* (host,port,(login,password)) *)
48 mutable req_url : url;
49 mutable req_save_to_file_time : float;
50 req_request : http_request;
51 req_referer : Url.url option;
52 req_retry : int;
53 req_max_retry : int;
54 req_save : bool;
55 req_max_total_time : float;
58 type content_handler =
59 int64 -> (string * string) list -> TcpBufferedSocket.t -> int -> unit
61 let log_prefix = "[HTTPcl]"
63 let lprintf_nl fmt =
64 lprintf_nl2 log_prefix fmt
66 let basic_request = {
67 req_url = Url.of_string "http://www.mldonkey.org/";
68 req_referer = None;
69 req_save_to_file_time = 0.;
70 req_request = GET;
71 req_proxy = None;
72 req_headers = [];
73 req_user_agent = "Wget 1.4";
74 req_accept = "*/*";
75 req_retry = 0;
76 req_max_retry = 0;
77 req_save = false;
78 req_max_total_time = infinite_timeout;
81 let make_full_request r =
82 let url = r.req_url in
83 let args = url.args in
84 let res = Buffer.create 80 in
85 let is_real_post = r.req_request = POST && args <> [] in
86 if is_real_post
87 then Buffer.add_string res "POST "
88 else
89 Buffer.add_string res (if r.req_request = HEAD then "HEAD " else "GET ");
90 Buffer.add_string res (
91 let url =
92 if r.req_proxy <> None
93 then Url.to_string_no_args url
94 else url.short_file
96 (* I get a lot more bittorrent urls with this line: *)
97 let url = (Str.global_replace (Str.regexp " ") "%20" url) in
98 let url = if is_real_post then url else
99 Url.put_args url args
101 url);
102 Printf.bprintf res " HTTP/1.0\r\nHost: %s%s\r\n" url.server (if url.port != 80 then Printf.sprintf ":%d" url.port else "");
103 List.iter (fun (a,b) ->
104 Printf.bprintf res "%s: %s\r\n" a b
105 ) r.req_headers;
106 Printf.bprintf res "User-Agent: %s\r\n" r.req_user_agent;
107 Printf.bprintf res "Accept: %s\r\n" r.req_accept;
108 Printf.bprintf res "Connection: close\r\n";
109 begin match r.req_referer with
110 | None -> ()
111 | Some url -> Printf.bprintf res "Referer: %s\r\n" (Url.to_string_no_args url)
112 end;
113 begin match r.req_proxy with
114 | Some (_,_,Some (login,password)) ->
115 Printf.bprintf res "Proxy-Authorization: Basic %s\n" (Base64.encode (login ^ ":" ^ password))
116 | _ -> ()
117 end;
118 if url.user <> "" then begin
119 let userpass = Printf.sprintf "%s:%s" url.user url.passwd in
120 Printf.bprintf res "Authorization: Basic %s\r\n" (Base64.encode userpass)
121 end;
122 if is_real_post then begin
123 let post = Buffer.create 80 in
124 let rec make_post = function
125 | [] -> assert false
126 | [a, b] ->
127 Printf.bprintf post "%s%c%s" (Url.encode a) '=' (Url.encode b)
128 | (a,b)::l ->
129 Printf.bprintf post "%s%c%s%c"
130 (Url.encode a) '=' (Url.encode b) '&';
131 make_post l in
132 make_post args;
133 Printf.bprintf res "Content-Type: application/x-www-form-urlencoded\r\nContent-Length: %d\r\n\r\n%s"
134 (Buffer.length post) (Buffer.contents post)
135 end else
136 Buffer.add_string res "\r\n";
137 let s = Buffer.contents res in
138 if !verbose then
139 lprintf_nl "make_full_request on URL: %s" (String.escaped s);
142 let split_head s =
143 let rec iter pos1 res =
145 let pos3 = String.index_from s pos1 '\n' in
146 let pos2 = if pos3 > 0 && s.[pos3 - 1] = '\r' then pos3 - 1 else pos3 in
147 let line = String.sub s pos1 (pos2 - pos1) in
148 if line = "" then List.rev res else
149 iter (pos3+1) (line :: res)
150 with _ ->
151 let last_line = String.sub s pos1 (String.length s - pos1) in
152 List.rev (if last_line = "" then res else last_line :: res)
154 iter 0 []
156 let parse_header headers_handler sock header =
157 let headers = split_head header in
158 match headers with
159 [] -> failwith "Ill formed reply"
160 | ans :: headers ->
161 if !verbose then lprintf_nl "parse_header: ANSWER %s" ans;
162 let ans_code = int_of_string (String.sub ans 9 3) in
163 let headers = List.map (fun s ->
164 let sep = String.index s ':' in
165 (* TODO: we should lowercase the names here!
166 The header-names are case-insensitive,
167 therefore we only use lowercased names. *)
168 let name_head = String.sub s 0 sep in
169 let size = String.length s in
170 let content_head = String.sub s (sep+2) (size-sep-2) in
171 (name_head, content_head)
172 ) headers in
174 headers_handler sock ans_code headers;
175 with _ ->
176 TcpBufferedSocket.close sock (Closed_for_error "bad header")
178 let read_header header_handler sock nread =
179 let b = TcpBufferedSocket.buf sock in
180 let end_pos = b.pos + b.len in
181 let new_pos = end_pos - nread in
182 let new_pos = max 0 (new_pos - 1) in
184 lprintf "received [%s]" (String.escaped
185 (String.sub b.buf new_pos nread));
187 let rec iter i =
188 let end_pos = b.pos + b.len in
189 if i < end_pos then
190 if b.buf.[i] = '\n' && i <= end_pos - 2 then
191 let c = b.buf.[i+1] in
192 if c = '\n' then
193 let len = i + 2 - b.pos in
194 let header = String.sub b.buf b.pos len in
195 buf_used b len;
196 header_handler sock header
197 else
198 if c = '\r' && i <= end_pos - 3 && b.buf.[i+2] = '\n' then
199 let len = i + 3 - b.pos in
200 let header = String.sub b.buf b.pos len in
201 buf_used b len;
202 header_handler sock header
203 else
204 iter (i+1)
205 else
206 iter (i+1)
207 else
210 iter new_pos
212 let http_reply_handler nr headers_handler sock nread =
213 (* lprintf "http_reply_handler\n"; *)
214 nr := true;
215 read_header (parse_header headers_handler) sock nread
218 let def_ferr = (fun c -> ())
220 let rec get_page r content_handler f ferr =
221 let ok = ref false in
222 let ferr =
223 let err_done = ref false in (* call not more than once *)
224 fun n -> if not !err_done then begin err_done := true; ferr n; end
226 let rec get_url level r =
228 let url = r.req_url in
229 let level = r.req_retry in
230 let request = make_full_request r in
231 let server, port =
232 match r.req_proxy with
233 | None -> url.server, url.port
234 | Some (s, p, _) -> s, p
236 (* lprintf "async_ip ...\n"; *)
237 Ip.async_ip server (fun ip ->
238 (* lprintf "IP done %s:%d\n" (Ip.to_string ip) port;*)
239 let token = create_token unlimited_connection_manager in
240 let sock = TcpBufferedSocket.connect token "http client connecting"
241 (try Ip.to_inet_addr ip with e -> raise Not_found) port
242 (fun sock e ->
243 (* if !verbose then lprintf_nl "Event %s" (string_of_event e); *)
244 match e with (* FIXME content-length check *)
245 | BASIC_EVENT (CLOSED (Closed_by_user | Closed_by_peer)) when !ok -> f ()
246 | BASIC_EVENT (CLOSED _) -> ferr 0
247 | BASIC_EVENT LTIMEOUT -> close sock Closed_for_lifetime
248 | _ -> ())
251 let nread = ref false in
252 if !verbose then
253 lprintf_nl "get_page: %s" (String.escaped request);
254 TcpBufferedSocket.write_string sock request;
255 TcpBufferedSocket.set_reader sock (http_reply_handler nread
256 (default_headers_handler url level));
257 set_rtimeout sock 5.;
258 set_lifetime sock r.req_max_total_time;
260 ferr;
261 with e ->
262 lprintf_nl "error in get_url";
263 raise Not_found
265 and default_headers_handler old_url level sock ans_code headers =
266 let print_headers () =
267 List.iter (fun (name, value) ->
268 lprintf_nl "[%s]=[%s]" name value;
269 ) headers;
271 if !verbose then print_headers ();
272 match ans_code with
273 | 200 ->
274 ok := true;
275 let content_length = ref (-1L) in
276 List.iter (fun (name, content) ->
277 if String.lowercase name = "content-length" then
278 try content_length := Int64.of_string content
279 with _ -> lprintf_nl "bad content length [%s]" content;
280 ) headers;
281 let location = "Location", Url.to_string old_url in
282 let content_handler = content_handler !content_length (location::headers) in
283 set_reader sock content_handler;
284 let buf = TcpBufferedSocket.buf sock in
285 if buf.len > 0 then
286 content_handler sock buf.len
288 | 301 | 302 | 304 ->
289 if !verbose then lprintf_nl "%d: Redirect" ans_code;
290 let retrynum = r.req_retry in
291 if retrynum < r.req_max_retry then begin
293 let url = ref "" in
294 List.iter (fun (name, content) ->
295 if String.lowercase name = "location" then
296 url := content;
297 ) headers;
298 if !verbose then print_headers ();
299 let url =
300 if String2.check_prefix !url "." then url := String2.after !url 1;
301 if String.length !url > 0 && !url.[0] <> '/'
302 then !url
303 else Printf.sprintf "http://%s%s%s"
304 old_url.Url.server
305 (if old_url.Url.port = 80 then "" else Printf.sprintf ":%d" old_url.Url.port)
306 !url
309 if !verbose then lprintf_nl "Redirected to %s" url;
310 r.req_url <- (Url.of_string url);
311 let r = { r with
312 req_url = Url.of_string url;
313 req_retry = retrynum+1
314 } in
315 get_page r content_handler f ferr
317 with e ->
318 lprintf_nl "error understanding redirect response %d" ans_code;
319 print_headers ();
320 raise Not_found
323 else begin
324 lprintf_nl "more than %d redirections, aborting." r.req_max_retry;
325 raise Not_found
328 | 400 when r.req_request = HEAD ->
329 lprintf_nl "Error 400 received for HEAD %s, re-try GET" (Url.to_string_no_args r.req_url);
330 let r2 = {
331 r with
332 req_request = GET;
333 } in
334 get_page r2 content_handler f ferr
336 | 404 ->
337 lprintf_nl "404: Not found for: %s" (Url.to_string_no_args r.req_url);
338 close sock (Closed_for_error "bad reply");
339 ferr ans_code;
340 raise Not_found
342 | 502 | 503 | 504 ->
343 if !verbose then lprintf_nl "%d: Unavailable" ans_code;
344 let retrynum = r.req_retry in
345 if retrynum < r.req_max_retry then begin
346 if !verbose then print_headers ();
347 let seconds = (retrynum+1)*10 in
348 lprintf_nl "retry %d/%d in %d seconds for %s"
349 (retrynum+1) r.req_max_retry seconds (Url.to_string_no_args r.req_url);
350 let r = { r with
351 req_retry = retrynum+1
352 } in
353 add_timer (float(seconds)) (fun t -> get_page r content_handler f ferr)
355 else begin
356 lprintf_nl "more than %d retries, aborting." r.req_max_retry;
357 ferr ans_code;
358 raise Not_found
361 | _ ->
362 lprintf_nl "%d: bad reply for: %s"
363 ans_code (Url.to_string_no_args r.req_url);
364 close sock (Closed_for_error "bad reply");
365 ferr ans_code;
366 raise Not_found
368 get_url 0 r
370 let wget r f =
372 let file_buf = Buffer.create 1000 in
373 let file_size = ref 0L in
376 get_page r (fun maxlen headers sock nread ->
377 (* lprintf "received %d\n" nread; *)
378 let buf = TcpBufferedSocket.buf sock in
380 if nread > 0 then begin
381 let left =
382 if maxlen >= 0L then
383 min (Int64.to_int (maxlen -- !file_size)) nread
384 else nread
386 Buffer.add_string file_buf (String.sub buf.buf buf.pos left);
387 buf_used buf left;
388 file_size := !file_size ++ (Int64.of_int left);
389 if nread > left then
390 TcpBufferedSocket.close sock Closed_by_user
393 (fun _ ->
394 let s = Buffer.contents file_buf in
395 if s = "" then begin
396 lprintf_nl "Empty content for url %s"
397 (Url.to_string r.req_url);
398 end;
400 let webinfos_dir = "web_infos" in
401 Unix2.safe_mkdir webinfos_dir;
402 Unix2.can_write_to_directory webinfos_dir;
404 let base = Filename.basename r.req_url.Url.short_file in
405 (* Base could be "." for http://site.com/ *)
406 let base = if base = "."
407 then begin
408 let prng = Random.State.make_self_init () in
409 let rnd = (Random.State.bits prng) land 0xFFFFFF in
410 Printf.sprintf "http_%06x.tmp" rnd
411 end else base
414 let filename = Filename.concat webinfos_dir base in
415 if !verbose then lprintf_nl "Filename: %s" filename;
416 Unix2.tryopen_write_bin filename (fun oc -> output_string oc s);
417 if r.req_save_to_file_time <> 0. then
418 Unix.utimes filename r.req_save_to_file_time r.req_save_to_file_time;
420 (f filename : unit);
421 if not r.req_save then Sys.remove filename
422 with e ->
423 lprintf_nl "Exception %s in loading downloaded file %s" (Printexc2.to_string e) filename;
424 Sys.remove filename;
425 raise Not_found
426 ) def_ferr
427 with e ->
428 lprintf_nl "Exception %s in wget" (Printexc2.to_string e);
429 raise Not_found
431 let whead2 r f ferr =
432 get_page r
433 (fun maxlen headers ->
434 (try f headers with _ -> ());
435 fun sock nread ->
436 close sock Closed_by_user
438 (fun _ -> ())
439 ferr
441 let whead r f = whead2 r f def_ferr
443 let wget_string r f ?(ferr=def_ferr) progress =
445 let file_buf = Buffer.create 1000 in
446 let file_size = ref 0L in
448 get_page r
449 (fun maxlen headers sock nread ->
450 let buf = TcpBufferedSocket.buf sock in
452 if nread > 0 then begin
453 let left =
454 if maxlen >= 0L then
455 min (Int64.to_int (maxlen -- !file_size)) nread
456 else nread
458 Buffer.add_string file_buf (String.sub buf.buf buf.pos left);
459 progress left maxlen;
460 buf_used buf left;
461 file_size := !file_size ++ (Int64.of_int left);
462 if nread > left then
463 TcpBufferedSocket.close sock Closed_by_user
464 end)
465 (fun _ ->
466 f (Buffer.contents file_buf)
467 ) ferr
470 let split_header header =
471 for i = 0 to String.length header - 1 do
472 if header.[i] = '\r' then header.[i] <- '\n';
473 done;
474 for i = String.length header - 1 downto 1 do
475 if header.[i-1] = '\n' then
476 if header.[i] = ' ' then (header.[i] <- ','; header.[i-1] <- ',')
477 else
478 if header.[i] = ',' then header.[i-1] <- ',';
479 done;
480 String2.split_simplify header '\n'
482 let cut_headers headers =
484 List.map (fun s ->
485 let pos = String.index s ':' in
486 let len = String.length s in
487 let key = String.sub s 0 pos in
488 String.lowercase key, if pos+1 < len && s.[pos+1] = ' ' then
489 String.sub s (pos+2) (len-pos-2), key
490 else
491 String.sub s (pos+1) (len-pos-1), key
492 ) headers
493 with e ->
494 lprintf_nl "Exception in cut_headers: %s" (Printexc2.to_string e);
495 raise e