1 (* Copyright 2002 b8_bavard, b8_fee_carabine, INRIA *)
3 This file is part of mldonkey.
5 mldonkey is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 mldonkey is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mldonkey; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 GET, POST, HEAD, PUT, DELETE, TRACE, OPTIONS, CONNECT
29 open TcpBufferedSocket
41 let verbose = ref false
44 req_headers
: ( string * string ) list
;
45 req_user_agent
: string;
47 req_proxy
: (string * int * (string * string) option) option; (* (host,port,(login,password)) *)
48 mutable req_url
: url
;
49 mutable req_save_to_file_time
: float;
50 req_request
: http_request
;
51 req_referer
: Url.url
option;
55 req_max_total_time
: float;
58 type content_handler
=
59 int64
-> (string * string) list
-> TcpBufferedSocket.t
-> int -> unit
61 let log_prefix = "[HTTPcl]"
64 lprintf_nl2
log_prefix fmt
67 req_url
= Url.of_string
"http://www.mldonkey.org/";
69 req_save_to_file_time
= 0.;
73 req_user_agent
= "Wget 1.4";
78 req_max_total_time
= infinite_timeout
;
81 let make_full_request r
=
82 let url = r
.req_url
in
83 let args = url.args in
84 let res = Buffer.create
80 in
85 let is_real_post = r
.req_request
= POST
&& args <> [] in
87 then Buffer.add_string
res "POST "
89 Buffer.add_string
res (if r
.req_request
= HEAD
then "HEAD " else "GET ");
90 Buffer.add_string
res (
92 if r
.req_proxy
<> None
93 then Url.to_string_no_args
url
96 (* I get a lot more bittorrent urls with this line: *)
97 let url = (Str.global_replace
(Str.regexp
" ") "%20" url) in
98 let url = if is_real_post then url else
102 Printf.bprintf
res " HTTP/1.0\r\nHost: %s%s\r\n" url.server
(if url.port
!= 80 then Printf.sprintf
":%d" url.port
else "");
103 List.iter
(fun (a
,b
) ->
104 Printf.bprintf
res "%s: %s\r\n" a b
106 Printf.bprintf
res "User-Agent: %s\r\n" r
.req_user_agent
;
107 Printf.bprintf
res "Accept: %s\r\n" r
.req_accept
;
108 Printf.bprintf
res "Connection: close\r\n";
109 begin match r
.req_referer
with
111 | Some
url -> Printf.bprintf
res "Referer: %s\r\n" (Url.to_string_no_args
url)
113 begin match r
.req_proxy
with
114 | Some
(_
,_
,Some
(login
,password
)) ->
115 Printf.bprintf
res "Proxy-Authorization: Basic %s\n" (Base64.encode
(login ^
":" ^ password
))
118 if url.user
<> "" then begin
119 let userpass = Printf.sprintf
"%s:%s" url.user
url.passwd
in
120 Printf.bprintf
res "Authorization: Basic %s\r\n" (Base64.encode
userpass)
122 if is_real_post then begin
123 let post = Buffer.create
80 in
124 let rec make_post = function
127 Printf.bprintf
post "%s%c%s" (Url.encode a
) '
='
(Url.encode b
)
129 Printf.bprintf
post "%s%c%s%c"
130 (Url.encode a
) '
='
(Url.encode b
) '
&'
;
133 Printf.bprintf
res "Content-Type: application/x-www-form-urlencoded\r\nContent-Length: %d\r\n\r\n%s"
134 (Buffer.length
post) (Buffer.contents
post)
136 Buffer.add_string
res "\r\n";
137 let s = Buffer.contents
res in
139 lprintf_nl "make_full_request on URL: %s" (String.escaped
s);
143 let rec iter pos1
res =
145 let pos3 = String.index_from
s pos1 '
\n'
in
146 let pos2 = if pos3 > 0 && s.[pos3 - 1] = '
\r'
then pos3 - 1 else pos3 in
147 let line = String.sub
s pos1
(pos2 - pos1
) in
148 if line = "" then List.rev
res else
149 iter (pos3+1) (line :: res)
151 let last_line = String.sub
s pos1
(String.length
s - pos1
) in
152 List.rev
(if last_line = "" then res else last_line :: res)
156 let parse_header headers_handler sock header
=
157 let headers = split_head header
in
159 [] -> failwith
"Ill formed reply"
161 if !verbose then lprintf_nl "parse_header: ANSWER %s" ans
;
162 let ans_code = int_of_string
(String.sub ans
9 3) in
163 let headers = List.map
(fun s ->
164 let sep = String.index
s '
:'
in
165 (* TODO: we should lowercase the names here!
166 The header-names are case-insensitive,
167 therefore we only use lowercased names. *)
168 let name_head = String.sub
s 0 sep in
169 let size = String.length
s in
170 let content_head = String.sub
s (sep+2) (size-sep-2) in
171 (name_head, content_head)
174 headers_handler sock
ans_code headers;
176 TcpBufferedSocket.close sock
(Closed_for_error
"bad header")
178 let read_header header_handler sock nread
=
179 let b = TcpBufferedSocket.buf sock
in
180 let end_pos = b.pos
+ b.len
in
181 let new_pos = end_pos - nread
in
182 let new_pos = max
0 (new_pos - 1) in
184 lprintf "received [%s]" (String.escaped
185 (String.sub b.buf new_pos nread));
188 let end_pos = b.pos
+ b.len
in
190 if b.buf
.[i
] = '
\n'
&& i
<= end_pos - 2 then
191 let c = b.buf
.[i
+1] in
193 let len = i
+ 2 - b.pos
in
194 let header = String.sub
b.buf
b.pos
len in
196 header_handler sock
header
198 if c = '
\r'
&& i
<= end_pos - 3 && b.buf
.[i
+2] = '
\n'
then
199 let len = i
+ 3 - b.pos
in
200 let header = String.sub
b.buf
b.pos
len in
202 header_handler sock
header
212 let http_reply_handler nr headers_handler sock nread
=
213 (* lprintf "http_reply_handler\n"; *)
215 read_header (parse_header headers_handler
) sock nread
218 let def_ferr = (fun c -> ())
220 let rec get_page r content_handler f ferr
=
221 let ok = ref false in
223 let err_done = ref false in (* call not more than once *)
224 fun n
-> if not
!err_done then begin err_done := true; ferr n
; end
226 let rec get_url level r
=
228 let url = r
.req_url
in
229 let level = r
.req_retry
in
230 let request = make_full_request r
in
232 match r
.req_proxy
with
233 | None
-> url.server, url.port
234 | Some
(s, p
, _
) -> s, p
236 (* lprintf "async_ip ...\n"; *)
237 Ip.async_ip
server (fun ip
->
238 (* lprintf "IP done %s:%d\n" (Ip.to_string ip) port;*)
239 let token = create_token unlimited_connection_manager
in
240 let sock = TcpBufferedSocket.connect
token "http client connecting"
241 (try Ip.to_inet_addr ip
with e
-> raise Not_found
) port
243 (* if !verbose then lprintf_nl "Event %s" (string_of_event e); *)
244 match e
with (* FIXME content-length check *)
245 | BASIC_EVENT
(CLOSED
(Closed_by_user
| Closed_by_peer
)) when !ok -> f
()
246 | BASIC_EVENT
(CLOSED _
) -> ferr 0
247 | BASIC_EVENT LTIMEOUT
-> close
sock Closed_for_lifetime
251 let nread = ref false in
253 lprintf_nl "get_page: %s" (String.escaped
request);
254 TcpBufferedSocket.write_string
sock request;
255 TcpBufferedSocket.set_reader
sock (http_reply_handler nread
256 (default_headers_handler
url level));
257 set_rtimeout
sock 5.;
258 set_lifetime
sock r
.req_max_total_time
;
262 lprintf_nl "error in get_url";
265 and default_headers_handler old_url
level sock ans_code headers =
266 let print_headers () =
267 List.iter (fun (name
, value) ->
268 lprintf_nl "[%s]=[%s]" name
value;
271 if !verbose then print_headers ();
275 let content_length = ref (-1L) in
276 List.iter (fun (name
, content
) ->
277 if String.lowercase name
= "content-length" then
278 try content_length := Int64.of_string content
279 with _
-> lprintf_nl "bad content length [%s]" content
;
281 let location = "Location", Url.to_string old_url
in
282 let content_handler = content_handler !content_length (location::headers) in
283 set_reader
sock content_handler;
284 let buf = TcpBufferedSocket.buf sock in
286 content_handler sock buf.len
289 if !verbose then lprintf_nl "%d: Redirect" ans_code;
290 let retrynum = r
.req_retry
in
291 if retrynum < r
.req_max_retry
then begin
294 List.iter (fun (name
, content
) ->
295 if String.lowercase name
= "location" then
298 if !verbose then print_headers ();
300 if String2.check_prefix
!url "." then url := String2.after
!url 1;
301 if String.length
!url > 0 && !url.[0] <> '
/'
303 else Printf.sprintf
"http://%s%s%s"
305 (if old_url
.Url.port
= 80 then "" else Printf.sprintf
":%d" old_url
.Url.port
)
309 if !verbose then lprintf_nl "Redirected to %s" url;
310 r
.req_url
<- (Url.of_string
url);
312 req_url
= Url.of_string
url;
313 req_retry
= retrynum+1
315 get_page r content_handler f
ferr
318 lprintf_nl "error understanding redirect response %d" ans_code;
324 lprintf_nl "more than %d redirections, aborting." r.req_max_retry
;
328 | 400 when r.req_request
= HEAD
->
329 lprintf_nl "Error 400 received for HEAD %s, re-try GET" (Url.to_string_no_args
r.req_url
);
334 get_page r2 content_handler f
ferr
337 lprintf_nl "404: Not found for: %s" (Url.to_string_no_args
r.req_url
);
338 close
sock (Closed_for_error
"bad reply");
343 if !verbose then lprintf_nl "%d: Unavailable" ans_code;
344 let retrynum = r.req_retry
in
345 if retrynum < r.req_max_retry
then begin
346 if !verbose then print_headers ();
347 let seconds = (retrynum+1)*10 in
348 lprintf_nl "retry %d/%d in %d seconds for %s"
349 (retrynum+1) r.req_max_retry
seconds (Url.to_string_no_args
r.req_url
);
351 req_retry
= retrynum+1
353 add_timer
(float(seconds)) (fun t
-> get_page r content_handler f
ferr)
356 lprintf_nl "more than %d retries, aborting." r.req_max_retry
;
362 lprintf_nl "%d: bad reply for: %s"
363 ans_code (Url.to_string_no_args
r.req_url
);
364 close
sock (Closed_for_error
"bad reply");
372 let file_buf = Buffer.create
1000 in
373 let file_size = ref 0L in
376 get_page r (fun maxlen
headers sock nread ->
377 (* lprintf "received %d\n" nread; *)
378 let buf = TcpBufferedSocket.buf sock in
380 if nread > 0 then begin
383 min
(Int64.to_int
(maxlen
-- !file_size)) nread
386 Buffer.add_string
file_buf (String.sub
buf.buf buf.pos
left);
388 file_size := !file_size ++ (Int64.of_int
left);
390 TcpBufferedSocket.close
sock Closed_by_user
394 let s = Buffer.contents
file_buf in
396 lprintf_nl "Empty content for url %s"
397 (Url.to_string
r.req_url
);
400 let webinfos_dir = "web_infos" in
401 Unix2.safe_mkdir
webinfos_dir;
402 Unix2.can_write_to_directory
webinfos_dir;
404 let base = Filename.basename
r.req_url
.Url.short_file
in
405 (* Base could be "." for http://site.com/ *)
406 let base = if base = "."
408 let prng = Random.State.make_self_init
() in
409 let rnd = (Random.State.bits
prng) land 0xFFFFFF in
410 Printf.sprintf
"http_%06x.tmp" rnd
414 let filename = Filename.concat
webinfos_dir base in
415 if !verbose then lprintf_nl "Filename: %s" filename;
416 Unix2.tryopen_write_bin
filename (fun oc
-> output_string oc
s);
417 if r.req_save_to_file_time
<> 0. then
418 Unix.utimes
filename r.req_save_to_file_time
r.req_save_to_file_time
;
421 if not
r.req_save
then Sys.remove
filename
423 lprintf_nl "Exception %s in loading downloaded file %s" (Printexc2.to_string e
) filename;
428 lprintf_nl "Exception %s in wget" (Printexc2.to_string e
);
431 let whead2 r f
ferr =
433 (fun maxlen
headers ->
434 (try f
headers with _
-> ());
436 close
sock Closed_by_user
441 let whead r f
= whead2 r f
def_ferr
443 let wget_string r f ?
(ferr=def_ferr) progress
=
445 let file_buf = Buffer.create
1000 in
446 let file_size = ref 0L in
449 (fun maxlen
headers sock nread ->
450 let buf = TcpBufferedSocket.buf sock in
452 if nread > 0 then begin
455 min
(Int64.to_int
(maxlen
-- !file_size)) nread
458 Buffer.add_string
file_buf (String.sub
buf.buf buf.pos
left);
459 progress
left maxlen
;
461 file_size := !file_size ++ (Int64.of_int
left);
463 TcpBufferedSocket.close
sock Closed_by_user
466 f
(Buffer.contents
file_buf)
470 let split_header header =
471 for i
= 0 to String.length
header - 1 do
472 if header.[i
] = '
\r'
then header.[i
] <- '
\n'
;
474 for i
= String.length
header - 1 downto 1 do
475 if header.[i
-1] = '
\n'
then
476 if header.[i
] = ' '
then (header.[i
] <- '
,'
; header.[i
-1] <- '
,'
)
478 if header.[i
] = '
,'
then header.[i
-1] <- '
,'
;
480 String2.split_simplify
header '
\n'
482 let cut_headers headers =
485 let pos = String.index
s '
:'
in
486 let len = String.length
s in
487 let key = String.sub
s 0 pos in
488 String.lowercase
key, if pos+1 < len && s.[pos+1] = ' '
then
489 String.sub
s (pos+2) (len-pos-2), key
491 String.sub
s (pos+1) (len-pos-1), key
494 lprintf_nl "Exception in cut_headers: %s" (Printexc2.to_string e
);