1 (* Copyright 2002 b8_bavard, b8_fee_carabine, INRIA *)
3 This file is part of mldonkey.
5 mldonkey is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 mldonkey is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mldonkey; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 GET, POST, HEAD, PUT, DELETE, TRACE, OPTIONS, CONNECT
29 open TcpBufferedSocket
41 type error
= [ `HTTP
of int | `RST
of BasicSocket.close_reason
| `DNS
]
43 let show_error = function
44 | `HTTP code
-> Printf.sprintf
"HTTP error code %d" code
45 | `RST reason
-> Printf.sprintf
"Connection closed : %s" (BasicSocket.string_of_reason reason
)
46 | `DNS
-> Printf.sprintf
"DNS resolution failed"
48 let verbose = ref false
51 req_headers
: ( string * string ) list
;
52 req_user_agent
: string;
54 req_proxy
: (string * int * (string * string) option) option; (* (host,port,(login,password)) *)
55 mutable req_url
: url
;
56 mutable req_gzip
: bool;
57 mutable req_save_to_file_time
: float;
58 req_request
: http_request
;
59 req_referer
: Url.url
option;
63 req_max_total_time
: float;
66 type content_handler
=
67 int64
-> (string * string) list
-> TcpBufferedSocket.t
-> int -> unit
69 let log_prefix = "[HTTPcl]"
72 lprintf_nl2
log_prefix fmt
75 req_url
= Url.of_string
"http://www.mldonkey.org/";
77 req_save_to_file_time
= 0.;
82 req_user_agent
= "Wget 1.4";
87 req_max_total_time
= infinite_timeout
;
90 let make_full_request r
=
91 let url = r
.req_url
in
92 let args = url.args in
93 let res = Buffer.create
80 in
94 let is_real_post = r
.req_request
= POST
&& args <> [] in
96 then Buffer.add_string
res "POST "
98 Buffer.add_string
res (if r
.req_request
= HEAD
then "HEAD " else "GET ");
99 Buffer.add_string
res (
101 if r
.req_proxy
<> None
102 then Url.to_string_no_args
url
105 (* I get a lot more bittorrent urls with this line: *)
106 let url = (Str.global_replace
(Str.regexp
" ") "%20" url) in
107 let url = if is_real_post then url else
108 Url.put_args
url args
111 Printf.bprintf
res " HTTP/1.0\r\nHost: %s%s\r\n" url.server
(if url.port
!= 80 then Printf.sprintf
":%d" url.port
else "");
112 List.iter
(fun (a
,b
) ->
113 Printf.bprintf
res "%s: %s\r\n" a b
115 Printf.bprintf
res "Accept-Encoding: gzip\r\n";
116 Printf.bprintf
res "User-Agent: %s\r\n" r
.req_user_agent
;
117 Printf.bprintf
res "Accept: %s\r\n" r
.req_accept
;
118 Printf.bprintf
res "Connection: close\r\n";
119 begin match r
.req_referer
with
121 | Some
url -> Printf.bprintf
res "Referer: %s\r\n" (Url.to_string_no_args
url)
123 begin match r
.req_proxy
with
124 | Some
(_
,_
,Some
(login
,password
)) ->
125 Printf.bprintf
res "Proxy-Authorization: Basic %s\n" (Base64.encode
(login ^
":" ^ password
))
128 if url.user
<> "" then begin
129 let userpass = Printf.sprintf
"%s:%s" url.user
url.passwd
in
130 Printf.bprintf
res "Authorization: Basic %s\r\n" (Base64.encode
userpass)
132 if is_real_post then begin
133 let post = Buffer.create
80 in
134 let rec make_post = function
137 Printf.bprintf
post "%s%c%s" (Url.encode a
) '
='
(Url.encode b
)
139 Printf.bprintf
post "%s%c%s%c"
140 (Url.encode a
) '
='
(Url.encode b
) '
&'
;
143 Printf.bprintf
res "Content-Type: application/x-www-form-urlencoded\r\nContent-Length: %d\r\n\r\n%s"
144 (Buffer.length
post) (Buffer.contents
post)
146 Buffer.add_string
res "\r\n";
147 let s = Buffer.contents
res in
149 lprintf_nl "make_full_request on URL: %s" (String.escaped
s);
153 let rec iter pos1
res =
155 let pos3 = String.index_from
s pos1 '
\n'
in
156 let pos2 = if pos3 > 0 && s.[pos3 - 1] = '
\r'
then pos3 - 1 else pos3 in
157 let line = String.sub
s pos1
(pos2 - pos1
) in
158 if line = "" then List.rev
res else
159 iter (pos3+1) (line :: res)
161 let last_line = String.sub
s pos1
(String.length
s - pos1
) in
162 List.rev
(if last_line = "" then res else last_line :: res)
166 let parse_header headers_handler sock header
=
167 let headers = split_head header
in
169 [] -> failwith
"Ill formed reply"
171 if !verbose then lprintf_nl "parse_header: ANSWER %s" ans
;
172 let ans_code = int_of_string
(String.sub ans
9 3) in
173 let headers = List.map
(fun s ->
174 let sep = String.index
s '
:'
in
175 (* TODO: we should lowercase the names here!
176 The header-names are case-insensitive,
177 therefore we only use lowercased names. *)
178 let name_head = String.sub
s 0 sep in
179 let size = String.length
s in
180 let content_head = String.sub
s (sep+2) (size-sep-2) in
181 (name_head, content_head)
184 headers_handler sock
ans_code headers;
186 TcpBufferedSocket.close sock
(Closed_for_error
"bad header")
188 let read_header header_handler sock nread
=
189 let b = TcpBufferedSocket.buf sock
in
190 let end_pos = b.pos
+ b.len
in
191 let new_pos = end_pos - nread
in
192 let new_pos = max
0 (new_pos - 1) in
194 lprintf "received [%s]" (String.escaped
195 (String.sub b.buf new_pos nread));
198 let end_pos = b.pos
+ b.len
in
200 if b.buf
.[i
] = '
\n'
&& i
<= end_pos - 2 then
201 let c = b.buf
.[i
+1] in
203 let len = i
+ 2 - b.pos
in
204 let header = String.sub
b.buf
b.pos
len in
206 header_handler sock
header
208 if c = '
\r'
&& i
<= end_pos - 3 && b.buf
.[i
+2] = '
\n'
then
209 let len = i
+ 3 - b.pos
in
210 let header = String.sub
b.buf
b.pos
len in
212 header_handler sock
header
222 let http_reply_handler nr headers_handler sock nread
=
223 (* lprintf "http_reply_handler\n"; *)
225 read_header (parse_header headers_handler
) sock nread
228 let def_ferr = (fun _
-> ())
230 let rec get_page r content_handler f ferr
=
231 let ok = ref false in
233 let err_done = ref false in (* call not more than once *)
234 fun c -> if not
!err_done then begin err_done := true; ferr c; end
236 let rec get_url level r
=
238 let url = r
.req_url
in
239 let level = r
.req_retry
in
240 let request = make_full_request r
in
242 match r
.req_proxy
with
243 | None
-> url.server, url.port
244 | Some
(s, p
, _
) -> s, p
246 (* lprintf "async_ip ...\n"; *)
247 Ip.async_ip
server (fun ip
->
248 (* lprintf "IP done %s:%d\n" (Ip.to_string ip) port;*)
249 let token = create_token unlimited_connection_manager
in
250 let sock = TcpBufferedSocket.connect
token "http client connecting"
251 (try Ip.to_inet_addr ip
with e
-> raise Not_found
) port
253 (* if !verbose then lprintf_nl "Event %s" (string_of_event e); *)
254 match e
with (* FIXME content-length check *)
255 | BASIC_EVENT
(CLOSED
(Closed_by_user
| Closed_by_peer
)) when !ok -> f
()
256 | BASIC_EVENT
(CLOSED reason
) -> ferr (`RST reason
)
257 | BASIC_EVENT LTIMEOUT
-> close
sock Closed_for_lifetime
261 let nread = ref false in
263 lprintf_nl "get_page: %s" (String.escaped
request);
264 TcpBufferedSocket.write_string
sock request;
265 TcpBufferedSocket.set_reader
sock (http_reply_handler nread
266 (default_headers_handler
url level));
267 set_rtimeout
sock 5.;
268 set_lifetime
sock r
.req_max_total_time
;
270 (fun () -> ferr `DNS
);
272 lprintf_nl "error in get_url";
275 and default_headers_handler old_url
level sock ans_code headers =
276 let print_headers () =
277 List.iter (fun (name
, value) ->
278 lprintf_nl "[%s]=[%s]" name
value;
281 if !verbose then print_headers ();
285 let content_length = ref (-1L) in
286 List.iter (fun (name
, content
) ->
287 match String.lowercase name
with
288 | "content-length" ->
290 content_length := Int64.of_string content
292 lprintf_nl "bad content length [%s]" content
)
293 | "content-encoding" ->
294 if String.lowercase content
= "gzip" then r
.req_gzip
<- true
297 let location = "Location", Url.to_string old_url
in
298 let content_handler = content_handler !content_length (location::headers) in
299 set_reader
sock content_handler;
300 let buf = TcpBufferedSocket.buf sock in
302 content_handler sock buf.len
305 if !verbose then lprintf_nl "%d: Redirect" ans_code;
306 let retrynum = r
.req_retry
in
307 if retrynum < r
.req_max_retry
then begin
310 List.iter (fun (name
, content
) ->
311 if String.lowercase name
= "location" then
314 if !verbose then print_headers ();
316 if String2.check_prefix
!url "." then url := String2.after
!url 1;
317 if String.length
!url > 0 && !url.[0] <> '
/'
319 else Printf.sprintf
"http://%s%s%s"
321 (if old_url
.Url.port
= 80 then "" else Printf.sprintf
":%d" old_url
.Url.port
)
325 if !verbose then lprintf_nl "Redirected to %s" url;
326 r
.req_url
<- (Url.of_string
url);
328 req_url
= Url.of_string
url;
329 req_retry
= retrynum+1
331 get_page r content_handler f
ferr
334 lprintf_nl "error understanding redirect response %d" ans_code;
340 lprintf_nl "more than %d redirections, aborting." r.req_max_retry
;
344 | 400 when r.req_request
= HEAD
->
345 lprintf_nl "Error 400 received for HEAD %s, re-try GET" (Url.to_string_no_args
r.req_url
);
350 get_page r2 content_handler f
ferr
353 lprintf_nl "404: Not found for: %s" (Url.to_string_no_args
r.req_url
);
354 close
sock (Closed_for_error
"bad reply");
355 ferr (`HTTP
ans_code);
359 if !verbose then lprintf_nl "%d: Unavailable" ans_code;
360 let retrynum = r.req_retry
in
361 if retrynum < r.req_max_retry
then begin
362 if !verbose then print_headers ();
363 let seconds = (retrynum+1)*10 in
364 lprintf_nl "retry %d/%d in %d seconds for %s"
365 (retrynum+1) r.req_max_retry
seconds (Url.to_string_no_args
r.req_url
);
367 req_retry
= retrynum+1
369 add_timer
(float(seconds)) (fun t
-> get_page r content_handler f
ferr)
372 lprintf_nl "more than %d retries, aborting." r.req_max_retry
;
373 ferr (`HTTP
ans_code);
378 lprintf_nl "%d: bad reply for: %s"
379 ans_code (Url.to_string_no_args
r.req_url
);
380 close
sock (Closed_for_error
"bad reply");
381 ferr (`HTTP
ans_code);
386 (** Copy all data from [input] to [output] *)
387 let io_copy input output
=
389 let size = 16 * 1024 in
390 let s = String.create
size in
392 let n = IO.input input
s 0 size in
393 if n = 0 then raise
IO.No_more_input
;
394 ignore
(IO.really_output output
s 0 n)
396 with IO.No_more_input
-> ()
400 let file_buf = Buffer.create
1000 in
401 let file_size = ref 0L in
404 get_page r (fun maxlen
headers sock nread ->
405 (* lprintf "received %d\n" nread; *)
406 let buf = TcpBufferedSocket.buf sock in
408 if nread > 0 then begin
411 min
(Int64.to_int
(maxlen
-- !file_size)) nread
414 Buffer.add_string
file_buf (String.sub
buf.buf buf.pos
left);
416 file_size := !file_size ++ (Int64.of_int
left);
418 TcpBufferedSocket.close
sock Closed_by_user
422 let s = Buffer.contents
file_buf in
424 lprintf_nl "Empty content for url %s"
425 (Url.to_string
r.req_url
);
428 let webinfos_dir = "web_infos" in
429 Unix2.safe_mkdir
webinfos_dir;
430 Unix2.can_write_to_directory
webinfos_dir;
432 let base = Filename.basename
r.req_url
.Url.short_file
in
433 (* Base could be "." for http://site.com/ *)
434 let base = if base = "."
436 let prng = Random.State.make_self_init
() in
437 let rnd = (Random.State.bits
prng) land 0xFFFFFF in
438 Printf.sprintf
"http_%06x.tmp" rnd
442 let filename = Filename.concat
webinfos_dir base in
443 if !verbose then lprintf_nl "Filename: %s" filename;
447 Unix2.tryopen_write_bin
filename begin fun oc
->
448 let gz = Gzip.input_io
(IO.input_string
s) in
449 io_copy gz (IO.output_channel oc
)
452 lprintf_nl "Exception %s while uncompressing content from %s" (Printexc2.to_string e
) (Url.to_string
r.req_url
);
457 Unix2.tryopen_write_bin
filename (fun oc
-> output_string oc
s);
458 if r.req_save_to_file_time
<> 0. then
459 Unix.utimes
filename r.req_save_to_file_time
r.req_save_to_file_time
;
462 if not
r.req_save
then Sys.remove
filename
464 lprintf_nl "Exception %s in loading downloaded file %s" (Printexc2.to_string e
) filename;
469 lprintf_nl "Exception %s in wget" (Printexc2.to_string e
);
472 let whead2 r f
ferr =
474 (fun maxlen
headers ->
475 (try f
headers with _
-> ());
477 close
sock Closed_by_user
482 let whead r f
= whead2 r f
def_ferr
484 let wget_string r f ?
(ferr=def_ferr) progress
=
486 let file_buf = Buffer.create
1000 in
487 let file_size = ref 0L in
490 (fun maxlen
headers sock nread ->
491 let buf = TcpBufferedSocket.buf sock in
493 if nread > 0 then begin
496 min
(Int64.to_int
(maxlen
-- !file_size)) nread
499 Buffer.add_string
file_buf (String.sub
buf.buf buf.pos
left);
500 progress
left maxlen
;
502 file_size := !file_size ++ (Int64.of_int
left);
504 TcpBufferedSocket.close
sock Closed_by_user
510 let io = Gzip.input_io
(IO.input_string
(Buffer.contents
file_buf)) in
513 lprintf_nl "Exception %s while uncompressing content from %s" (Printexc2.to_string e
) (Url.to_string
r.req_url
);
516 Buffer.contents
file_buf
522 let split_header header =
523 for i
= 0 to String.length
header - 1 do
524 if header.[i
] = '
\r'
then header.[i
] <- '
\n'
;
526 for i
= String.length
header - 1 downto 1 do
527 if header.[i
-1] = '
\n'
then
528 if header.[i
] = ' '
then (header.[i
] <- '
,'
; header.[i
-1] <- '
,'
)
530 if header.[i
] = '
,'
then header.[i
-1] <- '
,'
;
532 String2.split_simplify
header '
\n'
534 let cut_headers headers =
537 let pos = String.index
s '
:'
in
538 let len = String.length
s in
539 let key = String.sub
s 0 pos in
540 String.lowercase
key, if pos+1 < len && s.[pos+1] = ' '
then
541 String.sub
s (pos+2) (len-pos-2), key
543 String.sub
s (pos+1) (len-pos-1), key
546 lprintf_nl "Exception in cut_headers: %s" (Printexc2.to_string e
);