1 (* Copyright 2002 b8_bavard, b8_fee_carabine, INRIA *)
3 This file is part of mldonkey.
5 mldonkey is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 mldonkey is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mldonkey; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 GET, POST, HEAD, PUT, DELETE, TRACE, OPTIONS, CONNECT
29 open TcpBufferedSocket
41 let verbose = ref false
44 req_headers
: ( string * string ) list
;
45 req_user_agent
: string;
47 req_proxy
: (string * int) option;
48 mutable req_url
: url
;
49 mutable req_save_to_file_time
: float;
50 req_request
: http_request
;
51 req_referer
: Url.url
option;
55 req_max_total_time
: float;
58 type content_handler
=
59 int64
-> (string * string) list
-> TcpBufferedSocket.t
-> int -> unit
61 let log_prefix = "[HTTPcl]"
64 lprintf_nl2
log_prefix fmt
67 req_url
= Url.of_string
"http://www.mldonkey.org/";
69 req_save_to_file_time
= 0.;
73 req_user_agent
= "Wget 1.4";
78 req_max_total_time
= infinite_timeout
;
81 let make_full_request r
=
82 let url = r
.req_url
in
83 let args = url.args in
84 let res = Buffer.create
80 in
85 let is_real_post = r
.req_request
= POST
&& args <> [] in
87 then Buffer.add_string
res "POST "
89 Buffer.add_string
res (if r
.req_request
= HEAD
then "HEAD " else "GET ");
90 Buffer.add_string
res (
92 if r
.req_proxy
<> None
93 then Url.to_string_no_args
url
96 (* I get a lot more bittorrent urls with this line: *)
97 let url = (Str.global_replace
(Str.regexp
" ") "%20" url) in
98 let url = if is_real_post then url else
102 Printf.bprintf
res " HTTP/1.0\r\nHost: %s%s\r\n" url.server
(if url.port
!= 80 then Printf.sprintf
":%d" url.port
else "");
103 List.iter
(fun (a
,b
) ->
104 Printf.bprintf
res "%s: %s\r\n" a b
106 Printf.bprintf
res "User-Agent: %s\r\n" r
.req_user_agent
;
107 Printf.bprintf
res "Accept: %s\r\n" r
.req_accept
;
108 Printf.bprintf
res "Connection: close\r\n";
109 (match r
.req_referer
with None
-> ()
111 Printf.bprintf
res "Referer: %s\r\n" (Url.to_string_no_args
url));
112 if url.user
<> "" then begin
113 let userpass = Printf.sprintf
"%s:%s" url.user
url.passwd
in
114 Printf.bprintf
res "Authorization: Basic %s\r\n" (Base64.encode
userpass)
116 if is_real_post then begin
117 let post = Buffer.create
80 in
118 let rec make_post = function
121 Printf.bprintf
post "%s%c%s" (Url.encode a
) '
='
(Url.encode b
)
123 Printf.bprintf
post "%s%c%s%c"
124 (Url.encode a
) '
='
(Url.encode b
) '
&'
;
127 Printf.bprintf
res "Content-Type: application/x-www-form-urlencoded\r\nContent-Length: %d\r\n\r\n%s"
128 (Buffer.length
post) (Buffer.contents
post)
130 Buffer.add_string
res "\r\n";
131 let s = Buffer.contents
res in
133 lprintf_nl "make_full_request on URL: %s" (String.escaped
s);
137 let rec iter pos1
res =
139 let pos3 = String.index_from
s pos1 '
\n'
in
140 let pos2 = if pos3 > 0 && s.[pos3 - 1] = '
\r'
then pos3 - 1 else pos3 in
141 let line = String.sub
s pos1
(pos2 - pos1
) in
142 if line = "" then List.rev
res else
143 iter (pos3+1) (line :: res)
145 let last_line = String.sub
s pos1
(String.length
s - pos1
) in
146 List.rev
(if last_line = "" then res else last_line :: res)
150 let parse_header headers_handler sock header
=
151 let headers = split_head header
in
153 [] -> failwith
"Ill formed reply"
155 if !verbose then lprintf_nl "parse_header: ANSWER %s" ans
;
156 let ans_code = int_of_string
(String.sub ans
9 3) in
157 let headers = List.map
(fun s ->
158 let sep = String.index
s '
:'
in
159 (* TODO: we should lowercase the names here!
160 The header-names are case-insensitive,
161 therefore we only use lowercased names. *)
162 let name_head = String.sub
s 0 sep in
163 let size = String.length
s in
164 let content_head = String.sub
s (sep+2) (size-sep-2) in
165 (name_head, content_head)
168 headers_handler sock
ans_code headers;
170 TcpBufferedSocket.close sock
(Closed_for_error
"bad header")
172 let read_header header_handler sock nread
=
173 let b = TcpBufferedSocket.buf sock
in
174 let end_pos = b.pos
+ b.len
in
175 let new_pos = end_pos - nread
in
176 let new_pos = max
0 (new_pos - 1) in
178 lprintf "received [%s]" (String.escaped
179 (String.sub b.buf new_pos nread));
182 let end_pos = b.pos
+ b.len
in
184 if b.buf
.[i
] = '
\n'
&& i
<= end_pos - 2 then
185 let c = b.buf
.[i
+1] in
187 let len = i
+ 2 - b.pos
in
188 let header = String.sub
b.buf
b.pos
len in
190 header_handler sock
header
192 if c = '
\r'
&& i
<= end_pos - 3 && b.buf
.[i
+2] = '
\n'
then
193 let len = i
+ 3 - b.pos
in
194 let header = String.sub
b.buf
b.pos
len in
196 header_handler sock
header
206 let http_reply_handler nr headers_handler sock nread
=
207 (* lprintf "http_reply_handler\n"; *)
209 read_header (parse_header headers_handler
) sock nread
212 let def_ferr = (fun c -> ())
214 let rec get_page r content_handler f ferr
=
215 let ok = ref false in
217 let err_done = ref false in (* call not more than once *)
218 fun n
-> if not
!err_done then begin err_done := true; ferr n
; end
220 let rec get_url level r
=
222 let url = r
.req_url
in
223 let level = r
.req_retry
in
224 let request = make_full_request r
in
226 match r
.req_proxy
with
227 | None
-> url.server, url.port
228 | Some
(s, p
) -> s, p
230 (* lprintf "async_ip ...\n"; *)
231 Ip.async_ip
server (fun ip
->
232 (* lprintf "IP done %s:%d\n" (Ip.to_string ip) port;*)
233 let token = create_token unlimited_connection_manager
in
234 let sock = TcpBufferedSocket.connect
token "http client connecting"
235 (try Ip.to_inet_addr ip
with e
-> raise Not_found
) port
237 (* if !verbose then lprintf_nl "Event %s" (string_of_event e); *)
238 match e
with (* FIXME content-length check *)
239 | BASIC_EVENT
(CLOSED
(Closed_by_user
| Closed_by_peer _
)) when !ok -> f
()
240 | BASIC_EVENT
(CLOSED _
) -> ferr 0
241 | BASIC_EVENT LTIMEOUT
-> close
sock Closed_for_lifetime
245 let nread = ref false in
247 lprintf_nl "get_page: %s" (String.escaped
request);
248 TcpBufferedSocket.write_string
sock request;
249 TcpBufferedSocket.set_reader
sock (http_reply_handler nread
250 (default_headers_handler
url level));
251 set_rtimeout
sock 5.;
252 set_lifetime
sock r
.req_max_total_time
;
256 lprintf_nl "error in get_url";
259 and default_headers_handler old_url
level sock ans_code headers =
260 let print_headers () =
261 List.iter (fun (name
, value) ->
262 lprintf_nl "[%s]=[%s]" name
value;
265 if !verbose then print_headers ();
269 let content_length = ref (-1L) in
270 List.iter (fun (name
, content
) ->
271 if String.lowercase name
= "content-length" then
272 try content_length := Int64.of_string content
273 with _
-> lprintf_nl "bad content length [%s]" content
;
275 let location = "Location", Url.to_string old_url
in
276 let content_handler = content_handler !content_length (location::headers) in
277 set_reader
sock content_handler;
278 let buf = TcpBufferedSocket.buf sock in
280 content_handler sock buf.len
283 if !verbose then lprintf_nl "%d: Redirect" ans_code;
284 let retrynum = r
.req_retry
in
285 if retrynum < r
.req_max_retry
then begin
288 List.iter (fun (name
, content
) ->
289 if String.lowercase name
= "location" then
292 if !verbose then print_headers ();
294 if String2.check_prefix
!url "." then url := String2.after
!url 1;
295 if String.length
!url > 0 && !url.[0] <> '
/'
297 else Printf.sprintf
"http://%s%s%s"
299 (if old_url
.Url.port
= 80 then "" else Printf.sprintf
":%d" old_url
.Url.port
)
303 if !verbose then lprintf_nl "Redirected to %s" url;
304 r
.req_url
<- (Url.of_string
url);
306 req_url
= Url.of_string
url;
307 req_retry
= retrynum+1
309 get_page r content_handler f
ferr
312 lprintf_nl "error understanding redirect response %d" ans_code;
318 lprintf_nl "more than %d redirections, aborting." r.req_max_retry
;
322 | 400 when r.req_request
= HEAD
->
323 lprintf_nl "Error 400 received for HEAD %s, re-try GET" (Url.to_string_no_args
r.req_url
);
328 get_page r2 content_handler f
ferr
331 lprintf_nl "404: Not found for: %s" (Url.to_string_no_args
r.req_url
);
332 close
sock (Closed_for_error
"bad reply");
337 if !verbose then lprintf_nl "%d: Unavailable" ans_code;
338 let retrynum = r.req_retry
in
339 if retrynum < r.req_max_retry
then begin
340 if !verbose then print_headers ();
341 let seconds = (retrynum+1)*10 in
342 lprintf_nl "retry %d/%d in %d seconds for %s"
343 (retrynum+1) r.req_max_retry
seconds (Url.to_string_no_args
r.req_url
);
345 req_retry
= retrynum+1
347 add_timer
(float(seconds)) (fun t
-> get_page r content_handler f
ferr)
350 lprintf_nl "more than %d retries, aborting." r.req_max_retry
;
356 lprintf_nl "%d: bad reply for: %s"
357 ans_code (Url.to_string_no_args
r.req_url
);
358 close
sock (Closed_for_error
"bad reply");
366 let file_buf = Buffer.create
1000 in
367 let file_size = ref 0L in
370 get_page r (fun maxlen
headers sock nread ->
371 (* lprintf "received %d\n" nread; *)
372 let buf = TcpBufferedSocket.buf sock in
374 if nread > 0 then begin
377 min
(Int64.to_int
(maxlen
-- !file_size)) nread
380 Buffer.add_string
file_buf (String.sub
buf.buf buf.pos
left);
382 file_size := !file_size ++ (Int64.of_int
left);
384 TcpBufferedSocket.close
sock Closed_by_user
388 let s = Buffer.contents
file_buf in
390 lprintf_nl "Empty content for url %s"
391 (Url.to_string
r.req_url
);
394 let webinfos_dir = "web_infos" in
395 Unix2.safe_mkdir
webinfos_dir;
396 Unix2.can_write_to_directory
webinfos_dir;
398 let base = Filename.basename
r.req_url
.Url.short_file
in
399 (* Base could be "." for http://site.com/ *)
400 let base = if base = "."
402 let prng = Random.State.make_self_init
() in
403 let rnd = (Random.State.bits
prng) land 0xFFFFFF in
404 Printf.sprintf
"http_%06x.tmp" rnd
408 let filename = Filename.concat
webinfos_dir base in
409 if !verbose then lprintf_nl "Filename: %s" filename;
410 Unix2.tryopen_write_bin
filename (fun oc
-> output_string oc
s);
411 if r.req_save_to_file_time
<> 0. then
412 Unix.utimes
filename r.req_save_to_file_time
r.req_save_to_file_time
;
415 if not
r.req_save
then Sys.remove
filename
417 lprintf_nl "Exception %s in loading downloaded file %s" (Printexc2.to_string e
) filename;
422 lprintf_nl "Exception %s in wget" (Printexc2.to_string e
);
425 let whead2 r f
ferr =
427 (fun maxlen
headers ->
428 (try f
headers with _
-> ());
430 close
sock Closed_by_user
435 let whead r f
= whead2 r f
def_ferr
437 let wget_string r f ?
(ferr=def_ferr) progress
=
439 let file_buf = Buffer.create
1000 in
440 let file_size = ref 0L in
443 (fun maxlen
headers sock nread ->
444 let buf = TcpBufferedSocket.buf sock in
446 if nread > 0 then begin
449 min
(Int64.to_int
(maxlen
-- !file_size)) nread
452 Buffer.add_string
file_buf (String.sub
buf.buf buf.pos
left);
453 progress
left maxlen
;
455 file_size := !file_size ++ (Int64.of_int
left);
457 TcpBufferedSocket.close
sock Closed_by_user
460 f
(Buffer.contents
file_buf)
464 let split_header header =
465 for i
= 0 to String.length
header - 1 do
466 if header.[i
] = '
\r'
then header.[i
] <- '
\n'
;
468 for i
= String.length
header - 1 downto 1 do
469 if header.[i
-1] = '
\n'
then
470 if header.[i
] = ' '
then (header.[i
] <- '
,'
; header.[i
-1] <- '
,'
)
472 if header.[i
] = '
,'
then header.[i
-1] <- '
,'
;
474 String2.split_simplify
header '
\n'
476 let cut_headers headers =
479 let pos = String.index
s '
:'
in
480 let len = String.length
s in
481 let key = String.sub
s 0 pos in
482 String.lowercase
key, if pos+1 < len && s.[pos+1] = ' '
then
483 String.sub
s (pos+2) (len-pos-2), key
485 String.sub
s (pos+1) (len-pos-1), key
488 lprintf_nl "Exception in cut_headers: %s" (Printexc2.to_string e
);