1 (* Copyright 2001, 2002 b8_bavard, b8_fee_carabine, INRIA *)
3 This file is part of mldonkey.
5 mldonkey is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 mldonkey is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mldonkey; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 open TcpBufferedSocket
34 let _s x
= _s "CommonWeb" x
35 let _b x
= _b "CommonWeb" x
37 let log_prefix = "[cWeb]"
40 lprintf_nl2
log_prefix fmt
43 lprintf2
log_prefix fmt
48 (*************************************************************************)
52 (*************************************************************************)
54 let file_kinds = ref []
56 let add_web_kind kind descr f
=
57 let kind_record = { f
= f
; description
= descr
} in
58 file_kinds := (kind
, kind_record) :: !file_kinds
61 let mldonkey_wget_url w f
=
63 | None
-> w
.state
<- Some DownloadStarted
;
65 let module H
= Http_client
in
68 H.req_url
= Url.of_string w
.url
;
69 H.req_proxy
= !CommonOptions.http_proxy
;
71 let (rule_search
,rule_value
) =
72 try (List.find
(fun (rule_search
,rule_value
) ->
73 Str.string_match
(Str.regexp rule_search
) w
.url
0
75 with Not_found
-> ("", w
.url
) in
76 Some
(Url.of_string rule_value
) );
78 let cookies = List.assoc w
.url
!!cookies in
79 [ ( "Cookie", List.fold_left
(fun res
(key
, value) ->
83 res ^
"; " ^ key ^
"=" ^
value
86 with Not_found
-> []);
87 H.req_user_agent
= get_user_agent
();
93 H.req_request
= H.HEAD
;
95 let date = ref None
in
97 H.whead2
r1 (fun headers
->
98 List.iter
(fun (name
, content
) ->
99 if String.lowercase name
= "last-modified" then
107 w
.state
<- Some FileLoaded
108 with e
-> w
.state
<- None
; raise e
)
110 let file = Filename.concat
"web_infos" (Filename.basename
r.H.req_url
.Url.short_file
) in
111 r.H.req_save_to_file_time
<- (begin try
112 Date.time_of_string
date
116 if not
(Sys.file_exists
file) then
120 w
.state
<- Some FileLoaded
121 with e
-> w
.state
<- None
; raise e
125 let file_loaded state
=
127 | Some FileLoaded
-> true
130 let file_date = Unix.LargeFile.stat
file in
131 if r.H.req_save_to_file_time
<= file_date.Unix.LargeFile.st_mtime
then
133 lprintf_nl (_b "%s version of %s (%s), HTML header (%s)")
134 (if file_loaded w
.state
then "already loaded local" else "re-loading possible broken")
135 file (Date.to_full_string
file_date.Unix.LargeFile.st_mtime
) date;
136 if not
(file_loaded w
.state
) then
139 w
.state
<- Some FileLoaded
140 with e
-> w
.state
<- None
; raise e
144 lprintf_nl (_b "downloading newer %s, HTML header (%s)") file date;
147 w
.state
<- Some FileLoaded
148 with e
-> w
.state
<- None
; raise e
154 | `HTTP x
when (x
< 200 || x
> 299) -> begin
155 (* use local version if wget fail and file exists *)
156 let file = Filename.concat
"web_infos" (Filename.basename
r.H.req_url
.Url.short_file
) in
157 (* mark this job downloaded *)
160 lprintf_nl (_b "already loaded local version of %s, HTTP request failed (error %d)") file x
162 if Sys.file_exists
file then begin
163 lprintf_nl (_b "using local version of %s, HTTP request failed (error %d)") file x
;
164 add_timer
5. (fun timer
->
166 (* check if other jobs are still in downloading state to avoid calling
167 function f, which might hurt other downloads for expensive functions *)
168 let others_running = ref 0 in
169 Hashtbl.iter
(fun key w
->
171 | Some DownloadStarted
-> incr
others_running
177 (* no other jobs in downloading state, process local versions of remotely failed job *)
180 (* other jobs in downloading state, reactivate this timer to check again in 5s *)
181 reactivate_timer timer
185 lprintf_nl (_b "local file %s not found, HTTP request failed (error %d)") file x
;
192 lprintf_nl (_b "Exception %s while loading %s") (Printexc2.to_string e
) w
.url
195 let mldonkey_wget_shell w f
=
196 let command_urlencoded = Str.string_after w
.url
8 in
197 let command = Url.decode
command_urlencoded in
198 let filename = Filename2.temp_file
"wget_" ".tmp" in
199 ignore
(Sys.command (Printf.sprintf
"%s > %s" command filename));
202 let mldonkey_wget w f
=
203 if Str.string_match
(Str.regexp
"shell://") w
.url
0 then
204 mldonkey_wget_shell w f
206 mldonkey_wget_url w f
208 let load_url can_fail w
=
211 (List.assoc w
.kind
!file_kinds).f w
.url
212 with e
-> failwith
(Printf.sprintf
"Unknown kind [%s]" w
.kind
)
215 lprintf_nl (_b "request %s (%s)") w
.kind w
.url
;
219 failwith
(Printf.sprintf
"Exception %s while loading %s"
220 (Printexc2.to_string e
) w
.url
)
222 lprintf_nl (_b "Exception %s while loading %s")
223 (Printexc2.to_string e
) w
.url
225 (*************************************************************************)
229 (*************************************************************************)
231 let load_web_infos core_start force
=
232 Hashtbl.iter
(fun key w
->
233 if (core_start
&& w
.period
= 0) || (w
.period
<> 0 && !hours mod w
.period
= 0) || force
then
238 lprintf_nl (_b "%s while loading %s")
239 (Printexc2.to_string e
) w
.url
241 ) CommonOptions.web_infos_table
244 mutable rss_date
: int;
245 mutable rss_value
: Rss.channel
;
248 let rss_feeds = Hashtbl.create
10
251 add_web_kind "rss" "Syndication feeds to get periodically updated data"
253 lprintf_nl (_b "parsing feed %s (rss)") url
;
256 let rss_c = Rss.channel_of_file
filename in
257 (try Sys.remove
filename with _ -> ());
260 lprintf_nl (_b "found buggy feed, preprocessing with %s and trying again") !!rss_preprocessor
;
262 let pipe_out, pipe_in
= Unix.pipe
() in
263 let pid = Unix.create_process
!!rss_preprocessor
[| !!rss_preprocessor
; filename |]
264 Unix.stdin pipe_in pipe_in
in
266 let output = Buffer.create
1024 in
267 let buffersize = 1024 in
268 let buffer = String.create
buffersize in
271 let nread = Unix.read
pipe_out buffer 0 buffersize in
272 if nread = 0 then raise End_of_file
;
273 Buffer.add_substring
output buffer 0 nread
277 | Unix.Unix_error
(code
, f, arg
) ->
278 lprintf_nl "%s failed: %s" !!rss_preprocessor
(Unix.error_message code
));
279 (try Unix.close
pipe_out with _ -> ());
280 (try Sys.remove
filename with _ -> ());
281 let _pid, _ = Unix.waitpid
[] pid in
282 let result = Buffer.contents
output in
283 if result = "" then begin
284 lprintf_nl (_b "%s produced empty content for feed %s, program missing?") !!rss_preprocessor url
;
287 Rss.channel_of_string
result
288 with Unix.Unix_error
(code
, f, arg
) ->
289 lprintf_nl (_b "%s failed: %s") !!rss_preprocessor
(Unix.error_message code
); raise Not_found
))
292 try Hashtbl.find
rss_feeds url
with
298 Hashtbl.add
rss_feeds url
feed;
301 feed.rss_date
<- last_time
();