patch #7573
[mldonkey.git] / src / daemon / common / commonWeb.ml
blob17fe1a1d2dac8cbd583fe941a97d6a4eb76f58a9
1 (* Copyright 2001, 2002 b8_bavard, b8_fee_carabine, INRIA *)
2 (*
3 This file is part of mldonkey.
5 mldonkey is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 mldonkey is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mldonkey; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 open Options
21 open Printf2
23 open AnyEndian
24 open LittleEndian
26 open BasicSocket
27 open TcpBufferedSocket
29 open CommonGlobals
30 open CommonOptions
31 open CommonTypes
32 open Gettext
34 let _s x = _s "CommonWeb" x
35 let _b x = _b "CommonWeb" x
37 let log_prefix = "[cWeb]"
39 let lprintf_nl fmt =
40 lprintf_nl2 log_prefix fmt
42 let lprintf_n fmt =
43 lprintf2 log_prefix fmt
45 let days = ref 0
46 let hours = ref 0
48 (*************************************************************************)
49 (* *)
50 (* load_url *)
51 (* *)
52 (*************************************************************************)
54 let file_kinds = ref []
56 let add_web_kind kind descr f =
57 let kind_record = { f = f; description = descr } in
58 file_kinds := (kind, kind_record) :: !file_kinds
61 let mldonkey_wget_url w f =
62 (match w.state with
63 | None -> w.state <- Some DownloadStarted;
64 | _ -> ());
65 let module H = Http_client in
66 let r = {
67 H.basic_request with
68 H.req_url = Url.of_string w.url;
69 H.req_proxy = !CommonOptions.http_proxy;
70 H.req_referer = (
71 let (rule_search,rule_value) =
72 try (List.find(fun (rule_search,rule_value) ->
73 Str.string_match (Str.regexp rule_search) w.url 0
74 ) !!referers )
75 with Not_found -> ("", w.url) in
76 Some (Url.of_string rule_value) );
77 H.req_headers = (try
78 let cookies = List.assoc w.url !!cookies in
79 [ ( "Cookie", List.fold_left (fun res (key, value) ->
80 if res = "" then
81 key ^ "=" ^ value
82 else
83 res ^ "; " ^ key ^ "=" ^ value
84 ) "" cookies
85 ) ]
86 with Not_found -> []);
87 H.req_user_agent = get_user_agent ();
88 H.req_max_retry = 20;
89 H.req_save = true;
90 } in
91 let r1 = {
92 r with
93 H.req_request = H.HEAD;
94 } in
95 let date = ref None in
96 begin try
97 H.whead2 r1 (fun headers ->
98 List.iter (fun (name, content) ->
99 if String.lowercase name = "last-modified" then
101 date := Some content
102 with _ -> ()
103 ) headers;
104 match !date with
105 None -> (try
106 H.wget r f;
107 w.state <- Some FileLoaded
108 with e -> w.state <- None; raise e)
109 | Some date ->
110 let file = Filename.concat "web_infos" (Filename.basename r.H.req_url.Url.short_file) in
111 r.H.req_save_to_file_time <- (begin try
112 Date.time_of_string date
113 with e ->
114 Unix.time ()
115 end);
116 if not (Sys.file_exists file) then
117 begin
119 H.wget r f;
120 w.state <- Some FileLoaded
121 with e -> w.state <- None; raise e
123 else
124 begin
125 let file_loaded state =
126 match state with
127 | Some FileLoaded -> true
128 | _ -> false
130 let file_date = Unix.LargeFile.stat file in
131 if r.H.req_save_to_file_time <= file_date.Unix.LargeFile.st_mtime then
132 begin
133 lprintf_nl (_b "%s version of %s (%s), HTML header (%s)")
134 (if file_loaded w.state then "already loaded local" else "re-loading possible broken")
135 file (Date.to_full_string file_date.Unix.LargeFile.st_mtime) date;
136 if not (file_loaded w.state) then
138 H.wget r f;
139 w.state <- Some FileLoaded
140 with e -> w.state <- None; raise e
142 else
143 begin
144 lprintf_nl (_b "downloading newer %s, HTML header (%s)") file date;
146 H.wget r f;
147 w.state <- Some FileLoaded
148 with e -> w.state <- None; raise e
152 (fun c ->
153 match c with
154 | `HTTP x when (x < 200 || x > 299) -> begin
155 (* use local version if wget fail and file exists *)
156 let file = Filename.concat "web_infos" (Filename.basename r.H.req_url.Url.short_file) in
157 (* mark this job downloaded *)
158 match w.state with
159 | Some FileLoaded ->
160 lprintf_nl (_b "already loaded local version of %s, HTTP request failed (error %d)") file x
161 | _ ->
162 if Sys.file_exists file then begin
163 lprintf_nl (_b "using local version of %s, HTTP request failed (error %d)") file x;
164 add_timer 5. (fun timer ->
165 let jobs =
166 (* check if other jobs are still in downloading state to avoid calling
167 function f, which might hurt other downloads for expensive functions *)
168 let others_running = ref 0 in
169 Hashtbl.iter (fun key w ->
170 match w.state with
171 | Some DownloadStarted -> incr others_running
172 | _ -> ()
173 ) web_infos_table;
174 !others_running
176 if jobs = 0 then
177 (* no other jobs in downloading state, process local versions of remotely failed job *)
178 (f file : unit)
179 else
180 (* other jobs in downloading state, reactivate this timer to check again in 5s *)
181 reactivate_timer timer
184 else
185 lprintf_nl (_b "local file %s not found, HTTP request failed (error %d)") file x;
186 w.state <- None
188 | _ -> ()
190 with e ->
191 w.state <- None;
192 lprintf_nl (_b "Exception %s while loading %s") (Printexc2.to_string e) w.url
195 let mldonkey_wget_shell w f =
196 let command_urlencoded = Str.string_after w.url 8 in
197 let command = Url.decode command_urlencoded in
198 let filename = Filename2.temp_file "wget_" ".tmp" in
199 ignore (Sys.command (Printf.sprintf "%s > %s" command filename));
200 (f filename : unit)
202 let mldonkey_wget w f =
203 if Str.string_match (Str.regexp "shell://") w.url 0 then
204 mldonkey_wget_shell w f
205 else
206 mldonkey_wget_url w f
208 let load_url can_fail w =
209 let f =
211 (List.assoc w.kind !file_kinds).f w.url
212 with e -> failwith (Printf.sprintf "Unknown kind [%s]" w.kind)
215 lprintf_nl (_b "request %s (%s)") w.kind w.url;
216 mldonkey_wget w f
217 with e ->
218 if can_fail then
219 failwith (Printf.sprintf "Exception %s while loading %s"
220 (Printexc2.to_string e) w.url)
221 else
222 lprintf_nl (_b "Exception %s while loading %s")
223 (Printexc2.to_string e) w.url
225 (*************************************************************************)
226 (* *)
227 (* load_web_infos *)
228 (* *)
229 (*************************************************************************)
231 let load_web_infos core_start force =
232 Hashtbl.iter (fun key w ->
233 if (core_start && w.period = 0) || (w.period <> 0 && !hours mod w.period = 0) || force then
234 begin
236 load_url false w
237 with e ->
238 lprintf_nl (_b "%s while loading %s")
239 (Printexc2.to_string e) w.url
241 ) CommonOptions.web_infos_table
243 type rss_feed = {
244 mutable rss_date : int;
245 mutable rss_value : Rss.channel;
248 let rss_feeds = Hashtbl.create 10
250 let _ =
251 add_web_kind "rss" "Syndication feeds to get periodically updated data"
252 (fun url filename ->
253 lprintf_nl (_b "parsing feed %s (rss)") url;
254 let c =
255 (try
256 let rss_c = Rss.channel_of_file filename in
257 (try Sys.remove filename with _ -> ());
258 rss_c
259 with Xml.Error _ ->
260 lprintf_nl (_b "found buggy feed, preprocessing with %s and trying again") !!rss_preprocessor;
261 (try
262 let pipe_out, pipe_in = Unix.pipe () in
263 let pid = Unix.create_process !!rss_preprocessor [| !!rss_preprocessor; filename |]
264 Unix.stdin pipe_in pipe_in in
265 Unix.close pipe_in;
266 let output = Buffer.create 1024 in
267 let buffersize = 1024 in
268 let buffer = String.create buffersize in
269 (try
270 while true do
271 let nread = Unix.read pipe_out buffer 0 buffersize in
272 if nread = 0 then raise End_of_file;
273 Buffer.add_substring output buffer 0 nread
274 done
275 with
276 | End_of_file -> ()
277 | Unix.Unix_error (code, f, arg) ->
278 lprintf_nl "%s failed: %s" !!rss_preprocessor (Unix.error_message code));
279 (try Unix.close pipe_out with _ -> ());
280 (try Sys.remove filename with _ -> ());
281 let _pid, _ = Unix.waitpid [] pid in
282 let result = Buffer.contents output in
283 if result = "" then begin
284 lprintf_nl (_b "%s produced empty content for feed %s, program missing?") !!rss_preprocessor url;
285 raise Not_found
286 end;
287 Rss.channel_of_string result
288 with Unix.Unix_error (code, f, arg) ->
289 lprintf_nl (_b "%s failed: %s") !!rss_preprocessor (Unix.error_message code); raise Not_found))
291 let feed =
292 try Hashtbl.find rss_feeds url with
293 Not_found ->
294 let feed = {
295 rss_date = 0;
296 rss_value = c;
297 } in
298 Hashtbl.add rss_feeds url feed;
299 feed
301 feed.rss_date <- last_time ();
302 feed.rss_value <- c;