fix shorten_string, add tests
[mldonkey.git] / tools / mld_hash.ml
blob67f8ab9299dbe49931718394bfc7842bffab0545
1 (* Copyright 2001, 2002 b8_bavard, b8_fee_carabine, INRIA *)
2 (*
3 This file is part of mldonkey.
5 mldonkey is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 mldonkey is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mldonkey; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 open Gettext
21 open Md4
22 open LittleEndian
23 open Unix
24 open Printf2
26 let _s x = _s "Mld_hash" x
27 let _b x = _b "Mld_hash" x
29 let zero = Int64.zero
30 let one = Int64.one
31 let (++) = Int64.add
32 let (--) = Int64.sub
33 let ( ** ) x y = Int64.mul x y
34 let ( // ) x y = Int64.div x y
36 let edk_block_size = 9728000L
37 let edk_zone_size = 180L ** 1024L
38 let tiger_block_size = Int64.of_int (1024 * 1024)
39 let partial = ref false
40 let partial_zone = ref false
41 let keep_file_after_check = ref false
43 (*************************************************************************)
44 (* *)
45 (* tiger_of_array *)
46 (* *)
47 (*************************************************************************)
49 let rec tiger_of_array array pos block =
50 if block = 1 then
51 array.(pos)
52 else
53 let len = Array.length array in
54 if pos + block / 2 >= len then
55 tiger_of_array array pos (block/2)
56 else
57 let d1 = tiger_of_array array pos (block/2) in
58 let d2 = tiger_of_array array (pos+block/2) (block/2) in
59 let s = String.create (1 + Tiger.length * 2) in
60 s.[0] <- '\001';
61 String.blit (TigerTree.direct_to_string d1) 0 s 1 Tiger.length;
62 String.blit (TigerTree.direct_to_string d2) 0 s (1+Tiger.length) Tiger.length;
63 let t = Tiger.string s in
64 let t = TigerTree.direct_of_string (Tiger.direct_to_string t) in
67 (*************************************************************************)
68 (* *)
69 (* tiger_max_block_size *)
70 (* *)
71 (*************************************************************************)
73 let rec tiger_max_block_size block len =
74 if block >= len then block
75 else tiger_max_block_size (block*2) len
77 (*************************************************************************)
78 (* *)
79 (* tiger_of_array *)
80 (* *)
81 (*************************************************************************)
83 let tiger_of_array array =
84 tiger_of_array array 0 (tiger_max_block_size 1 (Array.length array))
87 (*************************************************************************)
88 (* *)
89 (* bitprint_file *)
90 (* *)
91 (*************************************************************************)
93 let bitprint_file fd file_size =
94 lprintf "Calculating SHA1\n";
95 let sha1 = Sha1.digest_subfile fd zero file_size in
96 lprintf "Calculating TigerTree\n";
97 let tiger = TigerTree.digest_subfile fd zero file_size in
98 lprintf "urn:bitprint:%s.%s\n" (Sha1.to_string sha1) (TigerTree.to_string tiger);
99 let file_size = Unix32.getsize64 fd in
100 let nchunks = Int64.to_int (Int64.pred file_size // tiger_block_size) + 1 in
101 let chunks =
102 let chunks = Array.make nchunks tiger in
103 for i = 0 to nchunks - 1 do
104 let begin_pos = tiger_block_size ** (Int64.of_int i) in
105 let end_pos = begin_pos ++ tiger_block_size in
106 let end_pos = min end_pos file_size in
107 let len = end_pos -- begin_pos in
108 let tiger = TigerTree.digest_subfile fd begin_pos len in
109 if !partial then lprintf " Partial %4d/%4d : %s\n" i nchunks (TigerTree.to_string tiger);
110 chunks.(i) <- tiger
111 done;
112 chunks
114 let tiger2 = tiger_of_array chunks in
115 sha1, tiger2
117 (*************************************************************************)
118 (* *)
119 (* bitprint_filename *)
120 (* *)
121 (*************************************************************************)
123 let bitprint_filename filename =
124 let fd = Unix32.create_ro filename in
125 let file_size = Unix32.getsize64 fd in
126 let (sha1, tiger2) = bitprint_file fd file_size in
127 lprintf "urn:bitprint:%s.%s\n" (Sha1.to_string sha1) (TigerTree.to_string tiger2);
131 (*************************************************************************)
132 (* *)
133 (* ed2k_hash_file *)
134 (* *)
135 (*************************************************************************)
137 let ed2k_hash_file fd file_size =
138 (* See: DonkeyGlobals *)
139 let nchunks = Int64.to_int (file_size // edk_block_size) + 1 in
140 let nchunk_hashes = Int64.to_int (file_size // edk_block_size) in
141 let nchunk_hashes = if nchunk_hashes <> 0 then nchunk_hashes + 1 else nchunk_hashes in
142 let md4 = if nchunk_hashes = 0 then
143 Md4.digest_subfile fd zero file_size
144 else
145 let chunks = String.create (nchunks*16) in
146 for i = 0 to nchunks - 1 do
147 let begin_pos = edk_block_size ** (Int64.of_int i) in
148 let end_pos = begin_pos ++ edk_block_size in
149 let end_pos = min end_pos file_size in
150 let len = end_pos -- begin_pos in
151 let md4 = Md4.digest_subfile fd begin_pos len in
152 if !partial then lprintf " Partial %4d/%4d (%Ld - %Ld): %s\n"
153 i nchunks begin_pos end_pos (Md4.to_string md4);
154 let md4 = Md4.direct_to_string md4 in
155 String.blit md4 0 chunks (i*16) 16;
156 done;
157 Md4.string chunks
161 (*************************************************************************)
162 (* *)
163 (* ed2k_hash_filename *)
164 (* *)
165 (*************************************************************************)
167 let ed2k_hash_filename filename =
168 lprintf "Calculating ed2k of %s\n" filename;
169 let fd = Unix32.create_ro filename in
170 let file_size = Unix32.getsize64 fd in
171 let md4 = ed2k_hash_file fd file_size in
172 lprintf "ed2k://|file|%s|%Ld|%s|/\n"
173 (Url.encode (Filename.basename filename))
174 file_size
175 (Md4.to_string md4)
177 (*************************************************************************)
178 (* *)
179 (* aich_hash_file *)
180 (* *)
181 (*************************************************************************)
183 type side = Left | Right
185 let aich_hash side nchunks f_hashchunk =
186 let combine_sha1 hash1 hash2 =
187 Sha1.string (Sha1.direct_to_string hash1 ^ Sha1.direct_to_string hash2)
189 let build_tree n =
190 let rec aux side n next_leaf cont =
191 if n = 1L then cont (f_hashchunk side next_leaf) (next_leaf ++ 1L)
192 else
193 let p, q = n // 2L, Int64.rem n 2L in
194 aux Left (if q = 0L || side = Right then p else p ++ 1L) next_leaf
195 (fun left_hash next_leaf ->
196 aux Right (if q = 0L || side = Left then p else p ++ 1L) next_leaf
197 (fun right_hash next_leaf ->
198 cont (combine_sha1 left_hash right_hash) next_leaf)) in
199 aux side n 0L (fun root_hash number_of_leaves -> root_hash)
201 build_tree nchunks
203 let aich_hash_chunk side fd offset len =
204 let nzones = (Int64.pred len // edk_zone_size) ++ 1L in
205 let compute_sha1_zone side nzone =
206 let begin_pos = offset ++ edk_zone_size ** nzone in
207 let end_pos = offset ++ (min (edk_zone_size ** (nzone ++ 1L)) len) in
208 let len = end_pos -- begin_pos in
209 if !partial_zone then
210 lprintf_nl "compute SHA1 of zone %Ld/%Ld (%Ld - %Ld) len %Ld"
211 nzone nzones begin_pos end_pos len;
212 Sha1.digest_subfile fd begin_pos len
214 aich_hash side nzones compute_sha1_zone
216 let aich_hash_file fd file_size =
217 let nchunks = (Int64.pred file_size // edk_block_size) ++ 1L in
218 let compute_sha1_chunk side nchunk =
219 let begin_pos = edk_block_size ** nchunk in
220 let end_pos = min (begin_pos ++ edk_block_size) file_size in
221 let len = end_pos -- begin_pos in
222 if !partial then
223 lprintf_nl "compute SHA1 of chunk %Ld/%Ld (%Ld - %Ld) len %Ld"
224 nchunk nchunks begin_pos end_pos len;
225 aich_hash_chunk side fd begin_pos len
227 aich_hash Left nchunks compute_sha1_chunk
229 (*************************************************************************)
230 (* *)
231 (* aich_hash_filename *)
232 (* *)
233 (*************************************************************************)
235 let aich_hash_filename filename =
236 lprintf "Calculating AICH of %s\n" filename;
237 let fd = Unix32.create_ro filename in
238 let file_size = Unix32.getsize64 fd in
239 let aich = aich_hash_file fd file_size in
240 lprintf "AICH of %s = %s\n"
241 (Url.encode (Filename.basename filename)) (Sha1.to_string aich)
243 (*************************************************************************)
244 (* *)
245 (* sha1_hash_file *)
246 (* *)
247 (*************************************************************************)
249 let sha1_hash_filename block_size filename =
250 let fd = Unix32.create_ro filename in
251 let file_size = Unix32.getsize64 fd in
252 let nchunks = Int64.to_int (Int64.pred file_size // block_size) + 1 in
253 for i = 0 to nchunks - 1 do
254 let begin_pos = block_size ** (Int64.of_int i) in
255 let end_pos = begin_pos ++ block_size in
256 let end_pos = min end_pos file_size in
257 let len = end_pos -- begin_pos in
258 let md4 = Sha1.digest_subfile fd begin_pos len in
259 if !partial then lprintf " Partial %4d/%4d (%Ld-%Ld) : %s\n" i nchunks begin_pos end_pos
260 (Sha1.to_string md4);
261 done
263 (*************************************************************************)
264 (* *)
265 (* sig2dat_hash_filename *)
266 (* *)
267 (*************************************************************************)
269 let sig2dat_hash_filename filename =
270 lprintf "Calculating sig2dat of %s\n" filename;
271 let fd = Unix32.create_ro filename in
272 let file_size = Unix32.getsize64 fd in
273 let len64 = min 307200L file_size in
274 let len = Int64.to_int len64 in
275 let s = String.create len in
276 Unix32.read fd zero s 0 len;
277 let md5ext = Md5Ext.string s in
278 lprintf "sig2dat://|File: %s|Length: %Ld Bytes|UUHash: %s|/\n"
279 (Url.encode (Filename.basename filename)) file_size (Md5Ext.to_string md5ext);
280 lprintf " Hash: %s\n" (Md5Ext.to_hexa_case false md5ext);
283 (*************************************************************************)
284 (* *)
285 (* check_external_functions *)
286 (* *)
287 (*************************************************************************)
289 let check_external_functions file_size =
290 partial := true;
291 let test_string_len = 43676 in
292 let dummy_string = "bonjourhello1" in
294 let create_diskfile filename size =
295 Unix32.create_diskfile filename true
297 let create_sparsefile filename size =
298 Unix32.create_sparsefile filename true
300 let create_multifile filename size =
301 let rec iter pos size list =
302 if size <> zero then
303 let new_size = (size // (Int64.of_int 2)) ++ one in
304 let filename = Printf.sprintf "%d-%Ld" pos new_size in
305 iter (pos+1) (size -- new_size)
306 ((filename, new_size) :: list)
307 else list
309 let files = iter 0 size [] in
310 lprintf "File %s will be:\n" filename;
311 List.iter (fun (name, size) ->
312 lprintf " %-50s %Ld\n" name size;
313 ) files;
314 Unix32.create_multifile filename
315 true files
318 let (file_types : (string * (string -> int64 -> Unix32.t)
319 * (string -> int64 -> Unix32.t) ) list) =
321 "diskfile", create_diskfile, create_diskfile;
322 "sparsefile", create_sparsefile, create_diskfile;
323 "multifile", create_multifile, create_multifile;
325 ] in
327 let test_string = String.create test_string_len in
328 let rec iter pos =
329 if pos < test_string_len then
330 let end_pos = min test_string_len (2*pos) in
331 let len = end_pos - pos in
332 String.blit test_string 0 test_string pos len;
333 iter end_pos
336 let dummy_string_len = String.length dummy_string in
337 String.blit dummy_string 0 test_string 0 dummy_string_len;
338 iter dummy_string_len;
341 let test_string_len64 = Int64.of_int test_string_len in
342 let rec iter pos waves =
343 if pos < file_size then
344 let end_pos = min file_size (pos ++ test_string_len64) in
345 let len64 = end_pos -- pos in
346 let len = Int64.to_int len64 in
347 iter end_pos
348 ((pos, len) :: waves)
349 else
350 waves
352 let waves = iter zero [] in
353 lprintf "\n";
354 List.iter (fun (name,f,f') ->
355 let filename = Printf.sprintf "test.%s.%Ld" name file_size in
357 lprintf "Creating file %s\n" filename;
358 let file = f filename file_size in
359 Unix32.ftruncate64 file file_size false;
362 lprintf "Computing ed2k hash of zeroed file\n";
363 let md4 = ed2k_hash_file file file_size in
364 lprintf "ed2k://|file|%s|%Ld|%s|\n"
365 filename
366 file_size
367 (Md4.to_string md4);
370 lprintf "Filling file\n";
371 List.iter (fun (pos, len) ->
372 Unix32.write file pos test_string 0 len;
373 ) waves;
375 lprintf "Computing ed2k hash\n";
376 let md4 = ed2k_hash_file file file_size in
377 let aich = aich_hash_file file file_size in
378 lprintf "ed2k://|file|%s|%Ld|%s|h=%s|/\n"
379 (Url.encode filename) file_size (Md4.to_string md4) (Sha1.to_string aich);
381 lprintf "Computing bitprint hash\n";
382 let (sha1, tiger2) = bitprint_file file file_size in
383 lprintf "urn:bitprint:%s.%s\n" (Sha1.to_string sha1) (TigerTree.to_string tiger2);
385 Unix32.close file;
387 if not !keep_file_after_check then begin
388 lprintf (_b "Renaming...\n");
389 Unix32.rename file (filename ^ ".final");
390 lprintf (_b "Removing %s\n") filename;
391 (try Unix32.remove file with _ -> ());
392 let file = f' (filename ^ ".final") file_size in
393 Unix32.close file;
394 Unix32.remove file;
395 end;
396 (try Sys.remove "zero_chunk" with _ -> ());
398 lprintf "done\n"
399 with e ->
400 lprintf (_b "Exception %s in check_external_functions %s.%Ld\n")
401 (Printexc2.to_string e) name file_size)
402 file_types
404 let max_diff_size = 30000000L
406 let diff_chunk args =
407 let filename1 = args.(0) in
408 let filename2 = args.(1) in
409 let begin_pos = Int64.of_string args.(2) in
410 let end_pos = Int64.of_string args.(3) in
412 let total = ref 0 in
414 if end_pos -- begin_pos > max_diff_size then
415 failwith (Printf.sprintf "Cannot diff chunk > %Ld bytes" max_diff_size);
416 let len = Int64.to_int (end_pos -- begin_pos) in
417 let s1 = String.create len in
418 let s2 = String.create len in
419 let fd1 = Unix32.create_ro filename1 in
420 let fd2 = Unix32.create_ro filename2 in
421 Unix32.read fd1 begin_pos s1 0 len;
422 Unix32.read fd2 begin_pos s2 0 len;
424 let rec iter_in old_pos pos =
425 if pos < len then
426 if s1.[pos] <> s2.[pos] then begin
427 lprintf " common %Ld-%Ld %d\n"
428 (begin_pos ++ Int64.of_int old_pos)
429 (begin_pos ++ Int64.of_int (pos-1))
430 (pos - old_pos);
431 iter_out pos (pos+1)
432 end else
433 iter_in old_pos (pos+1)
434 else
435 lprintf " common %Ld-%Ld %d\n"
436 (begin_pos ++ Int64.of_int old_pos)
437 (begin_pos ++ Int64.of_int (pos-1))
438 (pos - old_pos)
440 and iter_out old_pos pos =
441 if pos < len then
442 if s1.[pos] <> s2.[pos] then
443 iter_out old_pos (pos+1)
444 else
445 begin
446 lprintf " diff %Ld-%Ld %d\n"
447 (begin_pos ++ Int64.of_int old_pos)
448 (begin_pos ++ Int64.of_int (pos-1))
449 (pos - old_pos);
450 total := !total + (pos - old_pos);
451 iter_in (pos-1) (pos+1)
453 else begin
454 lprintf " diff %Ld-%Ld %d\n"
455 (begin_pos ++ Int64.of_int old_pos)
456 (begin_pos ++ Int64.of_int (pos-1))
457 (pos - old_pos);
458 total := !total + (pos - old_pos);
461 iter_in 0 0;
462 lprintf "Diff Total: %d/%d bytes\n" !total len
466 (*************************************************************************)
467 (* *)
468 (* MAIN *)
469 (* *)
470 (*************************************************************************)
473 let hash = ref ""
474 let chunk_size = ref zero
476 let _ =
477 MlUnix.set_signal Sys.sigint
478 (Sys.Signal_handle (fun _ -> lprintf_nl "Received SIGINT, stopping mld_hash...";
479 exit 0));
481 MlUnix.set_signal Sys.sigterm
482 (Sys.Signal_handle (fun _ -> lprintf_nl "Received SIGTERM, stopping mld_hash...";
483 exit 0));
485 Arg2.parse2 [
486 "-diff_chunk", Arg2.Array (4, diff_chunk),
487 "<filename1> <filename2> <begin_pos> <end_pos> : compute diff between the two files";
488 "-hash", Arg2.String ( (:=) hash), _s " <hash> : Set hash type you want to compute (ed2k, aich, sha1, sig2dat, bp)";
489 "-sha1", Arg2.String (fun size ->
490 hash := "sha1";
491 chunk_size := Int64.of_string size;
492 ), " <chunk_size> : Set hash type to sha1 and chunk_size to <chunk_size>";
493 "-partial", Arg2.Unit (fun _ -> partial := true), _s ": enable display of partial hash values";
494 "-partial_zone", Arg2.Unit (fun _ -> partial_zone := true), _s ": enable display of zone AICH hash values";
495 "-check_keep", Arg2.Unit (fun _ -> keep_file_after_check := true), _s ": keep files after checking functions";
496 "-check", Arg2.Int64 check_external_functions, _s " <size of testfile in bytes>: check C file functions";
497 ] (fun filename ->
498 match !hash with
499 | "ed2k" | "edk" -> ed2k_hash_filename filename
500 | "aich" -> aich_hash_filename filename
501 | "emule" -> ed2k_hash_filename filename; aich_hash_filename filename
502 | "sha1" -> sha1_hash_filename !chunk_size filename
503 | "sig2dat" -> sig2dat_hash_filename filename
504 | "bp" -> bitprint_filename filename
505 | _ ->
506 ed2k_hash_filename filename;
507 aich_hash_filename filename;
508 sig2dat_hash_filename filename;
509 bitprint_filename filename
510 ) (_s " <filenames> : compute hashes of filenames");
511 exit 0