1 (* Copyright 2001, 2002 b8_bavard, b8_fee_carabine, INRIA *)
3 This file is part of mldonkey.
5 mldonkey is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 mldonkey is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mldonkey; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 let _s x
= _s "Mld_hash" x
27 let _b x
= _b "Mld_hash" x
33 let ( ** ) x y
= Int64.mul x y
34 let ( // ) x y
= Int64.div x y
36 let edk_block_size = 9728000L
37 let edk_zone_size = 180L ** 1024L
38 let tiger_block_size = Int64.of_int
(1024 * 1024)
39 let partial = ref false
40 let partial_zone = ref false
41 let keep_file_after_check = ref false
43 (*************************************************************************)
47 (*************************************************************************)
49 let rec tiger_of_array array pos block
=
53 let len = Array.length array
in
54 if pos
+ block
/ 2 >= len then
55 tiger_of_array array pos
(block
/2)
57 let d1 = tiger_of_array array pos
(block
/2) in
58 let d2 = tiger_of_array array
(pos
+block
/2) (block
/2) in
59 let s = String.create
(1 + Tiger.length
* 2) in
61 String.blit
(TigerTree.direct_to_string
d1) 0 s 1 Tiger.length
;
62 String.blit
(TigerTree.direct_to_string
d2) 0 s (1+Tiger.length
) Tiger.length
;
63 let t = Tiger.string s in
64 let t = TigerTree.direct_of_string
(Tiger.direct_to_string
t) in
67 (*************************************************************************)
69 (* tiger_max_block_size *)
71 (*************************************************************************)
73 let rec tiger_max_block_size block
len =
74 if block
>= len then block
75 else tiger_max_block_size (block
*2) len
77 (*************************************************************************)
81 (*************************************************************************)
83 let tiger_of_array array
=
84 tiger_of_array array
0 (tiger_max_block_size 1 (Array.length array
))
87 (*************************************************************************)
91 (*************************************************************************)
93 let bitprint_file fd file_size
=
94 lprintf
"Calculating SHA1\n";
95 let sha1 = Sha1.digest_subfile fd
zero file_size
in
96 lprintf
"Calculating TigerTree\n";
97 let tiger = TigerTree.digest_subfile fd
zero file_size
in
98 lprintf
"urn:bitprint:%s.%s\n" (Sha1.to_string
sha1) (TigerTree.to_string
tiger);
99 let file_size = Unix32.getsize64 fd
in
100 let nchunks = Int64.to_int
(Int64.pred
file_size // tiger_block_size) + 1 in
102 let chunks = Array.make
nchunks tiger in
103 for i
= 0 to nchunks - 1 do
104 let begin_pos = tiger_block_size ** (Int64.of_int i
) in
105 let end_pos = begin_pos ++ tiger_block_size in
106 let end_pos = min
end_pos file_size in
107 let len = end_pos -- begin_pos in
108 let tiger = TigerTree.digest_subfile fd
begin_pos len in
109 if !partial then lprintf
" Partial %4d/%4d : %s\n" i
nchunks (TigerTree.to_string
tiger);
114 let tiger2 = tiger_of_array chunks in
117 (*************************************************************************)
119 (* bitprint_filename *)
121 (*************************************************************************)
123 let bitprint_filename filename
=
124 let fd = Unix32.create_ro filename
in
125 let file_size = Unix32.getsize64
fd in
126 let (sha1, tiger2) = bitprint_file fd file_size in
127 lprintf
"urn:bitprint:%s.%s\n" (Sha1.to_string
sha1) (TigerTree.to_string
tiger2);
131 (*************************************************************************)
135 (*************************************************************************)
137 let ed2k_hash_file fd file_size =
138 (* See: DonkeyGlobals *)
139 let nchunks = Int64.to_int
(file_size // edk_block_size) + 1 in
140 let nchunk_hashes = Int64.to_int
(file_size // edk_block_size) in
141 let nchunk_hashes = if nchunk_hashes <> 0 then nchunk_hashes + 1 else nchunk_hashes in
142 let md4 = if nchunk_hashes = 0 then
143 Md4.digest_subfile
fd zero file_size
145 let chunks = String.create
(nchunks*16) in
146 for i
= 0 to nchunks - 1 do
147 let begin_pos = edk_block_size ** (Int64.of_int i
) in
148 let end_pos = begin_pos ++ edk_block_size in
149 let end_pos = min
end_pos file_size in
150 let len = end_pos -- begin_pos in
151 let md4 = Md4.digest_subfile
fd begin_pos len in
152 if !partial then lprintf
" Partial %4d/%4d (%Ld - %Ld): %s\n"
153 i
nchunks begin_pos end_pos (Md4.to_string
md4);
154 let md4 = Md4.direct_to_string
md4 in
155 String.blit
md4 0 chunks (i
*16) 16;
161 (*************************************************************************)
163 (* ed2k_hash_filename *)
165 (*************************************************************************)
167 let ed2k_hash_filename filename
=
168 lprintf
"Calculating ed2k of %s\n" filename
;
169 let fd = Unix32.create_ro filename
in
170 let file_size = Unix32.getsize64
fd in
171 let md4 = ed2k_hash_file fd file_size in
172 lprintf
"ed2k://|file|%s|%Ld|%s|/\n"
173 (Url.encode
(Filename.basename filename
))
177 (*************************************************************************)
181 (*************************************************************************)
183 type side
= Left
| Right
185 let aich_hash side
nchunks f_hashchunk
=
186 let combine_sha1 hash1 hash2
=
187 Sha1.string (Sha1.direct_to_string hash1 ^
Sha1.direct_to_string hash2
)
190 let rec aux side n next_leaf cont
=
191 if n
= 1L then cont
(f_hashchunk side next_leaf
) (next_leaf
++ 1L)
193 let p, q
= n
// 2L, Int64.rem n
2L in
194 aux Left
(if q
= 0L || side
= Right
then p else p ++ 1L) next_leaf
195 (fun left_hash next_leaf
->
196 aux Right
(if q
= 0L || side
= Left
then p else p ++ 1L) next_leaf
197 (fun right_hash next_leaf
->
198 cont
(combine_sha1 left_hash right_hash
) next_leaf
)) in
199 aux side n
0L (fun root_hash number_of_leaves
-> root_hash
)
203 let aich_hash_chunk side
fd offset
len =
204 let nzones = (Int64.pred
len // edk_zone_size) ++ 1L in
205 let compute_sha1_zone side nzone
=
206 let begin_pos = offset
++ edk_zone_size ** nzone
in
207 let end_pos = offset
++ (min
(edk_zone_size ** (nzone
++ 1L)) len) in
208 let len = end_pos -- begin_pos in
209 if !partial_zone then
210 lprintf_nl
"compute SHA1 of zone %Ld/%Ld (%Ld - %Ld) len %Ld"
211 nzone
nzones begin_pos end_pos len;
212 Sha1.digest_subfile
fd begin_pos len
214 aich_hash side
nzones compute_sha1_zone
216 let aich_hash_file fd file_size =
217 let nchunks = (Int64.pred
file_size // edk_block_size) ++ 1L in
218 let compute_sha1_chunk side nchunk
=
219 let begin_pos = edk_block_size ** nchunk
in
220 let end_pos = min
(begin_pos ++ edk_block_size) file_size in
221 let len = end_pos -- begin_pos in
223 lprintf_nl
"compute SHA1 of chunk %Ld/%Ld (%Ld - %Ld) len %Ld"
224 nchunk
nchunks begin_pos end_pos len;
225 aich_hash_chunk side
fd begin_pos len
227 aich_hash Left
nchunks compute_sha1_chunk
229 (*************************************************************************)
231 (* aich_hash_filename *)
233 (*************************************************************************)
235 let aich_hash_filename filename
=
236 lprintf
"Calculating AICH of %s\n" filename
;
237 let fd = Unix32.create_ro filename
in
238 let file_size = Unix32.getsize64
fd in
239 let aich = aich_hash_file fd file_size in
240 lprintf
"AICH of %s = %s\n"
241 (Url.encode
(Filename.basename filename
)) (Sha1.to_string
aich)
243 (*************************************************************************)
247 (*************************************************************************)
249 let sha1_hash_filename block_size filename
=
250 let fd = Unix32.create_ro filename
in
251 let file_size = Unix32.getsize64
fd in
252 let nchunks = Int64.to_int
(Int64.pred
file_size // block_size
) + 1 in
253 for i
= 0 to nchunks - 1 do
254 let begin_pos = block_size
** (Int64.of_int i
) in
255 let end_pos = begin_pos ++ block_size
in
256 let end_pos = min
end_pos file_size in
257 let len = end_pos -- begin_pos in
258 let md4 = Sha1.digest_subfile
fd begin_pos len in
259 if !partial then lprintf
" Partial %4d/%4d (%Ld-%Ld) : %s\n" i
nchunks begin_pos end_pos
260 (Sha1.to_string
md4);
263 (*************************************************************************)
265 (* sig2dat_hash_filename *)
267 (*************************************************************************)
269 let sig2dat_hash_filename filename
=
270 lprintf
"Calculating sig2dat of %s\n" filename
;
271 let fd = Unix32.create_ro filename
in
272 let file_size = Unix32.getsize64
fd in
273 let len64 = min
307200L file_size in
274 let len = Int64.to_int
len64 in
275 let s = String.create
len in
276 Unix32.read
fd zero s 0 len;
277 let md5ext = Md5Ext.string s in
278 lprintf
"sig2dat://|File: %s|Length: %Ld Bytes|UUHash: %s|/\n"
279 (Url.encode
(Filename.basename filename
)) file_size (Md5Ext.to_string
md5ext);
280 lprintf
" Hash: %s\n" (Md5Ext.to_hexa_case
false md5ext);
283 (*************************************************************************)
285 (* check_external_functions *)
287 (*************************************************************************)
289 let check_external_functions file_size =
291 let test_string_len = 43676 in
292 let dummy_string = "bonjourhello1" in
294 let create_diskfile filename size
=
295 Unix32.create_diskfile filename
true
297 let create_sparsefile filename size
=
298 Unix32.create_sparsefile filename
true
300 let create_multifile filename size
=
301 let rec iter pos size list
=
303 let new_size = (size
// (Int64.of_int
2)) ++ one in
304 let filename = Printf.sprintf
"%d-%Ld" pos
new_size in
305 iter (pos
+1) (size
-- new_size)
306 ((filename, new_size) :: list
)
309 let files = iter 0 size
[] in
310 lprintf
"File %s will be:\n" filename;
311 List.iter (fun (name
, size
) ->
312 lprintf
" %-50s %Ld\n" name size
;
314 Unix32.create_multifile filename
318 let (file_types
: (string * (string -> int64
-> Unix32.t)
319 * (string -> int64
-> Unix32.t) ) list
) =
321 "diskfile", create_diskfile, create_diskfile;
322 "sparsefile", create_sparsefile, create_diskfile;
323 "multifile", create_multifile, create_multifile;
327 let test_string = String.create
test_string_len in
329 if pos
< test_string_len then
330 let end_pos = min
test_string_len (2*pos
) in
331 let len = end_pos - pos
in
332 String.blit
test_string 0 test_string pos
len;
336 let dummy_string_len = String.length
dummy_string in
337 String.blit
dummy_string 0 test_string 0 dummy_string_len;
338 iter dummy_string_len;
341 let test_string_len64 = Int64.of_int
test_string_len in
342 let rec iter pos waves
=
343 if pos
< file_size then
344 let end_pos = min
file_size (pos
++ test_string_len64) in
345 let len64 = end_pos -- pos
in
346 let len = Int64.to_int
len64 in
348 ((pos
, len) :: waves
)
352 let waves = iter zero [] in
354 List.iter (fun (name
,f
,f'
) ->
355 let filename = Printf.sprintf
"test.%s.%Ld" name
file_size in
357 lprintf
"Creating file %s\n" filename;
358 let file = f
filename file_size in
359 Unix32.ftruncate64
file file_size false;
362 lprintf "Computing ed2k hash of zeroed file\n";
363 let md4 = ed2k_hash_file file file_size in
364 lprintf "ed2k://|file|%s|%Ld|%s|\n"
370 lprintf
"Filling file\n";
371 List.iter (fun (pos
, len) ->
372 Unix32.write
file pos
test_string 0 len;
375 lprintf
"Computing ed2k hash\n";
376 let md4 = ed2k_hash_file file file_size in
377 let aich = aich_hash_file file file_size in
378 lprintf
"ed2k://|file|%s|%Ld|%s|h=%s|/\n"
379 (Url.encode
filename) file_size (Md4.to_string
md4) (Sha1.to_string
aich);
381 lprintf
"Computing bitprint hash\n";
382 let (sha1, tiger2) = bitprint_file file file_size in
383 lprintf
"urn:bitprint:%s.%s\n" (Sha1.to_string
sha1) (TigerTree.to_string
tiger2);
387 if not
!keep_file_after_check then begin
388 lprintf
(_b "Renaming...\n");
389 Unix32.rename
file (filename ^
".final");
390 lprintf
(_b "Removing %s\n") filename;
391 (try Unix32.remove
file with _
-> ());
392 let file = f'
(filename ^
".final") file_size in
396 (try Sys.remove
"zero_chunk" with _
-> ());
400 lprintf
(_b "Exception %s in check_external_functions %s.%Ld\n")
401 (Printexc2.to_string e
) name
file_size)
404 let max_diff_size = 30000000L
406 let diff_chunk args
=
407 let filename1 = args
.(0) in
408 let filename2 = args
.(1) in
409 let begin_pos = Int64.of_string args
.(2) in
410 let end_pos = Int64.of_string args
.(3) in
414 if end_pos -- begin_pos > max_diff_size then
415 failwith
(Printf.sprintf
"Cannot diff chunk > %Ld bytes" max_diff_size);
416 let len = Int64.to_int
(end_pos -- begin_pos) in
417 let s1 = String.create
len in
418 let s2 = String.create
len in
419 let fd1 = Unix32.create_ro
filename1 in
420 let fd2 = Unix32.create_ro
filename2 in
421 Unix32.read
fd1 begin_pos s1 0 len;
422 Unix32.read
fd2 begin_pos s2 0 len;
424 let rec iter_in old_pos pos
=
426 if s1.[pos
] <> s2.[pos
] then begin
427 lprintf
" common %Ld-%Ld %d\n"
428 (begin_pos ++ Int64.of_int old_pos
)
429 (begin_pos ++ Int64.of_int
(pos
-1))
433 iter_in old_pos
(pos
+1)
435 lprintf
" common %Ld-%Ld %d\n"
436 (begin_pos ++ Int64.of_int old_pos
)
437 (begin_pos ++ Int64.of_int
(pos
-1))
440 and iter_out old_pos pos
=
442 if s1.[pos
] <> s2.[pos
] then
443 iter_out old_pos
(pos
+1)
446 lprintf
" diff %Ld-%Ld %d\n"
447 (begin_pos ++ Int64.of_int old_pos
)
448 (begin_pos ++ Int64.of_int
(pos
-1))
450 total := !total + (pos
- old_pos
);
451 iter_in (pos
-1) (pos
+1)
454 lprintf
" diff %Ld-%Ld %d\n"
455 (begin_pos ++ Int64.of_int old_pos
)
456 (begin_pos ++ Int64.of_int
(pos
-1))
458 total := !total + (pos
- old_pos
);
462 lprintf
"Diff Total: %d/%d bytes\n" !total len
466 (*************************************************************************)
470 (*************************************************************************)
474 let chunk_size = ref zero
477 MlUnix.set_signal
Sys.sigint
478 (Sys.Signal_handle
(fun _ -> lprintf_nl
"Received SIGINT, stopping mld_hash...";
481 MlUnix.set_signal
Sys.sigterm
482 (Sys.Signal_handle
(fun _ -> lprintf_nl
"Received SIGTERM, stopping mld_hash...";
486 "-diff_chunk", Arg2.Array
(4, diff_chunk),
487 "<filename1> <filename2> <begin_pos> <end_pos> : compute diff between the two files";
488 "-hash", Arg2.String
( (:=) hash), _s " <hash> : Set hash type you want to compute (ed2k, aich, sha1, sig2dat, bp)";
489 "-sha1", Arg2.String
(fun size
->
491 chunk_size := Int64.of_string size
;
492 ), " <chunk_size> : Set hash type to sha1 and chunk_size to <chunk_size>";
493 "-partial", Arg2.Unit
(fun _ -> partial := true), _s ": enable display of partial hash values";
494 "-partial_zone", Arg2.Unit
(fun _ -> partial_zone := true), _s ": enable display of zone AICH hash values";
495 "-check_keep", Arg2.Unit
(fun _ -> keep_file_after_check := true), _s ": keep files after checking functions";
496 "-check", Arg2.Int64
check_external_functions, _s " <size of testfile in bytes>: check C file functions";
499 | "ed2k" | "edk" -> ed2k_hash_filename filename
500 | "aich" -> aich_hash_filename filename
501 | "emule" -> ed2k_hash_filename filename; aich_hash_filename filename
502 | "sha1" -> sha1_hash_filename !chunk_size filename
503 | "sig2dat" -> sig2dat_hash_filename filename
504 | "bp" -> bitprint_filename filename
506 ed2k_hash_filename filename;
507 aich_hash_filename filename;
508 sig2dat_hash_filename filename;
509 bitprint_filename filename
510 ) (_s " <filenames> : compute hashes of filenames");