fix some "deprecated" warnings
[mldonkey.git] / src / utils / net / ip_set.ml
blob600b91d10273b6a8b4de45727e40384a191bd771
2 open Printf2
3 open Gettext
5 let _s x = _s "Ip_set" x
6 let _b x = _b "Ip_set" x
8 (* prints a new logline with date, module and starts newline *)
9 let log_prefix = "[IPblock]"
11 let lprintf_nl fmt =
12 lprintf_nl2 log_prefix fmt
14 (* prints a new logline with date, module and does not start newline *)
15 let lprintf_n fmt =
16 lprintf2 log_prefix fmt
18 module H = Weak.Make(struct
19 type t = string
20 let hash s = Hashtbl.hash s
21 let equal x y = x = y
22 end)
24 let descriptions = H.create 13
26 let shared_description s =
27 (* Currently trims strings left and right;
28 feel free to add other heuristics: convert to lowercase,
29 remove punctuation, remove duplicate spaces,... *)
30 let canonize s =
31 let len = String.length s in
32 let b = ref 0 in
33 let e = ref len in
34 while (!b < !e && s.[!b] = ' ') do incr b done;
35 if !b < !e then
36 while (s.[!e - 1] = ' ') do decr e done;
37 if !b = 0 && !e = len then s
38 else String.sub s !b (!e - !b)
40 H.merge descriptions (canonize s)
42 let unknown_description = shared_description "Unknown";
44 (* range name, ip min, ip max (inclusive) *)
45 type blocking_range = {
46 blocking_description: string;
47 blocking_begin: Ip.t;
48 blocking_end: Ip.t;
49 blocking_hits: int
52 type short_entry = {
53 short_begin_high: int; (* 31..3 bits of IP address *)
54 sort_end_high: int; (* 31..3 bits of IP address *)
55 mutable short_low_hits: int; (* 0..2 -> 0..2 bits of start address, 3..5 -> 0..2 bits of end address, 6..30 -> 0..24 bits of counter *)
56 short_description: string;
59 let dummy_range = {
60 short_description = unknown_description;
61 short_begin_high = 0;
62 sort_end_high = 0;
63 short_low_hits = 0 }
65 let store_blocking_descriptions = ref true
67 module Array2 = struct
68 type 'a t = {
69 mutable len: int;
70 mutable prev_len: int;
71 mutable a: 'a array
73 let length a = a.len
74 let set a n x =
75 if n < a.len then a.a.(n) <- x
76 else invalid_arg "Array2.set"
78 let get a n =
79 if n < a.len then a.a.(n)
80 else invalid_arg "Array2.get"
82 let rec append a x =
83 if a.len < Array.length a.a then begin
84 a.a.(a.len) <- x;
85 a.len <- a.len + 1;
86 end else begin
87 let new_len = a.len + a.prev_len
88 and b = a.a.(0) in
89 (* lprintf_nl (_b "append resize[0] %d to %d") a.len new_len; *)
91 let t = Array.init new_len (fun n -> if n < a.len then a.a.(n) else b) in
92 a.prev_len <- a.len;
93 a.a <- t;
94 append a x;
95 end
97 let iter f a =
98 Array.iteri (fun n b -> if n < a.len then f b) a.a
100 let empty x = {len= 0 ; prev_len=1; a = Array.make 0 x}
102 let make n x = {len = n ; prev_len = n+1; a = Array.make (n+2) x}
104 let init n f = {len = n ; prev_len = n+1; a = Array.init n f}
106 let copy a = {len = a.len; prev_len = a.prev_len; a = Array.copy a.a}
108 let sort cmp a =
109 if Array.length a.a != a.len then a.a <- Array.init a.len (Array.get a.a);
110 Array.sort cmp a.a
118 type compact = {
120 Instead of storing array of records, keep record of arrays.
121 This way it eleminates need of boxing, so each entry occupies 3 words (if no description needed, the desc array isn't allocated).
122 Also on lookup only first array is accesed, unless hit is detected (better cache locality) and when hit detected,
123 dara updated only in single array.
125 compact_begin_high: int array;
126 compact_end_high: int array;
127 compact_low_hits: int array;
128 compact_desc: string array;
131 (* either simple array or compact (non boxed) representation *)
132 type blocking_list =
133 Short of short_entry Array2.t
134 | Compact of compact
137 let bl_empty = Short (Array2.make 0 dummy_range)
139 let bl_length bl = match bl with
140 Short a -> Array2.length a
141 | Compact a -> Array.length a.compact_begin_high
143 let ip_split_hi ip = (* msb 30 bits *)
144 ((Ip.get_hi16 ip) lsl 14) lor ((Ip.get_lo16 ip) lsr 2)
146 let ip_split_lo ip = (* lsb 2 bits *)
147 (Ip.get_lo16 ip) land 0x3
149 let ip_combine hi lo =
150 Ip.of_ints ( (hi lsr 22), ((hi lsr 14) land 0xFF), ((hi lsr 6) land 0xFF), (((hi land 0x3F) lsl 2) lor lo))
152 let init_low_bits begin_lo end_lo hits =
153 begin_lo lor (end_lo lsl 2) lor (hits lsl 4)
155 let begin_low_bits n =
156 n land 0x3
158 let end_low_bits n =
159 (n lsr 2) land 0x3
161 let get_hits n =
162 n lsr 4
164 let succ_hits n =
165 let count = (get_hits n) + 1 in
166 (n land 0xF) lor (count lsl 4)
168 let make_br ip_begin ip_end desc =
169 let begin_hi = ip_split_hi ip_begin
170 and begin_lo = ip_split_lo ip_begin
171 and end_hi = ip_split_hi ip_end
172 and end_lo = ip_split_lo ip_end in
173 {short_begin_high=begin_hi;
174 sort_end_high=end_hi;
175 short_low_hits= init_low_bits begin_lo end_lo 0;
176 short_description=desc}
179 let compare_split a_hi a_low b_hi b_low =
180 let hicompare = Pervasives.compare a_hi b_hi in
181 if hicompare <> 0 then
182 hicompare
183 else
184 Pervasives.compare a_low b_low
186 (* increment and then compare *)
187 let compare_split_next a_hi a_low b_hi b_low =
188 if b_low < 3 then compare_split a_hi a_low b_hi (b_low + 1)
189 else compare_split a_hi a_low (b_hi+1) 0
193 let match_ip_aux bl ip =
194 let ip_hi = ip_split_hi ip
195 and ip_lo = ip_split_lo ip in
196 match bl with
197 Short a ->
198 let rec short_march_aux a ip_hi ip_lo n =
199 if n < Array2.length a then
200 let br = Array2.get a n in
201 if (compare_split ip_hi ip_lo br.short_begin_high (begin_low_bits br.short_low_hits) >= 0) &&
202 (compare_split ip_hi ip_lo br.sort_end_high (end_low_bits br.short_low_hits) <= 0) then begin
203 br.short_low_hits <- succ_hits br.short_low_hits;
205 end else short_march_aux a ip_hi ip_lo (n+1)
206 else -1 in
207 short_march_aux a ip_hi ip_lo 0
208 | Compact a ->
209 let compare_begin a ip_hi ip_lo n =
210 let cmp_hi = Pervasives.compare ip_hi (Array.get a.compact_begin_high n) in
211 if cmp_hi <> 0 then cmp_hi
212 else Pervasives.compare ip_lo (begin_low_bits (Array.get a.compact_low_hits n))
213 and compare_end a ip_hi ip_lo n =
214 let cmp_hi = Pervasives.compare ip_hi (Array.get a.compact_end_high n) in
215 if cmp_hi <> 0 then cmp_hi
216 else Pervasives.compare ip_lo (end_low_bits (Array.get a.compact_low_hits n))
217 and mark_entry a n =
218 Array.set a.compact_low_hits n (succ_hits (Array.get a.compact_low_hits n));
219 n in
221 let rec binary_search_aux a ip_hi ip_lo lo hi =
222 if lo <= hi then
223 let n = (lo + hi) / 2 in
224 let cmp = compare_begin a ip_hi ip_lo n in
225 if cmp < 0 then
226 binary_search_aux a ip_hi ip_lo lo (n-1)
227 else if cmp > 0 then
228 binary_search_aux a ip_hi ip_lo (n+1) hi
229 else mark_entry a n
230 else begin
231 (* Printf.printf "%d %d\n" lo hi; *)
232 if hi >= 0 && hi < Array.length a.compact_begin_high then begin
233 let cmp = compare_begin a ip_hi ip_lo hi in
234 if cmp > 0 then
235 if (compare_end a ip_hi ip_lo hi) <= 0 then mark_entry a hi
236 else -1
237 else if cmp < 0 then -1
238 else mark_entry a hi
239 end else -1
240 end in
241 binary_search_aux a ip_hi ip_lo 0 ((Array.length a.compact_begin_high) - 1)
243 let make_range desc begin_high end_high low_hits = {
244 blocking_description = desc;
245 blocking_begin = ip_combine begin_high (begin_low_bits low_hits);
246 blocking_end = ip_combine end_high (end_low_bits low_hits);
247 blocking_hits = get_hits low_hits; }
249 let compact_get_desc a n =
250 if Array.length a.compact_desc > n then Array.get a.compact_desc n else unknown_description
252 let match_blocking_range bl ip =
253 let n = match_ip_aux bl ip in
254 if n >= 0 then Some (
255 match bl with
256 Short a ->
257 let br = Array2.get a n in
258 make_range br.short_description br.short_begin_high br.sort_end_high br.short_low_hits
259 | Compact a ->
260 make_range (compact_get_desc a n) (Array.get a.compact_begin_high n) (Array.get a.compact_end_high n) (Array.get a.compact_low_hits n))
261 else None
267 let match_ip bl ip =
268 match_ip_aux bl ip >= 0
270 let append_range bl br =
271 match bl with
272 Short a ->
273 Array2.append a br
274 | Compact _ ->
275 assert(false)
277 let copy_range bl =
278 match bl with
279 Short a ->
280 let b = Array2.copy a in
281 Short b
282 | Compact c ->
283 let b = Array2.init (Array.length c.compact_begin_high)
284 (fun n ->
286 short_begin_high = Array.get c.compact_begin_high n;
287 sort_end_high = Array.get c.compact_end_high n;
288 short_low_hits = Array.get c.compact_low_hits n;
289 short_description = if Array.length c.compact_desc > n then Array.get c.compact_desc n else unknown_description
290 }) in
291 Short b
293 let add_range bl ip_begin ip_end desc =
294 let bl = copy_range bl
295 and br = make_br ip_begin ip_end desc in
296 append_range bl br
298 let br_compare r1 r2 =
299 let cmp_begin = compare_split
300 r1.short_begin_high (begin_low_bits r1.short_low_hits)
301 r2.short_begin_high (begin_low_bits r2.short_low_hits) in
302 if cmp_begin <> 0 then
303 cmp_begin
304 else
305 compare_split
306 r1.sort_end_high (end_low_bits r1.short_low_hits)
307 r2.sort_end_high (end_low_bits r2.short_low_hits)
309 let bl_optimize a =
310 match a with
311 Short a ->
312 (* lprintf_nl (_b "sort %d") (Array2.length a); *)
313 Array2.sort br_compare a;
314 let rec bl_optimize_aux a rd wr last_hi last_lo =
315 if rd < Array2.length a then
316 let br = Array2.get a rd
317 and rd = rd + 1 in
318 if (compare_split br.sort_end_high (end_low_bits br.short_low_hits) last_hi last_lo) > 0 then begin (* new record is going further then last *)
319 if wr >= 0 && (compare_split_next br.short_begin_high (begin_low_bits br.short_low_hits) last_hi last_lo) <= 0 then begin (* but it starts inside previous block, so concatenate them *)
320 let last_hi = br.sort_end_high
321 and last_lo = (end_low_bits br.short_low_hits) in
322 let prev = Array2.get a wr in
323 Array2.set a wr {short_begin_high = prev.short_begin_high;
324 sort_end_high = last_hi;
325 short_low_hits = init_low_bits (begin_low_bits prev.short_low_hits) last_lo
326 ((get_hits prev.short_low_hits) + (get_hits br.short_low_hits));
327 short_description = prev.short_description};
328 bl_optimize_aux a rd wr last_hi last_lo
329 end else begin (* there is nothing to optimize *)
330 let wr = wr+1 in
331 Array2.set a wr br;
332 bl_optimize_aux a rd wr br.sort_end_high (end_low_bits br.short_low_hits)
333 end
334 end else (* just ignore current record *)
335 bl_optimize_aux a rd wr last_hi last_lo
336 else
337 wr+1 in
338 let len = bl_optimize_aux a 0 (-1) 0 0 in
339 (* lprintf_nl (_b "copy %d") (Array2.length a); *)
340 Compact {
341 compact_begin_high = Array.init len (fun n -> (Array2.get a n).short_begin_high);
342 compact_end_high = Array.init len (fun n -> (Array2.get a n).sort_end_high);
343 compact_low_hits = Array.init len (fun n -> (Array2.get a n).short_low_hits);
344 compact_desc =
345 if !store_blocking_descriptions then
346 Array.init len (fun n -> (Array2.get a n).short_description)
347 else Array.make 0 unknown_description
349 | Compact _ -> a
353 let load_merge bl filename remove =
354 let guardian_regexp = Str.regexp "^\\(.*\\): *\\([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\\)-\\([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\\)" in
355 let ipfilter_regexp = Str.regexp "^\\([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\\) *- *\\([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\\) *, *[0-9]+ *, *\\(.*\\)$" in
357 let bl = copy_range bl in
358 let nranges = ref 0 in
359 let error = ref false in
360 Unix2.tryopen_read filename (fun cin ->
361 let nlines = ref 0 in
362 let append line ip_begin ip_end desc =
363 let ip_begin = Ip.of_string (Str.matched_group ip_begin line)
364 and ip_end = Ip.of_string (Str.matched_group ip_end line)
365 and desc =
366 if !store_blocking_descriptions then
367 shared_description (Str.matched_group desc line)
368 else
369 unknown_description in
370 append_range bl (make_br ip_begin ip_end desc);
371 incr nranges in
373 while true do
374 let line = input_line cin in
375 incr nlines;
377 if Str.string_match ipfilter_regexp line 0 then
378 append line 1 2 3
379 else if Str.string_match guardian_regexp line 0 then
380 append line 2 3 1
381 else
382 raise Not_found
383 with _ ->
384 if not !error then
385 begin
386 lprintf_n "Syntax error while loading IP blocklist in line";
387 error := true
388 end;
389 lprintf " %d" !nlines;
390 done
391 with End_of_file -> ());
392 if !error then lprint_newline ();
393 if remove then (try Sys.remove filename with _ -> ());
394 let optimized_bl = bl_optimize bl in
395 lprintf_nl (_b "%d ranges loaded - optimized to %d") !nranges (bl_length optimized_bl);
396 optimized_bl
398 let load filename =
399 lprintf_nl (_b "loading %s") filename;
400 if Sys.file_exists filename then
401 let last_ext = String.lowercase (Filename2.last_extension filename) in
402 if last_ext = ".zip" then
403 let filenames_list =
404 Unix2.tryopen_read_zip filename (fun ic ->
406 List.map (fun e -> e.Zip.filename) (Zip.entries ic)
407 with _ -> []) in
408 (try
409 Unix2.tryopen_read_zip filename (fun ic ->
411 let rec find_in_zip l =
412 match l with
413 | [] -> raise Not_found
414 | h :: q ->
416 let file = Zip.find_entry ic h in
417 lprintf_nl (_b "%s found in zip file") h;
418 ignore(Misc.archive_extract filename "zip");
419 let bl = load_merge bl_empty file.Zip.filename true in
420 if bl_length bl = 0 then
421 raise Not_found
422 else
424 with Not_found ->
425 find_in_zip q in
426 find_in_zip filenames_list
427 with e ->
428 lprintf_nl "Exception %s while extracting %s from %s"
429 (Printexc2.to_string e)
430 (String.concat "/" filenames_list)
431 filename;
432 lprintf_nl "One of the mentioned files has to be a valid IP blocklist";
433 bl_empty)
434 with e ->
435 lprintf_nl "Exception %s while opening %s"
436 (Printexc2.to_string e)
437 filename;
438 bl_empty)
439 else
440 let ext = String.lowercase (Filename2.extension filename) in
441 match ext with
442 | ".bz2" | ".p2p.bz2" | ".dat.bz2"
443 | ".gz" | ".p2p.gz" | ".dat.gz" ->
444 let filetype =
445 if String2.check_suffix ext ".bz2" then "bz2" else "gz" in
446 (try
447 let s = Misc.archive_extract filename filetype in
448 load_merge bl_empty s true
449 with e ->
450 lprintf_nl "Exception %s while extracting from %s"
451 (Printexc2.to_string e) filename;
452 bl_empty)
453 | ".tar.bz2" | ".p2p.tar.bz2" | ".dat.tar.bz2"
454 | ".tar.gz" | ".p2p.tar.gz" | ".dat.tar.gz" ->
455 lprintf_nl "tar files are not (yet) supported, please untar %s" filename;
456 bl_empty
457 | _ -> load_merge bl_empty filename false
458 else
459 begin
460 lprintf_nl (_b "file %s not found") filename;
461 bl_empty
464 let of_list l =
465 let bl = copy_range bl_empty in
466 List.iter (fun r ->
467 let range =
468 match r with
469 | Ip.RangeSingleIp ip ->
470 make_br ip ip unknown_description
471 | Ip.RangeRange (ip1, ip2) ->
472 make_br ip1 ip2 unknown_description
473 | Ip.RangeCIDR (ip, shift) ->
474 let mask = Ip.mask_of_shift shift in
475 make_br (Ip.network_address ip mask) (Ip.broadcast_address ip mask) unknown_description
477 append_range bl range
478 ) l;
479 bl_optimize bl
481 let bl_fold_left f a bl =
482 let a' = ref a in
483 let f desc begin_high end_high low_hits =
484 a' := f !a' (make_range desc begin_high end_high low_hits) in
486 (match bl with
487 Short a ->
488 Array2.iter (fun br ->
489 if get_hits br.short_low_hits > 0 then
490 f br.short_description br.short_begin_high br.sort_end_high br.short_low_hits;
492 | Compact a ->
493 Array.iteri (fun n low_hits ->
494 if get_hits low_hits > 0 then
495 f (compact_get_desc a n)
496 (Array.get a.compact_begin_high n) (Array.get a.compact_end_high n) low_hits
497 ) a.compact_low_hits);
501 let print_list buf bl =
502 let print_entry () br =
503 Printf.bprintf buf "%s (%d hits): %s - %s\n"
504 br.blocking_description
505 br.blocking_hits
506 (Ip.to_string br.blocking_begin)
507 (Ip.to_string br.blocking_end) in
508 bl_fold_left print_entry () bl;
509 let nranges = bl_length bl in
510 Printf.bprintf buf "%d ranges\n" nranges
516 open Benchmark
518 let n = 10000
520 let ips = Array.init n (fun _ -> Ip.of_ints (Random.int 256, Random.int 256, Random.int 256, Random.int 256))
522 let check_ips ips =
523 let blocked = ref 0 in
524 for i=0 to n-1 do
526 match_ip !bl ips.(i)
527 with MatchedIP s ->
528 incr blocked
529 done;
530 Printf2.lprintf "Blocked: %d\n" !blocked
532 let _ =
533 latency1 10 check_ips ips;
534 exit 0
536 let _ =
537 Heap.add_memstat "Ip_set" (fun level buf ->
538 let counter = ref 0 in
539 H.iter (fun _ -> incr counter) descriptions;
540 Printf.bprintf buf " descriptions: %d\n" !counter)