5 let _s x
= _s "Ip_set" x
6 let _b x
= _b "Ip_set" x
8 (* prints a new logline with date, module and starts newline *)
9 let log_prefix = "[IPblock]"
12 lprintf_nl2
log_prefix fmt
14 (* prints a new logline with date, module and does not start newline *)
16 lprintf2
log_prefix fmt
18 module H
= Weak.Make
(struct
20 let hash s
= Hashtbl.hash s
24 let descriptions = H.create
13
26 let shared_description s
=
27 (* Currently trims strings left and right;
28 feel free to add other heuristics: convert to lowercase,
29 remove punctuation, remove duplicate spaces,... *)
31 let len = String.length s
in
34 while (!b < !e && s
.[!b] = ' '
) do incr
b done;
36 while (s
.[!e - 1] = ' '
) do decr
e done;
37 if !b = 0 && !e = len then s
38 else String.sub s
!b (!e - !b)
40 H.merge
descriptions (canonize s
)
42 let unknown_description = shared_description "Unknown";
44 (* range name, ip min, ip max (inclusive) *)
45 type blocking_range
= {
46 blocking_description
: string;
53 short_begin_high
: int; (* 31..3 bits of IP address *)
54 sort_end_high
: int; (* 31..3 bits of IP address *)
55 mutable short_low_hits
: int; (* 0..2 -> 0..2 bits of start address, 3..5 -> 0..2 bits of end address, 6..30 -> 0..24 bits of counter *)
56 short_description
: string;
60 short_description
= unknown_description;
65 let store_blocking_descriptions = ref true
67 module Array2
= struct
70 mutable prev_len
: int;
75 if n
< a
.len then a
.a
.(n
) <- x
76 else invalid_arg
"Array2.set"
79 if n
< a
.len then a
.a
.(n
)
80 else invalid_arg
"Array2.get"
83 if a
.len < Array.length a
.a
then begin
87 let new_len = a
.len + a
.prev_len
89 (* lprintf_nl (_b "append resize[0] %d to %d") a.len new_len; *)
91 let t = Array.init
new_len (fun n
-> if n
< a
.len then a
.a
.(n
) else b) in
98 Array.iteri
(fun n
b -> if n
< a
.len then f
b) a
.a
100 let empty x
= {len= 0 ; prev_len
=1; a
= Array.make
0 x
}
102 let make n x
= {len = n
; prev_len
= n
+1; a
= Array.make (n
+2) x
}
104 let init n f
= {len = n
; prev_len
= n
+1; a
= Array.init n f
}
106 let copy a
= {len = a
.len; prev_len
= a
.prev_len
; a
= Array.copy a
.a
}
109 if Array.length a
.a
!= a
.len then a
.a
<- Array.init a
.len (Array.get a
.a
);
120 Instead of storing array of records, keep record of arrays.
121 This way it eleminates need of boxing, so each entry occupies 3 words (if no description needed, the desc array isn't allocated).
122 Also on lookup only first array is accesed, unless hit is detected (better cache locality) and when hit detected,
123 dara updated only in single array.
125 compact_begin_high
: int array
;
126 compact_end_high
: int array
;
127 compact_low_hits
: int array
;
128 compact_desc
: string array
;
131 (* either simple array or compact (non boxed) representation *)
133 Short
of short_entry
Array2.t
137 let bl_empty = Short
(Array2.make 0 dummy_range)
139 let bl_length bl
= match bl
with
140 Short a
-> Array2.length a
141 | Compact a
-> Array.length a
.compact_begin_high
143 let ip_split_hi ip
= (* msb 30 bits *)
144 ((Ip.get_hi16 ip
) lsl 14) lor ((Ip.get_lo16 ip
) lsr 2)
146 let ip_split_lo ip
= (* lsb 2 bits *)
147 (Ip.get_lo16 ip
) land 0x3
149 let ip_combine hi lo
=
150 Ip.of_ints
( (hi
lsr 22), ((hi
lsr 14) land 0xFF), ((hi
lsr 6) land 0xFF), (((hi
land 0x3F) lsl 2) lor lo
))
152 let init_low_bits begin_lo end_lo hits
=
153 begin_lo
lor (end_lo
lsl 2) lor (hits
lsl 4)
155 let begin_low_bits n
=
165 let count = (get_hits n
) + 1 in
166 (n
land 0xF) lor (count lsl 4)
168 let make_br ip_begin ip_end desc
=
169 let begin_hi = ip_split_hi ip_begin
170 and begin_lo
= ip_split_lo ip_begin
171 and end_hi
= ip_split_hi ip_end
172 and end_lo
= ip_split_lo ip_end
in
173 {short_begin_high
=begin_hi;
174 sort_end_high
=end_hi
;
175 short_low_hits
= init_low_bits begin_lo end_lo
0;
176 short_description
=desc
}
179 let compare_split a_hi a_low b_hi b_low
=
180 let hicompare = Pervasives.compare a_hi b_hi
in
181 if hicompare <> 0 then
184 Pervasives.compare a_low b_low
186 (* increment and then compare *)
187 let compare_split_next a_hi a_low b_hi b_low
=
188 if b_low
< 3 then compare_split a_hi a_low b_hi
(b_low
+ 1)
189 else compare_split a_hi a_low
(b_hi
+1) 0
193 let match_ip_aux bl ip
=
194 let ip_hi = ip_split_hi ip
195 and ip_lo
= ip_split_lo ip
in
198 let rec short_march_aux a
ip_hi ip_lo n
=
199 if n
< Array2.length a
then
200 let br = Array2.get a n
in
201 if (compare_split ip_hi ip_lo
br.short_begin_high
(begin_low_bits br.short_low_hits
) >= 0) &&
202 (compare_split ip_hi ip_lo
br.sort_end_high
(end_low_bits br.short_low_hits
) <= 0) then begin
203 br.short_low_hits
<- succ_hits br.short_low_hits
;
205 end else short_march_aux a
ip_hi ip_lo
(n
+1)
207 short_march_aux a
ip_hi ip_lo
0
209 let compare_begin a
ip_hi ip_lo n
=
210 let cmp_hi = Pervasives.compare
ip_hi (Array.get a
.compact_begin_high n
) in
211 if cmp_hi <> 0 then cmp_hi
212 else Pervasives.compare ip_lo
(begin_low_bits (Array.get a
.compact_low_hits n
))
213 and compare_end a
ip_hi ip_lo n
=
214 let cmp_hi = Pervasives.compare
ip_hi (Array.get a
.compact_end_high n
) in
215 if cmp_hi <> 0 then cmp_hi
216 else Pervasives.compare ip_lo
(end_low_bits (Array.get a
.compact_low_hits n
))
218 Array.set a
.compact_low_hits n
(succ_hits (Array.get a
.compact_low_hits n
));
221 let rec binary_search_aux a
ip_hi ip_lo lo hi
=
223 let n = (lo
+ hi
) / 2 in
224 let cmp = compare_begin a
ip_hi ip_lo
n in
226 binary_search_aux a
ip_hi ip_lo lo
(n-1)
228 binary_search_aux a
ip_hi ip_lo
(n+1) hi
231 (* Printf.printf "%d %d\n" lo hi; *)
232 if hi
>= 0 && hi
< Array.length a
.compact_begin_high
then begin
233 let cmp = compare_begin a
ip_hi ip_lo hi
in
235 if (compare_end a
ip_hi ip_lo hi
) <= 0 then mark_entry a hi
237 else if cmp < 0 then -1
241 binary_search_aux a
ip_hi ip_lo
0 ((Array.length a
.compact_begin_high
) - 1)
243 let make_range desc begin_high end_high low_hits
= {
244 blocking_description
= desc
;
245 blocking_begin
= ip_combine begin_high
(begin_low_bits low_hits
);
246 blocking_end
= ip_combine end_high
(end_low_bits low_hits
);
247 blocking_hits
= get_hits low_hits
; }
249 let compact_get_desc a
n =
250 if Array.length a
.compact_desc
> n then Array.get a
.compact_desc
n else unknown_description
252 let match_blocking_range bl ip
=
253 let n = match_ip_aux bl ip
in
254 if n >= 0 then Some
(
257 let br = Array2.get a
n in
258 make_range br.short_description
br.short_begin_high
br.sort_end_high
br.short_low_hits
260 make_range (compact_get_desc a
n) (Array.get a
.compact_begin_high
n) (Array.get a
.compact_end_high
n) (Array.get a
.compact_low_hits
n))
268 match_ip_aux bl ip
>= 0
270 let append_range bl
br =
280 let b = Array2.copy a
in
283 let b = Array2.init (Array.length c
.compact_begin_high
)
286 short_begin_high
= Array.get c
.compact_begin_high
n;
287 sort_end_high
= Array.get c
.compact_end_high
n;
288 short_low_hits
= Array.get c
.compact_low_hits
n;
289 short_description
= if Array.length c
.compact_desc
> n then Array.get c
.compact_desc
n else unknown_description
293 let add_range bl ip_begin ip_end desc
=
294 let bl = copy_range bl
295 and br = make_br ip_begin ip_end desc
in
298 let br_compare r1 r2
=
299 let cmp_begin = compare_split
300 r1
.short_begin_high
(begin_low_bits r1
.short_low_hits
)
301 r2
.short_begin_high
(begin_low_bits r2
.short_low_hits
) in
302 if cmp_begin <> 0 then
306 r1
.sort_end_high
(end_low_bits r1
.short_low_hits
)
307 r2
.sort_end_high
(end_low_bits r2
.short_low_hits
)
312 (* lprintf_nl (_b "sort %d") (Array2.length a); *)
313 Array2.sort br_compare a
;
314 let rec bl_optimize_aux a rd wr last_hi last_lo
=
315 if rd
< Array2.length a
then
316 let br = Array2.get a rd
318 if (compare_split br.sort_end_high
(end_low_bits br.short_low_hits
) last_hi last_lo
) > 0 then begin (* new record is going further then last *)
319 if wr
>= 0 && (compare_split_next br.short_begin_high
(begin_low_bits br.short_low_hits
) last_hi last_lo
) <= 0 then begin (* but it starts inside previous block, so concatenate them *)
320 let last_hi = br.sort_end_high
321 and last_lo
= (end_low_bits br.short_low_hits
) in
322 let prev = Array2.get a wr
in
323 Array2.set a wr
{short_begin_high
= prev.short_begin_high
;
324 sort_end_high
= last_hi;
325 short_low_hits
= init_low_bits (begin_low_bits prev.short_low_hits
) last_lo
326 ((get_hits prev.short_low_hits
) + (get_hits br.short_low_hits
));
327 short_description
= prev.short_description
};
328 bl_optimize_aux a rd wr
last_hi last_lo
329 end else begin (* there is nothing to optimize *)
332 bl_optimize_aux a rd
wr br.sort_end_high
(end_low_bits br.short_low_hits
)
334 end else (* just ignore current record *)
335 bl_optimize_aux a rd
wr last_hi last_lo
338 let len = bl_optimize_aux a
0 (-1) 0 0 in
339 (* lprintf_nl (_b "copy %d") (Array2.length a); *)
341 compact_begin_high
= Array.init len (fun n -> (Array2.get a
n).short_begin_high
);
342 compact_end_high
= Array.init len (fun n -> (Array2.get a
n).sort_end_high
);
343 compact_low_hits
= Array.init len (fun n -> (Array2.get a
n).short_low_hits
);
345 if !store_blocking_descriptions then
346 Array.init len (fun n -> (Array2.get a
n).short_description
)
347 else Array.make 0 unknown_description
353 let load_merge bl filename remove
=
354 let guardian_regexp = Str.regexp
"^\\(.*\\): *\\([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\\)-\\([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\\)" in
355 let ipfilter_regexp = Str.regexp
"^\\([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\\) *- *\\([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\\) *, *[0-9]+ *, *\\(.*\\)$" in
357 let bl = copy_range bl in
358 let nranges = ref 0 in
359 let error = ref false in
360 Unix2.tryopen_read filename
(fun cin
->
361 let nlines = ref 0 in
362 let append line ip_begin ip_end desc
=
363 let ip_begin = Ip.of_string
(Str.matched_group
ip_begin line
)
364 and ip_end
= Ip.of_string
(Str.matched_group ip_end line
)
366 if !store_blocking_descriptions then
367 shared_description (Str.matched_group desc line
)
369 unknown_description in
370 append_range bl (make_br ip_begin ip_end desc
);
374 let line = input_line cin
in
377 if Str.string_match
ipfilter_regexp line 0 then
379 else if Str.string_match
guardian_regexp line 0 then
386 lprintf_n "Syntax error while loading IP blocklist in line";
389 lprintf
" %d" !nlines;
391 with End_of_file
-> ());
392 if !error then lprint_newline
();
393 if remove
then (try Sys.remove filename
with _
-> ());
394 let optimized_bl = bl_optimize bl in
395 lprintf_nl (_b "%d ranges loaded - optimized to %d") !nranges (bl_length optimized_bl);
399 lprintf_nl (_b "loading %s") filename
;
400 if Sys.file_exists filename
then
401 let last_ext = String.lowercase
(Filename2.last_extension filename
) in
402 if last_ext = ".zip" then
404 Unix2.tryopen_read_zip filename
(fun ic
->
406 List.map
(fun e -> e.Zip.filename
) (Zip.entries ic
)
409 Unix2.tryopen_read_zip filename
(fun ic
->
411 let rec find_in_zip l
=
413 | [] -> raise Not_found
416 let file = Zip.find_entry ic h
in
417 lprintf_nl (_b "%s found in zip file") h
;
418 ignore
(Misc.archive_extract filename
"zip");
419 let bl = load_merge bl_empty file.Zip.filename
true in
420 if bl_length bl = 0 then
426 find_in_zip filenames_list
428 lprintf_nl "Exception %s while extracting %s from %s"
429 (Printexc2.to_string
e)
430 (String.concat
"/" filenames_list)
432 lprintf_nl "One of the mentioned files has to be a valid IP blocklist";
435 lprintf_nl "Exception %s while opening %s"
436 (Printexc2.to_string
e)
440 let ext = String.lowercase
(Filename2.extension filename
) in
442 | ".bz2" | ".p2p.bz2" | ".dat.bz2"
443 | ".gz" | ".p2p.gz" | ".dat.gz" ->
445 if String2.check_suffix
ext ".bz2" then "bz2" else "gz" in
447 let s = Misc.archive_extract filename
filetype in
448 load_merge bl_empty s true
450 lprintf_nl "Exception %s while extracting from %s"
451 (Printexc2.to_string
e) filename
;
453 | ".tar.bz2" | ".p2p.tar.bz2" | ".dat.tar.bz2"
454 | ".tar.gz" | ".p2p.tar.gz" | ".dat.tar.gz" ->
455 lprintf_nl "tar files are not (yet) supported, please untar %s" filename
;
457 | _
-> load_merge bl_empty filename
false
460 lprintf_nl (_b "file %s not found") filename
;
465 let bl = copy_range bl_empty in
469 | Ip.RangeSingleIp ip
->
470 make_br ip ip
unknown_description
471 | Ip.RangeRange
(ip1
, ip2
) ->
472 make_br ip1 ip2
unknown_description
473 | Ip.RangeCIDR
(ip
, shift
) ->
474 let mask = Ip.mask_of_shift shift
in
475 make_br (Ip.network_address ip
mask) (Ip.broadcast_address ip
mask) unknown_description
477 append_range bl range
481 let bl_fold_left f a
bl =
483 let f desc begin_high end_high low_hits
=
484 a'
:= f !a'
(make_range desc begin_high end_high low_hits
) in
488 Array2.iter (fun br ->
489 if get_hits br.short_low_hits
> 0 then
490 f br.short_description
br.short_begin_high
br.sort_end_high
br.short_low_hits
;
493 Array.iteri
(fun n low_hits
->
494 if get_hits low_hits
> 0 then
495 f (compact_get_desc a n)
496 (Array.get a.compact_begin_high
n) (Array.get a.compact_end_high
n) low_hits
497 ) a.compact_low_hits
);
501 let print_list buf
bl =
502 let print_entry () br =
503 Printf.bprintf buf
"%s (%d hits): %s - %s\n"
504 br.blocking_description
506 (Ip.to_string
br.blocking_begin
)
507 (Ip.to_string
br.blocking_end
) in
508 bl_fold_left print_entry () bl;
509 let nranges = bl_length bl in
510 Printf.bprintf buf
"%d ranges\n" nranges
520 let ips = Array.init n (fun _ -> Ip.of_ints (Random.int 256, Random.int 256, Random.int 256, Random.int 256))
523 let blocked = ref 0 in
530 Printf2.lprintf "Blocked: %d\n" !blocked
533 latency1 10 check_ips ips;
537 Heap.add_memstat
"Ip_set" (fun level buf
->
538 let counter = ref 0 in
539 H.iter (fun _ -> incr
counter) descriptions;
540 Printf.bprintf buf
" descriptions: %d\n" !counter)