src/utils/lib/bitv.ml

   1 (*
   2  * Bit vectors.
   3  * Copyright (C) 1999 Jean-Christophe FILLIATRE
   4  *
   5  * This software is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Library General Public
   7  * License version 2, as published by the Free Software Foundation.
   8  *
   9  * This software is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12  *
  13  * See the GNU Library General Public License version 2 for more details
  14  * (enclosed in the file LGPL).
  15  *)
  16
  17 (*i $Id$ i*)
  18
  19 (*s Bit vectors. The interface and part of the code are borrowed from the
  20     [Array] module of the ocaml standard library (but things are simplified
  21     here since we can always initialize a bit vector). This module also
  22     provides bitwise operations. *)
  23
  24 (*s We represent a bit vector by a vector of integers (field [bits]),
  25     and we keep the information of the size of the bit vector since it
  26     can not be found out with the size of the array (field [length]). *)
  27
  28 type t = {
  29   length : int;
  30   bits   : int array }
  31
  32 let length v = v.length
  33
  34 (*s Each element of the array is an integer containing [bpi] bits, where
  35     [bpi] is determined according to the machine word size. Since we do not
  36     use the sign bit, [bpi] is 30 on a 32-bits machine and 62 on a 64-bits
  37     machines. We maintain the following invariant:
  38     {\em The unused bits of the last integer are always
  39     zeros.} This is ensured by [create] and maintained in other functions
  40     using [normalize]. [bit_j], [bit_not_j], [low_mask] and [up_mask]
  41     are arrays used to extract and mask bits in a single integer. *)
  42
  43 let bpi = Sys.word_size - 2
  44
  45 let max_length = Sys.max_array_length * bpi
  46
  47 let bit_j = Array.init bpi (fun j -> 1 lsl j)
  48 let bit_not_j = Array.init bpi (fun j -> max_int - bit_j.(j))
  49
  50 let low_mask = Array.make (succ bpi) 0
  51 let _ =
  52   for i = 1 to bpi do low_mask.(i) <- low_mask.(i-1) lor bit_j.(pred i) done
  53
  54 let keep_lowest_bits a j = a land low_mask.(j)
  55
  56 let high_mask = Array.init (succ bpi) (fun j -> low_mask.(j) lsl (bpi-j))
  57
  58 let keep_highest_bits a j = a land high_mask.(j)
  59
  60 (*s Creating and normalizing a bit vector is easy: it is just a matter of
  61     taking care of the invariant. Copy is immediate. *)
  62
  63 let create n b =
  64   let initv = if b then max_int else 0 in
  65   let r = n mod bpi in
  66   if r = 0 then
  67     { length = n; bits = Array.make (n / bpi) initv }
  68   else begin
  69     let s = n / bpi in
  70     let b = Array.make (succ s) initv in
  71     b.(s) <- b.(s) land low_mask.(r);
  72     { length = n; bits = b }
  73   end
  74
  75 let normalize v =
  76   let r = v.length mod bpi in
  77   if r > 0 then
  78     let b = v.bits in
  79     let s = Array.length b in
  80     b.(s-1) <- b.(s-1) land low_mask.(r)
  81
  82 let copy v = { length = v.length; bits = Array.copy v.bits }
  83
  84 (*s Access and assignment. The [n]th bit of a bit vector is the [j]th
  85     bit of the [i]th integer, where [i = n / bpi] and [j = n mod
  86     bpi]. Both [i] and [j] and computed by the function [pos].
  87     Accessing a bit is testing whether the result of the corresponding
  88     mask operation is non-zero, and assigning it is done with a
  89     bitwiwe operation: an {\em or} with [bit_j] to set it, and an {\em
  90     and} with [bit_not_j] to unset it. *)
  91
  92 let pos n =
  93   let i = n / bpi and j = n mod bpi in
  94   if j < 0 then (i - 1, j + bpi) else (i,j)
  95
  96 let unsafe_get v n =
  97   let (i,j) = pos n in
  98   ((Array.unsafe_get v.bits i) land (Array.unsafe_get bit_j j)) > 0
  99
 100 let unsafe_set v n b =
 101   let (i,j) = pos n in
 102   if b then
 103     Array.unsafe_set v.bits i
 104       ((Array.unsafe_get v.bits i) lor (Array.unsafe_get bit_j j))
 105   else
 106     Array.unsafe_set v.bits i
 107       ((Array.unsafe_get v.bits i) land (Array.unsafe_get bit_not_j j))
 108
 109 (*s The corresponding safe operations test the validiy of the access. *)
 110
 111 let get v n =
 112   if n < 0 || n >= v.length then invalid_arg "Bitv.get";
 113   let (i,j) = pos n in
 114   ((Array.unsafe_get v.bits i) land (Array.unsafe_get bit_j j)) > 0
 115
 116 let set v n b =
 117   if n < 0 || n >= v.length then invalid_arg "Bitv.set";
 118   let (i,j) = pos n in
 119   if b then
 120     Array.unsafe_set v.bits i
 121       ((Array.unsafe_get v.bits i) lor (Array.unsafe_get bit_j j))
 122   else
 123     Array.unsafe_set v.bits i
 124       ((Array.unsafe_get v.bits i) land (Array.unsafe_get bit_not_j j))
 125
 126 (*s [init] is implemented naively using [unsafe_set]. *)
 127
 128 let init n f =
 129   let v = create n false in
 130   for i = 0 to pred n do
 131     unsafe_set v i (f i)
 132   done;
 133   v
 134
 135 (*s Handling bits by packets is the key for efficiency of functions
 136     [append], [concat], [sub] and [blit].
 137     We start by a very general function [blit_bits a i m v n] which blits
 138     the bits [i] to [i+m-1] of a native integer [a]
 139     onto the bit vector [v] at index [n]. It assumes that [i..i+m-1] and
 140     [n..n+m-1] are respectively valid subparts of [a] and [v].
 141     It is optimized when the bits fit the lowest boundary of an integer
 142     (case [j == 0]). *)
 143
 144 let blit_bits a i m v n =
 145   let (i',j) = pos n in
 146   if j == 0 then
 147     Array.unsafe_set v i'
 148       ((keep_lowest_bits (a lsr i) m) lor
 149        (keep_highest_bits (Array.unsafe_get v i') (bpi - m)))
 150   else
 151     let d = m + j - bpi in
 152     if d > 0 then begin
 153       Array.unsafe_set v i'
 154         (((keep_lowest_bits (a lsr i) (bpi - j)) lsl j) lor
 155          (keep_lowest_bits (Array.unsafe_get v i') j));
 156       Array.unsafe_set v (succ i')
 157         ((keep_lowest_bits (a lsr (i + bpi - j)) d) lor
 158          (keep_highest_bits (Array.unsafe_get v (succ i')) (bpi - d)))
 159     end else
 160       Array.unsafe_set v i'
 161         (((keep_lowest_bits (a lsr i) m) lsl j) lor
 162          ((Array.unsafe_get v i') land (low_mask.(j) lor high_mask.(-d))))
 163
 164 (*s [blit_int] implements [blit_bits] in the particular case when
 165     [i=0] and [m=bpi] i.e. when we blit all the bits of [a]. *)
 166
 167 let blit_int a v n =
 168   let (i,j) = pos n in
 169   if j == 0 then
 170     Array.unsafe_set v i a
 171   else begin
 172     Array.unsafe_set v i
 173       ( (keep_lowest_bits (Array.unsafe_get v i) j) lor
 174        ((keep_lowest_bits a (bpi - j)) lsl j));
 175     Array.unsafe_set v (succ i)
 176       ((keep_highest_bits (Array.unsafe_get v (succ i)) (bpi - j)) lor
 177        (a lsr (bpi - j)))
 178   end
 179
 180 (*s When blitting a subpart of a bit vector into another bit vector, there
 181     are two possible cases: (1) all the bits are contained in a single integer
 182     of the first bit vector, and a single call to [blit_bits] is the
 183     only thing to do, or (2) the source bits overlap on several integers of
 184     the source array, and then we do a loop of [blit_int], with two calls
 185     to [blit_bits] for the two bounds. *)
 186
 187 let unsafe_blit v1 ofs1 v2 ofs2 len =
 188   let (bi,bj) = pos ofs1 in
 189   let (ei,ej) = pos (ofs1 + len - 1) in
 190   if bi == ei then
 191     blit_bits (Array.unsafe_get v1 bi) bj len v2 ofs2
 192   else begin
 193     blit_bits (Array.unsafe_get v1 bi) bj (bpi - bj) v2 ofs2;
 194     let n = ref (ofs2 + bpi - bj) in
 195     for i = succ bi to pred ei do
 196       blit_int (Array.unsafe_get v1 i) v2 !n;
 197       n := !n + bpi
 198     done;
 199     blit_bits (Array.unsafe_get v1 ei) 0 (succ ej) v2 !n
 200   end
 201
 202 let blit v1 ofs1 v2 ofs2 len =
 203   if len < 0 || ofs1 < 0 || ofs1 + len > v1.length
 204              || ofs2 < 0 || ofs2 + len > v2.length
 205   then invalid_arg "Bitv.blit";
 206   unsafe_blit v1.bits ofs1 v2.bits ofs2 len
 207
 208 (*s Extracting the subvector [ofs..ofs+len-1] of [v] is just creating a
 209     new vector of length [len] and blitting the subvector of [v] inside. *)
 210
 211 let sub v ofs len =
 212   if ofs < 0 || len < 0 || ofs + len > v.length then invalid_arg "Bitv.sub";
 213   let r = create len false in
 214   unsafe_blit v.bits ofs r.bits 0 len;
 215   r
 216
 217 (*s The concatenation of two bit vectors [v1] and [v2] is obtained by
 218     creating a vector for the result and blitting inside the two vectors.
 219     [v1] is copied directly. *)
 220
 221 let append v1 v2 =
 222   let l1 = v1.length
 223   and l2 = v2.length in
 224   let r = create (l1 + l2) false in
 225   let b1 = v1.bits in
 226   let b2 = v2.bits in
 227   let b = r.bits in
 228   for i = 0 to Array.length b1 - 1 do
 229     Array.unsafe_set b i (Array.unsafe_get b1 i)
 230   done;
 231   unsafe_blit b2 0 b l1 l2;
 232   r
 233
 234 (*s The concatenation of a list of bit vectors is obtained by iterating
 235     [unsafe_blit]. *)
 236
 237 let concat vl =
 238   let size = List.fold_left (fun sz v -> sz + v.length) 0 vl in
 239   let res = create size false in
 240   let b = res.bits in
 241   let pos = ref 0 in
 242   List.iter
 243     (fun v ->
 244        let n = v.length in
 245        unsafe_blit v.bits 0 b !pos n;
 246        pos := !pos + n)
 247     vl;
 248   res
 249
 250 (*s Filling is a particular case of blitting with a source made of all
 251     ones or all zeros. Thus we instanciate [unsafe_blit], with 0 and
 252     [max_int]. *)
 253
 254 let blit_zeros v ofs len =
 255   let (bi,bj) = pos ofs in
 256   let (ei,ej) = pos (ofs + len - 1) in
 257   if bi == ei then
 258     blit_bits 0 bj len v ofs
 259   else begin
 260     blit_bits 0 bj (bpi - bj) v ofs;
 261     let n = ref (ofs + bpi - bj) in
 262     for i = succ bi to pred ei do
 263       blit_int 0 v !n;
 264       n := !n + bpi
 265     done;
 266     blit_bits 0 0 (succ ej) v !n
 267   end
 268
 269 let blit_ones v ofs len =
 270   let (bi,bj) = pos ofs in
 271   let (ei,ej) = pos (ofs + len - 1) in
 272   if bi == ei then
 273     blit_bits max_int bj len v ofs
 274   else begin
 275     blit_bits max_int bj (bpi - bj) v ofs;
 276     let n = ref (ofs + bpi - bj) in
 277     for i = succ bi to pred ei do
 278       blit_int max_int v !n;
 279       n := !n + bpi
 280     done;
 281     blit_bits max_int 0 (succ ej) v !n
 282   end
 283
 284 let fill v ofs len b =
 285   if ofs < 0 || len < 0 || ofs + len > v.length then invalid_arg "Bitv.fill";
 286   if b then blit_ones v.bits ofs len else blit_zeros v.bits ofs len
 287
 288 (*s All the iterators are implemented as for traditional arrays, using
 289     [unsafe_get]. For [iter] and [map], we do not precompute [(f
 290     true)] and [(f false)] since [f] is likely to have
 291     side-effects. *)
 292
 293 let iter f v =
 294   for i = 0 to v.length - 1 do f (unsafe_get v i) done
 295
 296 let map f v =
 297   let l = v.length in
 298   let r = create l false in
 299   for i = 0 to l - 1 do
 300     unsafe_set r i (f (unsafe_get v i))
 301   done;
 302   r
 303
 304 let iteri f v =
 305   for i = 0 to v.length - 1 do f i (unsafe_get v i) done
 306
 307 let mapi f v =
 308   let l = v.length in
 309   let r = create l false in
 310   for i = 0 to l - 1 do
 311     unsafe_set r i (f i (unsafe_get v i))
 312   done;
 313   r
 314
 315 let fold_left f x v =
 316   let r = ref x in
 317   for i = 0 to v.length - 1 do
 318     r := f !r (unsafe_get v i)
 319   done;
 320   !r
 321
 322 let fold_right f v x =
 323   let r = ref x in
 324   for i = v.length - 1 downto 0 do
 325     r := f (unsafe_get v i) !r
 326   done;
 327   !r
 328
 329 let foldi_left f x v =
 330   let r = ref x in
 331   for i = 0 to v.length - 1 do
 332     r := f !r i (unsafe_get v i)
 333   done;
 334   !r
 335
 336 let foldi_right f v x =
 337   let r = ref x in
 338   for i = v.length - 1 downto 0 do
 339     r := f i (unsafe_get v i) !r
 340   done;
 341   !r
 342
 343 let iteri_true f v =
 344   Array.iteri
 345     (fun i n -> if n != 0 then begin
 346        let i_bpi = i * bpi in
 347        for j = 0 to bpi - 1 do
 348          if n land (Array.unsafe_get bit_j j) > 0 then f (i_bpi + j)
 349        done
 350      end)
 351     v.bits
 352
 353 (*s Bitwise operations. It is straigthforward, since bitwise operations
 354     can be realized by the corresponding bitwise operations over integers.
 355     However, one has to take care of normalizing the result of [bwnot]
 356     which introduces ones in highest significant positions. *)
 357
 358 let bw_and v1 v2 =
 359   let l = v1.length in
 360   if l <> v2.length then invalid_arg "Bitv.bw_and";
 361   let b1 = v1.bits
 362   and b2 = v2.bits in
 363   let n = Array.length b1 in
 364   let a = Array.make n 0 in
 365   for i = 0 to n - 1 do
 366     a.(i) <- b1.(i) land b2.(i)
 367   done;
 368   { length = l; bits = a }
 369
 370 let bw_or v1 v2 =
 371   let l = v1.length in
 372   if l <> v2.length then invalid_arg "Bitv.bw_or";
 373   let b1 = v1.bits
 374   and b2 = v2.bits in
 375   let n = Array.length b1 in
 376   let a = Array.make n 0 in
 377   for i = 0 to n - 1 do
 378     a.(i) <- b1.(i) lor b2.(i)
 379   done;
 380   { length = l; bits = a }
 381
 382 let bw_xor v1 v2 =
 383   let l = v1.length in
 384   if l <> v2.length then invalid_arg "Bitv.bw_xor";
 385   let b1 = v1.bits
 386   and b2 = v2.bits in
 387   let n = Array.length b1 in
 388   let a = Array.make n 0 in
 389   for i = 0 to n - 1 do
 390     a.(i) <- b1.(i) lxor b2.(i)
 391   done;
 392   { length = l; bits = a }
 393
 394 let bw_not v =
 395   let b = v.bits in
 396   let n = Array.length b in
 397   let a = Array.make n 0 in
 398   for i = 0 to n - 1 do
 399     a.(i) <- max_int land (lnot b.(i))
 400   done;
 401   let r = { length = v.length; bits = a } in
 402   normalize r;
 403   r
 404
 405 (*s Shift operations. It is easy to reuse [unsafe_blit], although it is
 406     probably slightly less efficient than a ad-hoc piece of code. *)
 407
 408 let rec shiftl v d =
 409   if d == 0 then
 410     copy v
 411   else if d < 0 then
 412     shiftr v (-d)
 413   else begin
 414     let n = v.length in
 415     let r = create n false in
 416     if d < n then unsafe_blit v.bits 0 r.bits d (n - d);
 417     r
 418   end
 419
 420 and shiftr v d =
 421   if d == 0 then
 422     copy v
 423   else if d < 0 then
 424     shiftl v (-d)
 425   else begin
 426     let n = v.length in
 427     let r = create n false in
 428     if d < n then unsafe_blit v.bits d r.bits 0 (n - d);
 429     r
 430   end
 431
 432 (*s Testing for all zeros and all ones. *)
 433
 434 let all_zeros v =
 435   let b = v.bits in
 436   let n = Array.length b in
 437   let rec test i =
 438     (i == n) || ((Array.unsafe_get b i == 0) && test (succ i))
 439   in
 440   test 0
 441
 442 let all_ones v =
 443   let b = v.bits in
 444   let n = Array.length b in
 445   let rec test i =
 446     if i == n - 1 then
 447       let m = v.length mod bpi in
 448       (Array.unsafe_get b i) == (if m == 0 then max_int else low_mask.(m))
 449     else
 450       ((Array.unsafe_get b i) == max_int) && test (succ i)
 451   in
 452   test 0
 453
 454 (*s Conversions to and from strings. *)
 455
 456 let to_string v =
 457   let n = v.length in
 458   let s = String.make n '0' in
 459   for i = 0 to n - 1 do
 460     if unsafe_get v i then s.[i] <- '1'
 461   done;
 462   s
 463
 464 let print fmt v = Format.pp_print_string fmt (to_string v)
 465
 466 let of_string s =
 467   let n = String.length s in
 468   let v = create n false in
 469   for i = 0 to n - 1 do
 470     let c = String.unsafe_get s i in
 471     if c = '1' then
 472       unsafe_set v i true
 473     else
 474       if c <> '0' then invalid_arg "Bitv.of_string"
 475   done;
 476   v
 477
 478 (*s Iteration on all bit vectors of length [n] using a Gray code. *)
 479
 480 let first_set v n =
 481   let rec lookup i =
 482     if i = n then raise Not_found ;
 483     if unsafe_get v i then i else lookup (i + 1)
 484   in
 485   lookup 0
 486
 487 let gray_iter f n =
 488   let bv = create n false in
 489   let rec iter () =
 490     f bv;
 491     unsafe_set bv 0 (not (unsafe_get bv 0));
 492     f bv;
 493     let pos = succ (first_set bv n) in
 494     if pos < n then begin
 495       unsafe_set bv pos (not (unsafe_get bv pos));
 496       iter ()
 497     end
 498   in
 499   if n > 0 then iter ()
 500
 501
 502 (*s Coercions to/from lists of integers *)
 503
 504 let of_list l =
 505   let n = List.fold_left max 0 l in
 506   let b = create (succ n) false in
 507   let add_element i =
 508     (* negative numbers are invalid *)
 509     if i < 0 then invalid_arg "Bitv.of_list";
 510     unsafe_set b i true
 511   in
 512   List.iter add_element l;
 513   b
 514
 515 let of_list_with_length l len =
 516   let b = create len false in
 517   let add_element i =
 518     if i < 0 || i >= len then invalid_arg "Bitv.of_list_with_length";
 519     unsafe_set b i true
 520   in
 521   List.iter add_element l;
 522   b
 523
 524 let to_list b =
 525   let n = length b in
 526   let rec make i acc =
 527     if i < 0 then acc
 528     else make (pred i) (if unsafe_get b i then i :: acc else acc)
 529   in
 530   make (pred n) []
 531
 532
 533 (*s To/from integers. *)
 534
 535 (* [int] *)
 536 let of_int_us i =
 537   { length = bpi; bits = [| i land max_int |] }
 538 let to_int_us v =
 539   if v.length < bpi then invalid_arg "Bitv.to_int_us";
 540   v.bits.(0)
 541
 542 let of_int_s i =
 543   { length = succ bpi; bits = [| i land max_int; (i lsr bpi) land 1 |] }
 544 let to_int_s v =
 545   if v.length < succ bpi then invalid_arg "Bitv.to_int_s";
 546   v.bits.(0) lor (v.bits.(1) lsl bpi)
 547
 548 (* [Int32] *)
 549 let of_int32_us i = match Sys.word_size with
 550   | 32 -> { length = 31;
 551             bits = [| (Int32.to_int i) land max_int;
 552                       let hi = Int32.shift_right_logical i 30 in
 553                       (Int32.to_int hi) land 1 |] }
 554   | 64 -> { length = 31; bits = [| (Int32.to_int i) land 0x7fffffff |] }
 555   | _ -> assert false
 556 let to_int32_us v =
 557   if v.length < 31 then invalid_arg "Bitv.to_int32_us";
 558   match Sys.word_size with
 559     | 32 ->
 560         Int32.logor (Int32.of_int v.bits.(0))
 561                     (Int32.shift_left (Int32.of_int (v.bits.(1) land 1)) 30)
 562     | 64 ->
 563         Int32.of_int (v.bits.(0) land 0x7fffffff)
 564     | _ -> assert false
 565
 566 (* this is 0xffffffff (ocaml >= 3.08 checks for literal overflow) *)
 567 let ffffffff = (0xffff lsl 16) lor 0xffff
 568
 569 let of_int32_s i = match Sys.word_size with
 570   | 32 -> { length = 32;
 571             bits = [| (Int32.to_int i) land max_int;
 572                       let hi = Int32.shift_right_logical i 30 in
 573                       (Int32.to_int hi) land 3 |] }
 574   | 64 -> { length = 32; bits = [| (Int32.to_int i) land ffffffff |] }
 575   | _ -> assert false
 576 let to_int32_s v =
 577   if v.length < 32 then invalid_arg "Bitv.to_int32_s";
 578   match Sys.word_size with
 579     | 32 ->
 580         Int32.logor (Int32.of_int v.bits.(0))
 581                     (Int32.shift_left (Int32.of_int (v.bits.(1) land 3)) 30)
 582     | 64 ->
 583         Int32.of_int (v.bits.(0) land ffffffff)
 584     | _ -> assert false
 585
 586 (* [Int64] *)
 587 let of_int64_us i = match Sys.word_size with
 588   | 32 -> { length = 63;
 589             bits = [| (Int64.to_int i) land max_int;
 590                       (let mi = Int64.shift_right_logical i 30 in
 591                        (Int64.to_int mi) land max_int);
 592                       let hi = Int64.shift_right_logical i 60 in
 593                       (Int64.to_int hi) land 1 |] }
 594   | 64 -> { length = 63;
 595             bits = [| (Int64.to_int i) land max_int;
 596                       let hi = Int64.shift_right_logical i 62 in
 597                       (Int64.to_int hi) land 1 |] }
 598   | _ -> assert false
 599 let to_int64_us v = failwith "todo"
 600
 601 let of_int64_s i = failwith "todo"
 602 let to_int64_s v = failwith "todo"
 603
 604 (* [Nativeint] *)
 605 let select_of f32 f64 = match Sys.word_size with
 606   | 32 -> (fun i -> f32 (Nativeint.to_int32 i))
 607   | 64 -> (fun i -> f64 (Int64.of_nativeint i))
 608   | _ -> assert false
 609 let of_nativeint_s = select_of of_int32_s of_int64_s
 610 let of_nativeint_us = select_of of_int32_us of_int64_us
 611 let select_to f32 f64 = match Sys.word_size with
 612   | 32 -> (fun i -> Nativeint.of_int32 (f32 i))
 613   | 64 -> (fun i -> Int64.to_nativeint (f64 i))
 614   | _ -> assert false
 615 let to_nativeint_s = select_to to_int32_s to_int64_s
 616 let to_nativeint_us = select_to to_int32_us to_int64_us
 617
 618