tests/integerdiv.pure.lisp

   1 ;;; Assert that all odd divisors less than 32769 have an optimized
   2 ;;; remainder coefficient using 32-bit operations.
   3 ;;; Symbol hashsets with fewer than that many cells can use fast remainder.
   4 ;;; (I could/should implement the 64-bit algorithm I suppose)
   5 (with-test (:name :optimized-remainder-exists)
   6   (loop for divisor from 3 below 32769 by 2
   7         do
   8      (multiple-value-bind (mask c) (sb-impl::optimized-symtbl-remainder-params divisor)
   9        (assert (and (plusp c) (plusp mask))))))
  10
  11 #+interpreter (invoke-restart 'run-tests::skip-file)
  12
  13 (defvar *rs* (make-random-state t)) ; get a random random-state
  14
  15 #+x86-64
  16 (with-test (:name :udiv-magic)
  17   (dotimes (i 1000)
  18     (let ((divisor (+ 5 (random #xfffffff *rs*))))
  19       (multiple-value-bind (m a s) (sb-c:compute-udiv32-magic divisor)
  20         (dotimes (i 10000)
  21           (let* ((dividend (random (ash 1 32) *rs*))
  22                  (quotient (sb-vm::udiv32-via-multiply dividend m a s))
  23                  (expected (truncate dividend divisor)))
  24             (assert (= quotient expected))))))))
  25
  26 #+x86-64
  27 (with-test (:name :urem-magic)
  28   (dotimes (i 1000)
  29     (let ((divisor (+ 5 (random #xfffffff *rs*))))
  30       (multiple-value-bind (m a s) (sb-c:compute-udiv32-magic divisor)
  31         (dotimes (i 10000)
  32           (let* ((dividend (random (ash 1 32) *rs*))
  33                  (remainder (sb-vm::urem32-via-multiply dividend divisor m a s))
  34                  (expected (rem dividend divisor)))
  35             (assert (= remainder expected))))))))
  36
  37 (with-test (:name :urem32-ultrafast)
  38   (dotimes (i 1000)
  39     ;; The 32-bit algorithm can only handle random 16-bit inputs (or some inputs
  40     ;; with more than 16 bits, but it's random which inputs are OK)
  41     ;; A 64-bit variant would handle all 32-bit inputs, but I don't need it.
  42     (let* ((divisor (+ 5 (random #xffff *rs*)))
  43            (c (sb-c:compute-fastrem-coefficient divisor 16 32)))
  44       (dotimes (i 20000)
  45         (let* ((dividend (random (ash 1 16) *rs*))
  46                (remainder (sb-vm::fastrem-32 dividend c divisor))
  47                (expected (rem dividend divisor)))
  48           (assert (= remainder expected)))))))
  49
  50 #+64-bit
  51 (with-test (:name :urem64-ultrafast)
  52   (dotimes (i 1000)
  53     (let* ((divisor (+ 5 (random #xfffffff *rs*)))
  54            (c (sb-c:compute-fastrem-coefficient divisor 32 64)))
  55       (dotimes (i 20000)
  56         (let* ((dividend (random (ash 1 32) *rs*))
  57                (remainder (sb-vm::fastrem-64 dividend c divisor))
  58                (expected (rem dividend divisor)))
  59           (assert (= remainder expected)))))))
  60
  61 ;;;; The assembly code for the benchmarks contains no full calls.
  62 ;;;; Therefore we're measuring the speed of the vops as accurately as possible.
  63
  64 ;;; These loops show the advantage of division by using multiplication
  65 ;;; even when the CPU directly implements division.
  66 ;;; BASIC-WAY takes about 4 to 6 times as many CPU cycles as TRICKY-WAY.
  67 ;;; But these loops are too time-consuming for a regression test.
  68 (defun div-basic-way (&aux (result 0))
  69   (declare (fixnum result))
  70   (loop for divisor from 3 to 1000
  71         do (dotimes (dividend #xfffff)
  72              (let ((ans (truncate dividend divisor)))
  73                (setq result (logxor result ans)))))
  74   result)
  75
  76 #+x86-64
  77 (defun div-tricky-way (&aux (result 0))
  78   (declare (fixnum result))
  79   (loop for divisor from 3 to 1000
  80         do (multiple-value-bind (m a s) (sb-c:compute-udiv32-magic divisor)
  81              (dotimes (dividend #xfffff)
  82                (let ((ans (sb-vm::udiv32-via-multiply dividend m a s)))
  83                  (setq result (logxor result ans))))))
  84   result)
  85
  86 ;;; * (time (rem-basic-way))
  87 ;;; Evaluation took:
  88 ;;;   30.123 seconds of real time
  89 ;;;   30.112920 seconds of total run time (30.112919 user, 0.000001 system)
  90 ;;;   99.97% CPU
  91 ;;;   84,142,880,093 processor cycles
  92 (defun rem-basic-way (&aux (result 0)) ; about 32 CPU cycles per operation
  93   (declare (fixnum result))
  94   (loop for divisor from 3 to 10000
  95         do (dotimes (dividend #x3ffff)
  96              (let ((ans (rem dividend divisor)))
  97                (setq result (logxor result ans)))))
  98   result)
  99
 100 ;;; * (time (rem-tricky-way))
 101 ;;; Evaluation took:
 102 ;;;   9.999 seconds of real time
 103 ;;;   9.998409 seconds of total run time (9.994458 user, 0.003951 system)
 104 ;;;   99.99% CPU
 105 ;;;   27,938,111,662 processor cycles
 106 #+x86-64
 107 (defun rem-tricky-way (&aux (result 0)) ; about 11 CPU cycles per operation
 108   (declare (fixnum result))
 109   (loop for divisor from 3 to 10000
 110         do (multiple-value-bind (m a s) (sb-c:compute-udiv32-magic divisor)
 111              (dotimes (dividend #x3ffff)
 112                (let ((ans (sb-vm::urem32-via-multiply dividend divisor m a s)))
 113                  (setq result (logxor result ans))))))
 114   result)
 115
 116 ;;; * (time (ultrafastrem-way))
 117 ;;; Evaluation took:
 118 ;;;   6.175 seconds of real time
 119 ;;;   6.171059 seconds of total run time (6.171059 user, 0.000000 system)
 120 ;;;   99.94% CPU
 121 ;;;   17,243,611,816 processor cycles
 122 (defun ultrafastrem-way (&aux (result 0)) ; about 7 CPU cycles per operation
 123   (declare (fixnum result))
 124   (loop for divisor from 3 to 10000
 125         do (let ((c (sb-c:compute-fastrem-coefficient divisor 18 32)))
 126              (dotimes (dividend #x3ffff)
 127                (let ((ans (sb-vm::fastrem-32 dividend c divisor)))
 128                  (setq result (logxor result ans))))))
 129   result)
 130
 131 ;;; This is the generalized code for the ultrafast way, which isn't actually
 132 ;;; ultrafast, but it shows the correctness of the algorithm at various precisions.
 133 (defun try-fastrem (divisor nbits)  ;; nbits = number of bits in numerator
 134   (macrolet ((try (fraction-bits)
 135                `(multiple-value-bind (fraction-bits c)
 136                     (sb-c:compute-fastrem-coefficient divisor nbits ,fraction-bits)
 137                   ;;(format t "~&F=~D~%" fraction-bits)
 138                   (dotimes (numerator (ash 1 nbits)) ; exhaustively test all possible numerators
 139                     (let* ((frac (ldb (byte fraction-bits 0) (* c numerator)))
 140                            (answer (ash (* frac divisor) (- fraction-bits)))
 141                            (expect  (mod numerator divisor)))
 142                       (unless (= answer expect)
 143                         (format t "~12,'0b ~12,'0b  ~3d ~3d~%"
 144                                 numerator frac answer (mod numerator divisor))))))))
 145     (try :variable)
 146     ;(try 16)
 147     (try 32)))
 148
 149 (defun try-fastrem-bigly ()
 150   (loop for divisor from 3 to 10000
 151         do (format t "divisor=~d~%" divisor)
 152             (try-fastrem divisor 18)))