1 ;;; Assert that all odd divisors less than 32769 have an optimized
2 ;;; remainder coefficient using 32-bit operations.
3 ;;; Symbol hashsets with fewer than that many cells can use fast remainder.
4 ;;; (I could/should implement the 64-bit algorithm I suppose)
5 (with-test (:name
:optimized-remainder-exists
)
6 (loop for divisor from
3 below
32769 by
2
8 (multiple-value-bind (mask c
) (sb-impl::optimized-symtbl-remainder-params divisor
)
9 (assert (and (plusp c
) (plusp mask
))))))
11 #+interpreter
(invoke-restart 'run-tests
::skip-file
)
13 (defvar *rs
* (make-random-state t
)) ; get a random random-state
16 (with-test (:name
:udiv-magic
)
18 (let ((divisor (+ 5 (random #xfffffff
*rs
*))))
19 (multiple-value-bind (m a s
) (sb-c:compute-udiv32-magic divisor
)
21 (let* ((dividend (random (ash 1 32) *rs
*))
22 (quotient (sb-vm::udiv32-via-multiply dividend m a s
))
23 (expected (truncate dividend divisor
)))
24 (assert (= quotient expected
))))))))
27 (with-test (:name
:urem-magic
)
29 (let ((divisor (+ 5 (random #xfffffff
*rs
*))))
30 (multiple-value-bind (m a s
) (sb-c:compute-udiv32-magic divisor
)
32 (let* ((dividend (random (ash 1 32) *rs
*))
33 (remainder (sb-vm::urem32-via-multiply dividend divisor m a s
))
34 (expected (rem dividend divisor
)))
35 (assert (= remainder expected
))))))))
37 (with-test (:name
:urem32-ultrafast
)
39 ;; The 32-bit algorithm can only handle random 16-bit inputs (or some inputs
40 ;; with more than 16 bits, but it's random which inputs are OK)
41 ;; A 64-bit variant would handle all 32-bit inputs, but I don't need it.
42 (let* ((divisor (+ 5 (random #xffff
*rs
*)))
43 (c (sb-c:compute-fastrem-coefficient divisor
16 32)))
45 (let* ((dividend (random (ash 1 16) *rs
*))
46 (remainder (sb-vm::fastrem-32 dividend c divisor
))
47 (expected (rem dividend divisor
)))
48 (assert (= remainder expected
)))))))
51 (with-test (:name
:urem64-ultrafast
)
53 (let* ((divisor (+ 5 (random #xfffffff
*rs
*)))
54 (c (sb-c:compute-fastrem-coefficient divisor
32 64)))
56 (let* ((dividend (random (ash 1 32) *rs
*))
57 (remainder (sb-vm::fastrem-64 dividend c divisor
))
58 (expected (rem dividend divisor
)))
59 (assert (= remainder expected
)))))))
61 ;;;; The assembly code for the benchmarks contains no full calls.
62 ;;;; Therefore we're measuring the speed of the vops as accurately as possible.
64 ;;; These loops show the advantage of division by using multiplication
65 ;;; even when the CPU directly implements division.
66 ;;; BASIC-WAY takes about 4 to 6 times as many CPU cycles as TRICKY-WAY.
67 ;;; But these loops are too time-consuming for a regression test.
68 (defun div-basic-way (&aux
(result 0))
69 (declare (fixnum result
))
70 (loop for divisor from
3 to
1000
71 do
(dotimes (dividend #xfffff
)
72 (let ((ans (truncate dividend divisor
)))
73 (setq result
(logxor result ans
)))))
77 (defun div-tricky-way (&aux
(result 0))
78 (declare (fixnum result
))
79 (loop for divisor from
3 to
1000
80 do
(multiple-value-bind (m a s
) (sb-c:compute-udiv32-magic divisor
)
81 (dotimes (dividend #xfffff
)
82 (let ((ans (sb-vm::udiv32-via-multiply dividend m a s
)))
83 (setq result
(logxor result ans
))))))
86 ;;; * (time (rem-basic-way))
88 ;;; 30.123 seconds of real time
89 ;;; 30.112920 seconds of total run time (30.112919 user, 0.000001 system)
91 ;;; 84,142,880,093 processor cycles
92 (defun rem-basic-way (&aux
(result 0)) ; about 32 CPU cycles per operation
93 (declare (fixnum result
))
94 (loop for divisor from
3 to
10000
95 do
(dotimes (dividend #x3ffff
)
96 (let ((ans (rem dividend divisor
)))
97 (setq result
(logxor result ans
)))))
100 ;;; * (time (rem-tricky-way))
102 ;;; 9.999 seconds of real time
103 ;;; 9.998409 seconds of total run time (9.994458 user, 0.003951 system)
105 ;;; 27,938,111,662 processor cycles
107 (defun rem-tricky-way (&aux
(result 0)) ; about 11 CPU cycles per operation
108 (declare (fixnum result
))
109 (loop for divisor from
3 to
10000
110 do
(multiple-value-bind (m a s
) (sb-c:compute-udiv32-magic divisor
)
111 (dotimes (dividend #x3ffff
)
112 (let ((ans (sb-vm::urem32-via-multiply dividend divisor m a s
)))
113 (setq result
(logxor result ans
))))))
116 ;;; * (time (ultrafastrem-way))
118 ;;; 6.175 seconds of real time
119 ;;; 6.171059 seconds of total run time (6.171059 user, 0.000000 system)
121 ;;; 17,243,611,816 processor cycles
122 (defun ultrafastrem-way (&aux
(result 0)) ; about 7 CPU cycles per operation
123 (declare (fixnum result
))
124 (loop for divisor from
3 to
10000
125 do
(let ((c (sb-c:compute-fastrem-coefficient divisor
18 32)))
126 (dotimes (dividend #x3ffff
)
127 (let ((ans (sb-vm::fastrem-32 dividend c divisor
)))
128 (setq result
(logxor result ans
))))))
131 ;;; This is the generalized code for the ultrafast way, which isn't actually
132 ;;; ultrafast, but it shows the correctness of the algorithm at various precisions.
133 (defun try-fastrem (divisor nbits
) ;; nbits = number of bits in numerator
134 (macrolet ((try (fraction-bits)
135 `(multiple-value-bind (fraction-bits c
)
136 (sb-c:compute-fastrem-coefficient divisor nbits
,fraction-bits
)
137 ;;(format t "~&F=~D~%" fraction-bits)
138 (dotimes (numerator (ash 1 nbits
)) ; exhaustively test all possible numerators
139 (let* ((frac (ldb (byte fraction-bits
0) (* c numerator
)))
140 (answer (ash (* frac divisor
) (- fraction-bits
)))
141 (expect (mod numerator divisor
)))
142 (unless (= answer expect
)
143 (format t
"~12,'0b ~12,'0b ~3d ~3d~%"
144 numerator frac answer
(mod numerator divisor
))))))))
149 (defun try-fastrem-bigly ()
150 (loop for divisor from
3 to
10000
151 do
(format t
"divisor=~d~%" divisor
)
152 (try-fastrem divisor
18)))