1.0.23.16: more generic assembly op optimizations on x86 and x86-64
[sbcl/pkhuong.git] / src / assembly / x86 / arith.lisp
blob535e023758e8319fe2fd2fc7e83ea5248fc6b144
1 ;;;; simple cases for generic arithmetic
3 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; more information.
5 ;;;;
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB!VM")
14 ;;;; addition, subtraction, and multiplication
16 (macrolet ((define-generic-arith-routine ((fun cost) &body body)
17 `(define-assembly-routine (,(symbolicate "GENERIC-" fun)
18 (:cost ,cost)
19 (:return-style :full-call)
20 (:translate ,fun)
21 (:policy :safe)
22 (:save-p t))
23 ((:arg x (descriptor-reg any-reg) edx-offset)
24 (:arg y (descriptor-reg any-reg)
25 ;; this seems wrong esi-offset -- FIXME: What's it mean?
26 edi-offset)
28 (:res res (descriptor-reg any-reg) edx-offset)
30 (:temp eax unsigned-reg eax-offset)
31 (:temp ecx unsigned-reg ecx-offset))
33 (inst mov ecx x)
34 (inst or ecx y)
35 (inst test ecx fixnum-tag-mask) ; both fixnums?
36 (inst jmp :nz DO-STATIC-FUN) ; no - do generic
38 ,@body
39 (inst clc) ; single-value return
40 (inst ret)
42 DO-STATIC-FUN
43 (inst pop eax)
44 (inst push ebp-tn)
45 (inst lea
46 ebp-tn
47 (make-ea :dword :base esp-tn :disp n-word-bytes))
48 (inst sub esp-tn (fixnumize 2))
49 (inst push eax) ; callers return addr
50 (inst mov ecx (fixnumize 2)) ; arg count
51 (inst jmp
52 (make-ea :dword
53 :disp (+ nil-value
54 (static-fun-offset
55 ',(symbolicate "TWO-ARG-" fun))))))))
57 (define-generic-arith-routine (+ 10)
58 (move res x)
59 (inst add res y)
60 (inst jmp :no OKAY)
61 (inst rcr res 1) ; carry has correct sign
62 (inst sar res 1) ; remove type bits
64 (move ecx res)
66 (with-fixed-allocation (res bignum-widetag (1+ bignum-digits-offset))
67 (storew ecx res bignum-digits-offset other-pointer-lowtag))
69 OKAY)
71 (define-generic-arith-routine (- 10)
72 (move res x)
73 (inst sub res y)
74 (inst jmp :no OKAY)
75 (inst cmc) ; carry has correct sign now
76 (inst rcr res 1)
77 (inst sar res 1) ; remove type bits
79 (move ecx res)
81 (with-fixed-allocation (res bignum-widetag (1+ bignum-digits-offset))
82 (storew ecx res bignum-digits-offset other-pointer-lowtag))
83 OKAY)
85 (define-generic-arith-routine (* 30)
86 (move eax x) ; must use eax for 64-bit result
87 (inst sar eax n-fixnum-tag-bits) ; remove *4 fixnum bias
88 (inst imul y) ; result in edx:eax
89 (inst jmp :no okay) ; still fixnum
91 ;; zzz jrd changed edx to ebx in here, as edx isn't listed as a temp, above
92 ;; pfw says that loses big -- edx is target for arg x and result res
93 ;; note that 'edx' is not defined -- using x
94 (inst shrd eax x n-fixnum-tag-bits) ; high bits from edx
95 (inst sar x n-fixnum-tag-bits) ; now shift edx too
97 (move ecx x) ; save high bits from cdq
98 (inst cdq) ; edx:eax <- sign-extend of eax
99 (inst cmp x ecx)
100 (inst jmp :e SINGLE-WORD-BIGNUM)
102 (with-fixed-allocation (res bignum-widetag (+ bignum-digits-offset 2))
103 (storew eax res bignum-digits-offset other-pointer-lowtag)
104 (storew ecx res (1+ bignum-digits-offset) other-pointer-lowtag))
105 (inst jmp DONE)
107 SINGLE-WORD-BIGNUM
109 (with-fixed-allocation (res bignum-widetag (1+ bignum-digits-offset))
110 (storew eax res bignum-digits-offset other-pointer-lowtag))
111 (inst jmp DONE)
113 OKAY
114 (move res eax)
115 DONE))
117 ;;;; negation
119 (define-assembly-routine (generic-negate
120 (:cost 10)
121 (:return-style :full-call)
122 (:policy :safe)
123 (:translate %negate)
124 (:save-p t))
125 ((:arg x (descriptor-reg any-reg) edx-offset)
126 (:res res (descriptor-reg any-reg) edx-offset)
128 (:temp eax unsigned-reg eax-offset)
129 (:temp ecx unsigned-reg ecx-offset))
130 (inst test x fixnum-tag-mask)
131 (inst jmp :z FIXNUM)
133 (inst pop eax)
134 (inst push ebp-tn)
135 (inst lea ebp-tn (make-ea :dword :base esp-tn :disp n-word-bytes))
136 (inst sub esp-tn (fixnumize 2))
137 (inst push eax)
138 (inst mov ecx (fixnumize 1)) ; arg count
139 (inst jmp (make-ea :dword
140 :disp (+ nil-value (static-fun-offset '%negate))))
142 FIXNUM
143 (move res x)
144 (inst neg res) ; (- most-negative-fixnum) is BIGNUM
145 (inst jmp :no OKAY)
146 (inst shr res n-fixnum-tag-bits) ; sign bit is data - remove type bits
147 (move ecx res)
149 (with-fixed-allocation (res bignum-widetag (1+ bignum-digits-offset))
150 (storew ecx res bignum-digits-offset other-pointer-lowtag))
152 OKAY)
154 ;;;; comparison
156 (macrolet ((define-cond-assem-rtn (name translate static-fn test)
157 `(define-assembly-routine (,name
158 (:cost 10)
159 (:return-style :full-call)
160 (:policy :safe)
161 (:translate ,translate)
162 (:save-p t))
163 ((:arg x (descriptor-reg any-reg) edx-offset)
164 (:arg y (descriptor-reg any-reg) edi-offset)
166 (:res res descriptor-reg edx-offset)
168 (:temp eax unsigned-reg eax-offset)
169 (:temp ecx unsigned-reg ecx-offset))
171 (inst mov ecx x)
172 (inst or ecx y)
173 (inst test ecx fixnum-tag-mask)
174 (inst jmp :nz DO-STATIC-FUN) ; are both fixnums?
176 (inst cmp x y)
177 (cond ((member :cmov *backend-subfeatures*)
178 (load-symbol res t)
179 (inst mov eax nil-value)
180 (inst cmov ,test res eax))
182 (inst mov res nil-value)
183 (inst jmp ,test RETURN)
184 (load-symbol res t)))
185 RETURN
186 (inst clc) ; single-value return
187 (inst ret)
189 DO-STATIC-FUN
190 (inst pop eax)
191 (inst push ebp-tn)
192 (inst lea ebp-tn (make-ea :dword
193 :base esp-tn
194 :disp n-word-bytes))
195 (inst sub esp-tn (fixnumize 2)) ; FIXME: Push 2 words on stack,
196 ; weirdly?
197 (inst push eax)
198 (inst mov ecx (fixnumize 2)) ; FIXME: FIXNUMIZE and
199 ; SINGLE-FLOAT-BITS are parallel,
200 ; should be named parallelly.
201 (inst jmp (make-ea :dword
202 :disp (+ nil-value
203 (static-fun-offset ',static-fn)))))))
205 (define-cond-assem-rtn generic-< < two-arg-< :ge)
206 (define-cond-assem-rtn generic-> > two-arg-> :le))
208 (define-assembly-routine (generic-eql
209 (:cost 10)
210 (:return-style :full-call)
211 (:policy :safe)
212 (:translate eql)
213 (:save-p t))
214 ((:arg x (descriptor-reg any-reg) edx-offset)
215 (:arg y (descriptor-reg any-reg) edi-offset)
217 (:res res descriptor-reg edx-offset)
219 (:temp eax unsigned-reg eax-offset)
220 (:temp ecx unsigned-reg ecx-offset))
221 (inst mov ecx x)
222 (inst and ecx y)
223 (inst test ecx fixnum-tag-mask)
224 (inst jmp :nz DO-STATIC-FUN)
226 ;; At least one fixnum
227 (inst cmp x y)
228 (load-symbol res t)
229 (cond ((member :cmov *backend-subfeatures*)
230 (inst mov eax nil-value)
231 (inst cmov :ne res eax))
233 (inst jmp :e RETURN)
234 (inst mov res nil-value)))
235 RETURN
236 (inst clc)
237 (inst ret)
239 ;; FIXME: We could handle all non-numbers here easily enough: go to
240 ;; TWO-ARG-EQL only if lowtags and widetags match, lowtag is
241 ;; other-pointer-lowtag and widetag is < code-header-widetag.
242 DO-STATIC-FUN
243 (inst pop eax)
244 (inst push ebp-tn)
245 (inst lea ebp-tn (make-ea :dword :base esp-tn :disp n-word-bytes))
246 (inst sub esp-tn (fixnumize 2))
247 (inst push eax)
248 (inst mov ecx (fixnumize 2))
249 (inst jmp (make-ea :dword
250 :disp (+ nil-value (static-fun-offset 'eql)))))
252 (define-assembly-routine (generic-=
253 (:cost 10)
254 (:return-style :full-call)
255 (:policy :safe)
256 (:translate =)
257 (:save-p t))
258 ((:arg x (descriptor-reg any-reg) edx-offset)
259 (:arg y (descriptor-reg any-reg) edi-offset)
261 (:res res descriptor-reg edx-offset)
263 (:temp eax unsigned-reg eax-offset)
264 (:temp ecx unsigned-reg ecx-offset))
265 (inst mov ecx x)
266 (inst or ecx y)
267 (inst test ecx fixnum-tag-mask) ; both fixnums?
268 (inst jmp :nz DO-STATIC-FUN)
270 (inst cmp x y)
271 (load-symbol res t)
272 (cond ((member :cmov *backend-subfeatures*)
273 (inst mov eax nil-value)
274 (inst cmov :ne res eax))
276 (inst jmp :e RETURN)
277 (inst mov res nil-value)))
278 RETURN
279 (inst clc)
280 (inst ret)
282 DO-STATIC-FUN
283 (inst pop eax)
284 (inst push ebp-tn)
285 (inst lea ebp-tn (make-ea :dword :base esp-tn :disp n-word-bytes))
286 (inst sub esp-tn (fixnumize 2))
287 (inst push eax)
288 (inst mov ecx (fixnumize 2))
289 (inst jmp (make-ea :dword
290 :disp (+ nil-value (static-fun-offset 'two-arg-=)))))
293 ;;; Support for the Mersenne Twister, MT19937, random number generator
294 ;;; due to Matsumoto and Nishimura.
296 ;;; Makoto Matsumoto and T. Nishimura, "Mersenne twister: A
297 ;;; 623-dimensionally equidistributed uniform pseudorandom number
298 ;;; generator.", ACM Transactions on Modeling and Computer Simulation,
299 ;;; 1997, to appear.
301 ;;; State:
302 ;;; 0-1: Constant matrix A. [0, #x9908b0df] (not used here)
303 ;;; 2: Index; init. to 1.
304 ;;; 3-626: State.
306 ;;; This assembly routine is called from the inline VOP and updates
307 ;;; the state vector with new random numbers. The state vector is
308 ;;; passed in the EAX register.
309 #+sb-assembling ; We don't want a vop for this one.
310 (define-assembly-routine
311 (random-mt19937-update)
312 ((:temp state unsigned-reg eax-offset)
313 (:temp k unsigned-reg ebx-offset)
314 (:temp y unsigned-reg ecx-offset)
315 (:temp tmp unsigned-reg edx-offset))
317 ;; Save the temporary registers.
318 (inst push k)
319 (inst push y)
320 (inst push tmp)
322 ;; Generate a new set of results.
323 (inst xor k k)
324 LOOP1
325 (inst mov y (make-ea-for-vector-data state :index k :offset 3))
326 (inst mov tmp (make-ea-for-vector-data state :index k :offset (+ 1 3)))
327 (inst and y #x80000000)
328 (inst and tmp #x7fffffff)
329 (inst or y tmp)
330 (inst shr y 1)
331 (inst jmp :nc skip1)
332 (inst xor y #x9908b0df)
333 SKIP1
334 (inst xor y (make-ea-for-vector-data state :index k :offset (+ 397 3)))
335 (inst mov (make-ea-for-vector-data state :index k :offset 3) y)
336 (inst inc k)
337 (inst cmp k (- 624 397))
338 (inst jmp :b loop1)
339 LOOP2
340 (inst mov y (make-ea-for-vector-data state :index k :offset 3))
341 (inst mov tmp (make-ea-for-vector-data state :index k :offset (+ 1 3)))
342 (inst and y #x80000000)
343 (inst and tmp #x7fffffff)
344 (inst or y tmp)
345 (inst shr y 1)
346 (inst jmp :nc skip2)
347 (inst xor y #x9908b0df)
348 SKIP2
349 (inst xor y (make-ea-for-vector-data state :index k :offset (+ (- 397 624) 3)))
350 (inst mov (make-ea-for-vector-data state :index k :offset 3) y)
351 (inst inc k)
352 (inst cmp k (- 624 1))
353 (inst jmp :b loop2)
355 (inst mov y (make-ea-for-vector-data state :offset (+ (- 624 1) 3)))
356 (inst mov tmp (make-ea-for-vector-data state :offset (+ 0 3)))
357 (inst and y #x80000000)
358 (inst and tmp #x7fffffff)
359 (inst or y tmp)
360 (inst shr y 1)
361 (inst jmp :nc skip3)
362 (inst xor y #x9908b0df)
363 SKIP3
364 (inst xor y (make-ea-for-vector-data state :offset (+ (- 397 1) 3)))
365 (inst mov (make-ea-for-vector-data state :offset (+ (- 624 1) 3)) y)
367 ;; Restore the temporary registers and return.
368 (inst pop tmp)
369 (inst pop y)
370 (inst pop k)
371 (inst ret))