Merged sbcl-1.0.14 with the sb-simd 1.3 patches
[sbcl/simd.git] / src / compiler / x86-64 / arith.lisp
blob1f63042a69b44edc160a6d2a75cdc0fb3b915ba6
1 ;;;; the VM definition of arithmetic VOPs for the x86
3 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; more information.
5 ;;;;
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB!VM")
14 ;;;; unary operations
16 (define-vop (fast-safe-arith-op)
17 (:policy :fast-safe)
18 (:effects)
19 (:affected))
21 (define-vop (fixnum-unop fast-safe-arith-op)
22 (:args (x :scs (any-reg) :target res))
23 (:results (res :scs (any-reg)))
24 (:note "inline fixnum arithmetic")
25 (:arg-types tagged-num)
26 (:result-types tagged-num))
28 (define-vop (signed-unop fast-safe-arith-op)
29 (:args (x :scs (signed-reg) :target res))
30 (:results (res :scs (signed-reg)))
31 (:note "inline (signed-byte 64) arithmetic")
32 (:arg-types signed-num)
33 (:result-types signed-num))
35 (define-vop (fast-negate/fixnum fixnum-unop)
36 (:translate %negate)
37 (:generator 1
38 (move res x)
39 (inst neg res)))
41 (define-vop (fast-negate/signed signed-unop)
42 (:translate %negate)
43 (:generator 2
44 (move res x)
45 (inst neg res)))
47 (define-vop (fast-lognot/fixnum fixnum-unop)
48 (:translate lognot)
49 (:generator 2
50 (move res x)
51 (inst xor res (fixnumize -1))))
53 (define-vop (fast-lognot/signed signed-unop)
54 (:translate lognot)
55 (:generator 1
56 (move res x)
57 (inst not res)))
59 ;;;; binary fixnum operations
61 ;;; Assume that any constant operand is the second arg...
63 (define-vop (fast-fixnum-binop fast-safe-arith-op)
64 (:args (x :target r :scs (any-reg)
65 :load-if (not (and (sc-is x control-stack)
66 (sc-is y any-reg)
67 (sc-is r control-stack)
68 (location= x r))))
69 (y :scs (any-reg control-stack)))
70 (:arg-types tagged-num tagged-num)
71 (:results (r :scs (any-reg) :from (:argument 0)
72 :load-if (not (and (sc-is x control-stack)
73 (sc-is y any-reg)
74 (sc-is r control-stack)
75 (location= x r)))))
76 (:result-types tagged-num)
77 (:note "inline fixnum arithmetic"))
79 (define-vop (fast-unsigned-binop fast-safe-arith-op)
80 (:args (x :target r :scs (unsigned-reg)
81 :load-if (not (and (sc-is x unsigned-stack)
82 (sc-is y unsigned-reg)
83 (sc-is r unsigned-stack)
84 (location= x r))))
85 (y :scs (unsigned-reg unsigned-stack)))
86 (:arg-types unsigned-num unsigned-num)
87 (:results (r :scs (unsigned-reg) :from (:argument 0)
88 :load-if (not (and (sc-is x unsigned-stack)
89 (sc-is y unsigned-reg)
90 (sc-is r unsigned-stack)
91 (location= x r)))))
92 (:result-types unsigned-num)
93 (:note "inline (unsigned-byte 64) arithmetic"))
95 (define-vop (fast-signed-binop fast-safe-arith-op)
96 (:args (x :target r :scs (signed-reg)
97 :load-if (not (and (sc-is x signed-stack)
98 (sc-is y signed-reg)
99 (sc-is r signed-stack)
100 (location= x r))))
101 (y :scs (signed-reg signed-stack)))
102 (:arg-types signed-num signed-num)
103 (:results (r :scs (signed-reg) :from (:argument 0)
104 :load-if (not (and (sc-is x signed-stack)
105 (sc-is y signed-reg)
106 (sc-is r signed-stack)
107 (location= x r)))))
108 (:result-types signed-num)
109 (:note "inline (signed-byte 64) arithmetic"))
111 (define-vop (fast-fixnum-binop-c fast-safe-arith-op)
112 (:args (x :target r :scs (any-reg control-stack)))
113 (:info y)
114 (:arg-types tagged-num (:constant (signed-byte 29)))
115 (:results (r :scs (any-reg)
116 :load-if (not (location= x r))))
117 (:result-types tagged-num)
118 (:note "inline fixnum arithmetic"))
120 ;; 31 not 64 because it's hard work loading 64 bit constants, and since
121 ;; sign-extension of immediates causes problems with 32.
122 (define-vop (fast-unsigned-binop-c fast-safe-arith-op)
123 (:args (x :target r :scs (unsigned-reg unsigned-stack)))
124 (:info y)
125 (:arg-types unsigned-num (:constant (unsigned-byte 31)))
126 (:results (r :scs (unsigned-reg)
127 :load-if (not (location= x r))))
128 (:result-types unsigned-num)
129 (:note "inline (unsigned-byte 64) arithmetic"))
131 (define-vop (fast-signed-binop-c fast-safe-arith-op)
132 (:args (x :target r :scs (signed-reg signed-stack)))
133 (:info y)
134 (:arg-types signed-num (:constant (signed-byte 32)))
135 (:results (r :scs (signed-reg)
136 :load-if (not (location= x r))))
137 (:result-types signed-num)
138 (:note "inline (signed-byte 64) arithmetic"))
140 (macrolet ((define-binop (translate untagged-penalty op)
141 `(progn
142 (define-vop (,(symbolicate "FAST-" translate "/FIXNUM=>FIXNUM")
143 fast-fixnum-binop)
144 (:translate ,translate)
145 (:generator 2
146 (move r x)
147 (inst ,op r y)))
148 (define-vop (,(symbolicate 'fast- translate '-c/fixnum=>fixnum)
149 fast-fixnum-binop-c)
150 (:translate ,translate)
151 (:generator 1
152 (move r x)
153 (inst ,op r (fixnumize y))))
154 (define-vop (,(symbolicate "FAST-" translate "/SIGNED=>SIGNED")
155 fast-signed-binop)
156 (:translate ,translate)
157 (:generator ,(1+ untagged-penalty)
158 (move r x)
159 (inst ,op r y)))
160 (define-vop (,(symbolicate 'fast- translate '-c/signed=>signed)
161 fast-signed-binop-c)
162 (:translate ,translate)
163 (:generator ,untagged-penalty
164 (move r x)
165 (inst ,op r y)))
166 (define-vop (,(symbolicate "FAST-"
167 translate
168 "/UNSIGNED=>UNSIGNED")
169 fast-unsigned-binop)
170 (:translate ,translate)
171 (:generator ,(1+ untagged-penalty)
172 (move r x)
173 (inst ,op r y)))
174 (define-vop (,(symbolicate 'fast-
175 translate
176 '-c/unsigned=>unsigned)
177 fast-unsigned-binop-c)
178 (:translate ,translate)
179 (:generator ,untagged-penalty
180 (move r x)
181 (inst ,op r y))))))
183 ;;(define-binop + 4 add)
184 (define-binop - 4 sub)
185 (define-binop logand 2 and)
186 (define-binop logior 2 or)
187 (define-binop logxor 2 xor))
189 ;;; Special handling of add on the x86; can use lea to avoid a
190 ;;; register load, otherwise it uses add.
191 (define-vop (fast-+/fixnum=>fixnum fast-safe-arith-op)
192 (:translate +)
193 (:args (x :scs (any-reg) :target r
194 :load-if (not (and (sc-is x control-stack)
195 (sc-is y any-reg)
196 (sc-is r control-stack)
197 (location= x r))))
198 (y :scs (any-reg control-stack)))
199 (:arg-types tagged-num tagged-num)
200 (:results (r :scs (any-reg) :from (:argument 0)
201 :load-if (not (and (sc-is x control-stack)
202 (sc-is y any-reg)
203 (sc-is r control-stack)
204 (location= x r)))))
205 (:result-types tagged-num)
206 (:note "inline fixnum arithmetic")
207 (:generator 2
208 (cond ((and (sc-is x any-reg) (sc-is y any-reg) (sc-is r any-reg)
209 (not (location= x r)))
210 (inst lea r (make-ea :qword :base x :index y :scale 1)))
212 (move r x)
213 (inst add r y)))))
215 (define-vop (fast-+-c/fixnum=>fixnum fast-safe-arith-op)
216 (:translate +)
217 (:args (x :target r :scs (any-reg control-stack)))
218 (:info y)
219 (:arg-types tagged-num (:constant (signed-byte 29)))
220 (:results (r :scs (any-reg)
221 :load-if (not (location= x r))))
222 (:result-types tagged-num)
223 (:note "inline fixnum arithmetic")
224 (:generator 1
225 (cond ((and (sc-is x any-reg) (sc-is r any-reg) (not (location= x r)))
226 (inst lea r (make-ea :qword :base x :disp (fixnumize y))))
228 (move r x)
229 (inst add r (fixnumize y))))))
231 (define-vop (fast-+/signed=>signed fast-safe-arith-op)
232 (:translate +)
233 (:args (x :scs (signed-reg) :target r
234 :load-if (not (and (sc-is x signed-stack)
235 (sc-is y signed-reg)
236 (sc-is r signed-stack)
237 (location= x r))))
238 (y :scs (signed-reg signed-stack)))
239 (:arg-types signed-num signed-num)
240 (:results (r :scs (signed-reg) :from (:argument 0)
241 :load-if (not (and (sc-is x signed-stack)
242 (sc-is y signed-reg)
243 (location= x r)))))
244 (:result-types signed-num)
245 (:note "inline (signed-byte 64) arithmetic")
246 (:generator 5
247 (cond ((and (sc-is x signed-reg) (sc-is y signed-reg) (sc-is r signed-reg)
248 (not (location= x r)))
249 (inst lea r (make-ea :qword :base x :index y :scale 1)))
251 (move r x)
252 (inst add r y)))))
255 ;;;; Special logand cases: (logand signed unsigned) => unsigned
257 (define-vop (fast-logand/signed-unsigned=>unsigned
258 fast-logand/unsigned=>unsigned)
259 (:args (x :target r :scs (signed-reg)
260 :load-if (not (and (sc-is x signed-stack)
261 (sc-is y unsigned-reg)
262 (sc-is r unsigned-stack)
263 (location= x r))))
264 (y :scs (unsigned-reg unsigned-stack)))
265 (:arg-types signed-num unsigned-num))
267 (define-vop (fast-logand-c/signed-unsigned=>unsigned
268 fast-logand-c/unsigned=>unsigned)
269 (:args (x :target r :scs (signed-reg signed-stack)))
270 (:arg-types signed-num (:constant (unsigned-byte 31))))
272 (define-vop (fast-logand/unsigned-signed=>unsigned
273 fast-logand/unsigned=>unsigned)
274 (:args (x :target r :scs (unsigned-reg)
275 :load-if (not (and (sc-is x unsigned-stack)
276 (sc-is y signed-reg)
277 (sc-is r unsigned-stack)
278 (location= x r))))
279 (y :scs (signed-reg signed-stack)))
280 (:arg-types unsigned-num signed-num))
283 (define-vop (fast-+-c/signed=>signed fast-safe-arith-op)
284 (:translate +)
285 (:args (x :target r :scs (signed-reg signed-stack)))
286 (:info y)
287 (:arg-types signed-num (:constant (signed-byte 32)))
288 (:results (r :scs (signed-reg)
289 :load-if (not (location= x r))))
290 (:result-types signed-num)
291 (:note "inline (signed-byte 64) arithmetic")
292 (:generator 4
293 (cond ((and (sc-is x signed-reg) (sc-is r signed-reg)
294 (not (location= x r)))
295 (inst lea r (make-ea :qword :base x :disp y)))
297 (move r x)
298 (if (= y 1)
299 (inst inc r)
300 (inst add r y))))))
302 (define-vop (fast-+/unsigned=>unsigned fast-safe-arith-op)
303 (:translate +)
304 (:args (x :scs (unsigned-reg) :target r
305 :load-if (not (and (sc-is x unsigned-stack)
306 (sc-is y unsigned-reg)
307 (sc-is r unsigned-stack)
308 (location= x r))))
309 (y :scs (unsigned-reg unsigned-stack)))
310 (:arg-types unsigned-num unsigned-num)
311 (:results (r :scs (unsigned-reg) :from (:argument 0)
312 :load-if (not (and (sc-is x unsigned-stack)
313 (sc-is y unsigned-reg)
314 (sc-is r unsigned-stack)
315 (location= x r)))))
316 (:result-types unsigned-num)
317 (:note "inline (unsigned-byte 64) arithmetic")
318 (:generator 5
319 (cond ((and (sc-is x unsigned-reg) (sc-is y unsigned-reg)
320 (sc-is r unsigned-reg) (not (location= x r)))
321 (inst lea r (make-ea :qword :base x :index y :scale 1)))
323 (move r x)
324 (inst add r y)))))
326 (define-vop (fast-+-c/unsigned=>unsigned fast-safe-arith-op)
327 (:translate +)
328 (:args (x :target r :scs (unsigned-reg unsigned-stack)))
329 (:info y)
330 (:arg-types unsigned-num (:constant (unsigned-byte 31)))
331 (:results (r :scs (unsigned-reg)
332 :load-if (not (location= x r))))
333 (:result-types unsigned-num)
334 (:note "inline (unsigned-byte 64) arithmetic")
335 (:generator 4
336 (cond ((and (sc-is x unsigned-reg) (sc-is r unsigned-reg)
337 (not (location= x r)))
338 (inst lea r (make-ea :qword :base x :disp y)))
340 (move r x)
341 (if (= y 1)
342 (inst inc r)
343 (inst add r y))))))
345 ;;;; multiplication and division
347 (define-vop (fast-*/fixnum=>fixnum fast-safe-arith-op)
348 (:translate *)
349 ;; We need different loading characteristics.
350 (:args (x :scs (any-reg) :target r)
351 (y :scs (any-reg control-stack)))
352 (:arg-types tagged-num tagged-num)
353 (:results (r :scs (any-reg) :from (:argument 0)))
354 (:result-types tagged-num)
355 (:note "inline fixnum arithmetic")
356 (:generator 4
357 (move r x)
358 (inst sar r 3)
359 (inst imul r y)))
361 (define-vop (fast-*-c/fixnum=>fixnum fast-safe-arith-op)
362 (:translate *)
363 ;; We need different loading characteristics.
364 (:args (x :scs (any-reg control-stack)))
365 (:info y)
366 (:arg-types tagged-num (:constant (signed-byte 29)))
367 (:results (r :scs (any-reg)))
368 (:result-types tagged-num)
369 (:note "inline fixnum arithmetic")
370 (:generator 3
371 (inst imul r x y)))
373 (define-vop (fast-*/signed=>signed fast-safe-arith-op)
374 (:translate *)
375 ;; We need different loading characteristics.
376 (:args (x :scs (signed-reg) :target r)
377 (y :scs (signed-reg signed-stack)))
378 (:arg-types signed-num signed-num)
379 (:results (r :scs (signed-reg) :from (:argument 0)))
380 (:result-types signed-num)
381 (:note "inline (signed-byte 64) arithmetic")
382 (:generator 5
383 (move r x)
384 (inst imul r y)))
386 (define-vop (fast-*-c/signed=>signed fast-safe-arith-op)
387 (:translate *)
388 ;; We need different loading characteristics.
389 (:args (x :scs (signed-reg signed-stack)))
390 (:info y)
391 (:arg-types signed-num (:constant (signed-byte 32)))
392 (:results (r :scs (signed-reg)))
393 (:result-types signed-num)
394 (:note "inline (signed-byte 64) arithmetic")
395 (:generator 4
396 (inst imul r x y)))
398 (define-vop (fast-*/unsigned=>unsigned fast-safe-arith-op)
399 (:translate *)
400 (:args (x :scs (unsigned-reg) :target eax)
401 (y :scs (unsigned-reg unsigned-stack)))
402 (:arg-types unsigned-num unsigned-num)
403 (:temporary (:sc unsigned-reg :offset eax-offset :target r
404 :from (:argument 0) :to :result) eax)
405 (:temporary (:sc unsigned-reg :offset edx-offset
406 :from :eval :to :result) edx)
407 (:ignore edx)
408 (:results (r :scs (unsigned-reg)))
409 (:result-types unsigned-num)
410 (:note "inline (unsigned-byte 64) arithmetic")
411 (:vop-var vop)
412 (:save-p :compute-only)
413 (:generator 6
414 (move eax x)
415 (inst mul eax y)
416 (move r eax)))
419 (define-vop (fast-truncate/fixnum=>fixnum fast-safe-arith-op)
420 (:translate truncate)
421 (:args (x :scs (any-reg) :target eax)
422 (y :scs (any-reg control-stack)))
423 (:arg-types tagged-num tagged-num)
424 (:temporary (:sc signed-reg :offset eax-offset :target quo
425 :from (:argument 0) :to (:result 0)) eax)
426 (:temporary (:sc unsigned-reg :offset edx-offset :target rem
427 :from (:argument 0) :to (:result 1)) edx)
428 (:results (quo :scs (any-reg))
429 (rem :scs (any-reg)))
430 (:result-types tagged-num tagged-num)
431 (:note "inline fixnum arithmetic")
432 (:vop-var vop)
433 (:save-p :compute-only)
434 (:generator 31
435 (let ((zero (generate-error-code vop division-by-zero-error x y)))
436 (if (sc-is y any-reg)
437 (inst test y y) ; smaller instruction
438 (inst cmp y 0))
439 (inst jmp :eq zero))
440 (move eax x)
441 (inst cqo)
442 (inst idiv eax y)
443 (if (location= quo eax)
444 (inst shl eax 3)
445 (inst lea quo (make-ea :qword :index eax :scale 8)))
446 (move rem edx)))
448 (define-vop (fast-truncate-c/fixnum=>fixnum fast-safe-arith-op)
449 (:translate truncate)
450 (:args (x :scs (any-reg) :target eax))
451 (:info y)
452 (:arg-types tagged-num (:constant (signed-byte 29)))
453 (:temporary (:sc signed-reg :offset eax-offset :target quo
454 :from :argument :to (:result 0)) eax)
455 (:temporary (:sc any-reg :offset edx-offset :target rem
456 :from :eval :to (:result 1)) edx)
457 (:temporary (:sc any-reg :from :eval :to :result) y-arg)
458 (:results (quo :scs (any-reg))
459 (rem :scs (any-reg)))
460 (:result-types tagged-num tagged-num)
461 (:note "inline fixnum arithmetic")
462 (:vop-var vop)
463 (:save-p :compute-only)
464 (:generator 30
465 (move eax x)
466 (inst cqo)
467 (inst mov y-arg (fixnumize y))
468 (inst idiv eax y-arg)
469 (if (location= quo eax)
470 (inst shl eax 3)
471 (inst lea quo (make-ea :qword :index eax :scale 8)))
472 (move rem edx)))
474 (define-vop (fast-truncate/unsigned=>unsigned fast-safe-arith-op)
475 (:translate truncate)
476 (:args (x :scs (unsigned-reg) :target eax)
477 (y :scs (unsigned-reg signed-stack)))
478 (:arg-types unsigned-num unsigned-num)
479 (:temporary (:sc unsigned-reg :offset eax-offset :target quo
480 :from (:argument 0) :to (:result 0)) eax)
481 (:temporary (:sc unsigned-reg :offset edx-offset :target rem
482 :from (:argument 0) :to (:result 1)) edx)
483 (:results (quo :scs (unsigned-reg))
484 (rem :scs (unsigned-reg)))
485 (:result-types unsigned-num unsigned-num)
486 (:note "inline (unsigned-byte 64) arithmetic")
487 (:vop-var vop)
488 (:save-p :compute-only)
489 (:generator 33
490 (let ((zero (generate-error-code vop division-by-zero-error x y)))
491 (if (sc-is y unsigned-reg)
492 (inst test y y) ; smaller instruction
493 (inst cmp y 0))
494 (inst jmp :eq zero))
495 (move eax x)
496 (inst xor edx edx)
497 (inst div eax y)
498 (move quo eax)
499 (move rem edx)))
501 (define-vop (fast-truncate-c/unsigned=>unsigned fast-safe-arith-op)
502 (:translate truncate)
503 (:args (x :scs (unsigned-reg) :target eax))
504 (:info y)
505 (:arg-types unsigned-num (:constant (unsigned-byte 31)))
506 (:temporary (:sc unsigned-reg :offset eax-offset :target quo
507 :from :argument :to (:result 0)) eax)
508 (:temporary (:sc unsigned-reg :offset edx-offset :target rem
509 :from :eval :to (:result 1)) edx)
510 (:temporary (:sc unsigned-reg :from :eval :to :result) y-arg)
511 (:results (quo :scs (unsigned-reg))
512 (rem :scs (unsigned-reg)))
513 (:result-types unsigned-num unsigned-num)
514 (:note "inline (unsigned-byte 64) arithmetic")
515 (:vop-var vop)
516 (:save-p :compute-only)
517 (:generator 32
518 (move eax x)
519 (inst xor edx edx)
520 (inst mov y-arg y)
521 (inst div eax y-arg)
522 (move quo eax)
523 (move rem edx)))
525 (define-vop (fast-truncate/signed=>signed fast-safe-arith-op)
526 (:translate truncate)
527 (:args (x :scs (signed-reg) :target eax)
528 (y :scs (signed-reg signed-stack)))
529 (:arg-types signed-num signed-num)
530 (:temporary (:sc signed-reg :offset eax-offset :target quo
531 :from (:argument 0) :to (:result 0)) eax)
532 (:temporary (:sc signed-reg :offset edx-offset :target rem
533 :from (:argument 0) :to (:result 1)) edx)
534 (:results (quo :scs (signed-reg))
535 (rem :scs (signed-reg)))
536 (:result-types signed-num signed-num)
537 (:note "inline (signed-byte 64) arithmetic")
538 (:vop-var vop)
539 (:save-p :compute-only)
540 (:generator 33
541 (let ((zero (generate-error-code vop division-by-zero-error x y)))
542 (if (sc-is y signed-reg)
543 (inst test y y) ; smaller instruction
544 (inst cmp y 0))
545 (inst jmp :eq zero))
546 (move eax x)
547 (inst cqo)
548 (inst idiv eax y)
549 (move quo eax)
550 (move rem edx)))
552 (define-vop (fast-truncate-c/signed=>signed fast-safe-arith-op)
553 (:translate truncate)
554 (:args (x :scs (signed-reg) :target eax))
555 (:info y)
556 (:arg-types signed-num (:constant (signed-byte 32)))
557 (:temporary (:sc signed-reg :offset eax-offset :target quo
558 :from :argument :to (:result 0)) eax)
559 (:temporary (:sc signed-reg :offset edx-offset :target rem
560 :from :eval :to (:result 1)) edx)
561 (:temporary (:sc signed-reg :from :eval :to :result) y-arg)
562 (:results (quo :scs (signed-reg))
563 (rem :scs (signed-reg)))
564 (:result-types signed-num signed-num)
565 (:note "inline (signed-byte 64) arithmetic")
566 (:vop-var vop)
567 (:save-p :compute-only)
568 (:generator 32
569 (move eax x)
570 (inst cqo)
571 (inst mov y-arg y)
572 (inst idiv eax y-arg)
573 (move quo eax)
574 (move rem edx)))
578 ;;;; Shifting
579 (define-vop (fast-ash-c/fixnum=>fixnum)
580 (:translate ash)
581 (:policy :fast-safe)
582 (:args (number :scs (any-reg) :target result
583 :load-if (not (and (sc-is number any-reg control-stack)
584 (sc-is result any-reg control-stack)
585 (location= number result)))))
586 (:info amount)
587 (:arg-types tagged-num (:constant integer))
588 (:results (result :scs (any-reg)
589 :load-if (not (and (sc-is number control-stack)
590 (sc-is result control-stack)
591 (location= number result)))))
592 (:result-types tagged-num)
593 (:note "inline ASH")
594 (:generator 2
595 (cond ((and (= amount 1) (not (location= number result)))
596 (inst lea result (make-ea :qword :base number :index number)))
597 ((and (= amount 2) (not (location= number result)))
598 (inst lea result (make-ea :qword :index number :scale 4)))
599 ((and (= amount 3) (not (location= number result)))
600 (inst lea result (make-ea :qword :index number :scale 8)))
602 (move result number)
603 (cond ((< -64 amount 64)
604 ;; this code is used both in ASH and ASH-SMOD61, so
605 ;; be careful
606 (if (plusp amount)
607 (inst shl result amount)
608 (progn
609 (inst sar result (- amount))
610 (inst and result (lognot fixnum-tag-mask)))))
611 ((plusp amount)
612 (if (sc-is result any-reg)
613 (inst xor result result)
614 (inst mov result 0)))
615 (t (inst sar result 63)
616 (inst and result (lognot fixnum-tag-mask))))))))
618 (define-vop (fast-ash-left/fixnum=>fixnum)
619 (:translate ash)
620 (:args (number :scs (any-reg) :target result
621 :load-if (not (and (sc-is number control-stack)
622 (sc-is result control-stack)
623 (location= number result))))
624 (amount :scs (unsigned-reg) :target ecx))
625 (:arg-types tagged-num positive-fixnum)
626 (:temporary (:sc unsigned-reg :offset ecx-offset :from (:argument 1)) ecx)
627 (:results (result :scs (any-reg) :from (:argument 0)
628 :load-if (not (and (sc-is number control-stack)
629 (sc-is result control-stack)
630 (location= number result)))))
631 (:result-types tagged-num)
632 (:policy :fast-safe)
633 (:note "inline ASH")
634 (:generator 3
635 (move result number)
636 (move ecx amount)
637 ;; The result-type ensures us that this shift will not overflow.
638 (inst shl result :cl)))
640 (define-vop (fast-ash-c/signed=>signed)
641 (:translate ash)
642 (:policy :fast-safe)
643 (:args (number :scs (signed-reg) :target result
644 :load-if (not (and (sc-is number signed-stack)
645 (sc-is result signed-stack)
646 (location= number result)))))
647 (:info amount)
648 (:arg-types signed-num (:constant integer))
649 (:results (result :scs (signed-reg)
650 :load-if (not (and (sc-is number signed-stack)
651 (sc-is result signed-stack)
652 (location= number result)))))
653 (:result-types signed-num)
654 (:note "inline ASH")
655 (:generator 3
656 (cond ((and (= amount 1) (not (location= number result)))
657 (inst lea result (make-ea :qword :base number :index number)))
658 ((and (= amount 2) (not (location= number result)))
659 (inst lea result (make-ea :qword :index number :scale 4)))
660 ((and (= amount 3) (not (location= number result)))
661 (inst lea result (make-ea :qword :index number :scale 8)))
663 (move result number)
664 (cond ((plusp amount) (inst shl result amount))
665 (t (inst sar result (min 63 (- amount)))))))))
667 (define-vop (fast-ash-c/unsigned=>unsigned)
668 (:translate ash)
669 (:policy :fast-safe)
670 (:args (number :scs (unsigned-reg) :target result
671 :load-if (not (and (sc-is number unsigned-stack)
672 (sc-is result unsigned-stack)
673 (location= number result)))))
674 (:info amount)
675 (:arg-types unsigned-num (:constant integer))
676 (:results (result :scs (unsigned-reg)
677 :load-if (not (and (sc-is number unsigned-stack)
678 (sc-is result unsigned-stack)
679 (location= number result)))))
680 (:result-types unsigned-num)
681 (:note "inline ASH")
682 (:generator 3
683 (cond ((and (= amount 1) (not (location= number result)))
684 (inst lea result (make-ea :qword :base number :index number)))
685 ((and (= amount 2) (not (location= number result)))
686 (inst lea result (make-ea :qword :index number :scale 4)))
687 ((and (= amount 3) (not (location= number result)))
688 (inst lea result (make-ea :qword :index number :scale 8)))
690 (move result number)
691 (cond ((< -64 amount 64) ;; XXXX
692 ;; this code is used both in ASH and ASH-MOD32, so
693 ;; be careful
694 (if (plusp amount)
695 (inst shl result amount)
696 (inst shr result (- amount))))
697 (t (if (sc-is result unsigned-reg)
698 (zeroize result)
699 (inst mov result 0))))))))
701 (define-vop (fast-ash-left/signed=>signed)
702 (:translate ash)
703 (:args (number :scs (signed-reg) :target result
704 :load-if (not (and (sc-is number signed-stack)
705 (sc-is result signed-stack)
706 (location= number result))))
707 (amount :scs (unsigned-reg) :target ecx))
708 (:arg-types signed-num positive-fixnum)
709 (:temporary (:sc unsigned-reg :offset ecx-offset :from (:argument 1)) ecx)
710 (:results (result :scs (signed-reg) :from (:argument 0)
711 :load-if (not (and (sc-is number signed-stack)
712 (sc-is result signed-stack)
713 (location= number result)))))
714 (:result-types signed-num)
715 (:policy :fast-safe)
716 (:note "inline ASH")
717 (:generator 4
718 (move result number)
719 (move ecx amount)
720 (inst shl result :cl)))
722 (define-vop (fast-ash-left/unsigned=>unsigned)
723 (:translate ash)
724 (:args (number :scs (unsigned-reg) :target result
725 :load-if (not (and (sc-is number unsigned-stack)
726 (sc-is result unsigned-stack)
727 (location= number result))))
728 (amount :scs (unsigned-reg) :target ecx))
729 (:arg-types unsigned-num positive-fixnum)
730 (:temporary (:sc unsigned-reg :offset ecx-offset :from (:argument 1)) ecx)
731 (:results (result :scs (unsigned-reg) :from (:argument 0)
732 :load-if (not (and (sc-is number unsigned-stack)
733 (sc-is result unsigned-stack)
734 (location= number result)))))
735 (:result-types unsigned-num)
736 (:policy :fast-safe)
737 (:note "inline ASH")
738 (:generator 4
739 (move result number)
740 (move ecx amount)
741 (inst shl result :cl)))
743 (define-vop (fast-ash/signed=>signed)
744 (:translate ash)
745 (:policy :fast-safe)
746 (:args (number :scs (signed-reg) :target result)
747 (amount :scs (signed-reg) :target ecx))
748 (:arg-types signed-num signed-num)
749 (:results (result :scs (signed-reg) :from (:argument 0)))
750 (:result-types signed-num)
751 (:temporary (:sc signed-reg :offset ecx-offset :from (:argument 1)) ecx)
752 (:note "inline ASH")
753 (:generator 5
754 (move result number)
755 (move ecx amount)
756 (inst or ecx ecx)
757 (inst jmp :ns POSITIVE)
758 (inst neg ecx)
759 (inst cmp ecx 63)
760 (inst jmp :be OKAY)
761 (inst mov ecx 63)
762 OKAY
763 (inst sar result :cl)
764 (inst jmp DONE)
766 POSITIVE
767 ;; The result-type ensures us that this shift will not overflow.
768 (inst shl result :cl)
770 DONE))
772 (define-vop (fast-ash/unsigned=>unsigned)
773 (:translate ash)
774 (:policy :fast-safe)
775 (:args (number :scs (unsigned-reg) :target result)
776 (amount :scs (signed-reg) :target ecx))
777 (:arg-types unsigned-num signed-num)
778 (:results (result :scs (unsigned-reg) :from (:argument 0)))
779 (:result-types unsigned-num)
780 (:temporary (:sc signed-reg :offset ecx-offset :from (:argument 1)) ecx)
781 (:note "inline ASH")
782 (:generator 5
783 (move result number)
784 (move ecx amount)
785 (inst or ecx ecx)
786 (inst jmp :ns POSITIVE)
787 (inst neg ecx)
788 (inst cmp ecx 63)
789 (inst jmp :be OKAY)
790 (zeroize result)
791 (inst jmp DONE)
792 OKAY
793 (inst shr result :cl)
794 (inst jmp DONE)
796 POSITIVE
797 ;; The result-type ensures us that this shift will not overflow.
798 (inst shl result :cl)
800 DONE))
802 (in-package "SB!C")
804 (defknown %lea (integer integer (member 1 2 4 8 16) (signed-byte 64))
805 integer
806 (foldable flushable movable))
808 (defoptimizer (%lea derive-type) ((base index scale disp))
809 (when (and (constant-lvar-p scale)
810 (constant-lvar-p disp))
811 (let ((scale (lvar-value scale))
812 (disp (lvar-value disp))
813 (base-type (lvar-type base))
814 (index-type (lvar-type index)))
815 (when (and (numeric-type-p base-type)
816 (numeric-type-p index-type))
817 (let ((base-lo (numeric-type-low base-type))
818 (base-hi (numeric-type-high base-type))
819 (index-lo (numeric-type-low index-type))
820 (index-hi (numeric-type-high index-type)))
821 (make-numeric-type :class 'integer
822 :complexp :real
823 :low (when (and base-lo index-lo)
824 (+ base-lo (* index-lo scale) disp))
825 :high (when (and base-hi index-hi)
826 (+ base-hi (* index-hi scale) disp))))))))
828 (defun %lea (base index scale disp)
829 (+ base (* index scale) disp))
831 (in-package "SB!VM")
833 (define-vop (%lea/unsigned=>unsigned)
834 (:translate %lea)
835 (:policy :fast-safe)
836 (:args (base :scs (unsigned-reg))
837 (index :scs (unsigned-reg)))
838 (:info scale disp)
839 (:arg-types unsigned-num unsigned-num
840 (:constant (member 1 2 4 8))
841 (:constant (signed-byte 64)))
842 (:results (r :scs (unsigned-reg)))
843 (:result-types unsigned-num)
844 (:generator 5
845 (inst lea r (make-ea :qword :base base :index index
846 :scale scale :disp disp))))
848 (define-vop (%lea/signed=>signed)
849 (:translate %lea)
850 (:policy :fast-safe)
851 (:args (base :scs (signed-reg))
852 (index :scs (signed-reg)))
853 (:info scale disp)
854 (:arg-types signed-num signed-num
855 (:constant (member 1 2 4 8))
856 (:constant (signed-byte 64)))
857 (:results (r :scs (signed-reg)))
858 (:result-types signed-num)
859 (:generator 4
860 (inst lea r (make-ea :qword :base base :index index
861 :scale scale :disp disp))))
863 (define-vop (%lea/fixnum=>fixnum)
864 (:translate %lea)
865 (:policy :fast-safe)
866 (:args (base :scs (any-reg))
867 (index :scs (any-reg)))
868 (:info scale disp)
869 (:arg-types tagged-num tagged-num
870 (:constant (member 1 2 4 8))
871 (:constant (signed-byte 64)))
872 (:results (r :scs (any-reg)))
873 (:result-types tagged-num)
874 (:generator 3
875 (inst lea r (make-ea :qword :base base :index index
876 :scale scale :disp disp))))
878 ;;; FIXME: before making knowledge of this too public, it needs to be
879 ;;; fixed so that it's actually _faster_ than the non-CMOV version; at
880 ;;; least on my Celeron-XXX laptop, this version is marginally slower
881 ;;; than the above version with branches. -- CSR, 2003-09-04
882 (define-vop (fast-cmov-ash/unsigned=>unsigned)
883 (:translate ash)
884 (:policy :fast-safe)
885 (:args (number :scs (unsigned-reg) :target result)
886 (amount :scs (signed-reg) :target ecx))
887 (:arg-types unsigned-num signed-num)
888 (:results (result :scs (unsigned-reg) :from (:argument 0)))
889 (:result-types unsigned-num)
890 (:temporary (:sc signed-reg :offset ecx-offset :from (:argument 1)) ecx)
891 (:temporary (:sc any-reg :from (:eval 0) :to (:eval 1)) zero)
892 (:note "inline ASH")
893 (:guard (member :cmov *backend-subfeatures*))
894 (:generator 4
895 (move result number)
896 (move ecx amount)
897 (inst or ecx ecx)
898 (inst jmp :ns POSITIVE)
899 (inst neg ecx)
900 (zeroize zero)
901 (inst shr result :cl)
902 (inst cmp ecx 63)
903 (inst cmov :nbe result zero)
904 (inst jmp DONE)
906 POSITIVE
907 ;; The result-type ensures us that this shift will not overflow.
908 (inst shl result :cl)
910 DONE))
912 (define-vop (signed-byte-64-len)
913 (:translate integer-length)
914 (:note "inline (signed-byte 64) integer-length")
915 (:policy :fast-safe)
916 (:args (arg :scs (signed-reg) :target res))
917 (:arg-types signed-num)
918 (:results (res :scs (unsigned-reg)))
919 (:result-types unsigned-num)
920 (:generator 28
921 (move res arg)
922 (inst cmp res 0)
923 (inst jmp :ge POS)
924 (inst not res)
926 (inst bsr res res)
927 (inst jmp :z ZERO)
928 (inst inc res)
929 (inst jmp DONE)
930 ZERO
931 (zeroize res)
932 DONE))
934 (define-vop (unsigned-byte-64-len)
935 (:translate integer-length)
936 (:note "inline (unsigned-byte 64) integer-length")
937 (:policy :fast-safe)
938 (:args (arg :scs (unsigned-reg)))
939 (:arg-types unsigned-num)
940 (:results (res :scs (unsigned-reg)))
941 (:result-types unsigned-num)
942 (:generator 26
943 (inst bsr res arg)
944 (inst jmp :z ZERO)
945 (inst inc res)
946 (inst jmp DONE)
947 ZERO
948 (zeroize res)
949 DONE))
951 (define-vop (unsigned-byte-64-count)
952 (:translate logcount)
953 (:note "inline (unsigned-byte 64) logcount")
954 (:policy :fast-safe)
955 (:args (arg :scs (unsigned-reg) :target result))
956 (:arg-types unsigned-num)
957 (:results (result :scs (unsigned-reg)))
958 (:result-types positive-fixnum)
959 (:temporary (:sc unsigned-reg) temp)
960 (:temporary (:sc unsigned-reg) mask)
961 (:generator 14
962 ;; See the comments below for how the algorithm works. The tricks
963 ;; used can be found for example in AMD's software optimization
964 ;; guide or at "http://www.hackersdelight.org/HDcode/pop.cc" in the
965 ;; function "pop1", for 32-bit words. The extension to 64 bits is
966 ;; straightforward.
967 ;; Calculate 2-bit sums. Note that the value of a two-digit binary
968 ;; number is the sum of the right digit and twice the left digit.
969 ;; Thus we can calculate the sum of the two digits by shifting the
970 ;; left digit to the right position and doing a two-bit subtraction.
971 ;; This subtraction will never create a borrow and thus can be made
972 ;; on all 32 2-digit numbers at once.
973 (move result arg)
974 (move temp arg)
975 (inst shr result 1)
976 (inst mov mask #x5555555555555555)
977 (inst and result mask)
978 (inst sub temp result)
979 ;; Calculate 4-bit sums by straightforward shift, mask and add.
980 ;; Note that we shift the source operand of the MOV and not its
981 ;; destination so that the SHR and the MOV can execute in the same
982 ;; clock cycle.
983 (inst mov result temp)
984 (inst shr temp 2)
985 (inst mov mask #x3333333333333333)
986 (inst and result mask)
987 (inst and temp mask)
988 (inst add result temp)
989 ;; Calculate 8-bit sums. Since each sum is at most 8, which fits
990 ;; into 4 bits, we can apply the mask after the addition, saving one
991 ;; instruction.
992 (inst mov temp result)
993 (inst shr result 4)
994 (inst add result temp)
995 (inst mov mask #x0f0f0f0f0f0f0f0f)
996 (inst and result mask)
997 ;; Add all 8 bytes at once by multiplying with #256r11111111.
998 ;; We need to calculate only the lower 8 bytes of the product.
999 ;; Of these the most significant byte contains the final result.
1000 ;; Note that there can be no overflow from one byte to the next
1001 ;; as the sum is at most 64 which needs only 7 bits.
1002 (inst mov mask #x0101010101010101)
1003 (inst imul result mask)
1004 (inst shr result 56)))
1006 ;;;; binary conditional VOPs
1008 (define-vop (fast-conditional)
1009 (:conditional)
1010 (:info target not-p)
1011 (:effects)
1012 (:affected)
1013 (:policy :fast-safe))
1015 ;;; constant variants are declared for 32 bits not 64 bits, because
1016 ;;; loading a 64 bit constant is silly
1018 (define-vop (fast-conditional/fixnum fast-conditional)
1019 (:args (x :scs (any-reg)
1020 :load-if (not (and (sc-is x control-stack)
1021 (sc-is y any-reg))))
1022 (y :scs (any-reg control-stack)))
1023 (:arg-types tagged-num tagged-num)
1024 (:note "inline fixnum comparison"))
1026 (define-vop (fast-conditional-c/fixnum fast-conditional/fixnum)
1027 (:args (x :scs (any-reg control-stack)))
1028 (:arg-types tagged-num (:constant (signed-byte 29)))
1029 (:info target not-p y))
1031 (define-vop (fast-conditional/signed fast-conditional)
1032 (:args (x :scs (signed-reg)
1033 :load-if (not (and (sc-is x signed-stack)
1034 (sc-is y signed-reg))))
1035 (y :scs (signed-reg signed-stack)))
1036 (:arg-types signed-num signed-num)
1037 (:note "inline (signed-byte 64) comparison"))
1039 (define-vop (fast-conditional-c/signed fast-conditional/signed)
1040 (:args (x :scs (signed-reg signed-stack)))
1041 (:arg-types signed-num (:constant (signed-byte 31)))
1042 (:info target not-p y))
1044 (define-vop (fast-conditional/unsigned fast-conditional)
1045 (:args (x :scs (unsigned-reg)
1046 :load-if (not (and (sc-is x unsigned-stack)
1047 (sc-is y unsigned-reg))))
1048 (y :scs (unsigned-reg unsigned-stack)))
1049 (:arg-types unsigned-num unsigned-num)
1050 (:note "inline (unsigned-byte 64) comparison"))
1052 (define-vop (fast-conditional-c/unsigned fast-conditional/unsigned)
1053 (:args (x :scs (unsigned-reg unsigned-stack)))
1054 (:arg-types unsigned-num (:constant (unsigned-byte 31)))
1055 (:info target not-p y))
1057 (macrolet ((define-conditional-vop (tran cond unsigned not-cond not-unsigned)
1058 `(progn
1059 ,@(mapcar
1060 (lambda (suffix cost signed)
1061 `(define-vop (;; FIXME: These could be done more
1062 ;; cleanly with SYMBOLICATE.
1063 ,(intern (format nil "~:@(FAST-IF-~A~A~)"
1064 tran suffix))
1065 ,(intern
1066 (format nil "~:@(FAST-CONDITIONAL~A~)"
1067 suffix)))
1068 (:translate ,tran)
1069 (:generator ,cost
1070 (inst cmp x
1071 ,(if (eq suffix '-c/fixnum)
1072 '(fixnumize y)
1073 'y))
1074 (inst jmp (if not-p
1075 ,(if signed
1076 not-cond
1077 not-unsigned)
1078 ,(if signed
1079 cond
1080 unsigned))
1081 target))))
1082 '(/fixnum -c/fixnum /signed -c/signed /unsigned -c/unsigned)
1083 ; '(/fixnum /signed /unsigned)
1084 '(4 3 6 5 6 5)
1085 '(t t t t nil nil)))))
1087 (define-conditional-vop < :l :b :ge :ae)
1088 (define-conditional-vop > :g :a :le :be))
1090 (define-vop (fast-if-eql/signed fast-conditional/signed)
1091 (:translate eql)
1092 (:generator 6
1093 (inst cmp x y)
1094 (inst jmp (if not-p :ne :e) target)))
1096 (define-vop (fast-if-eql-c/signed fast-conditional-c/signed)
1097 (:translate eql)
1098 (:generator 5
1099 (cond ((and (sc-is x signed-reg) (zerop y))
1100 (inst test x x)) ; smaller instruction
1102 (inst cmp x y)))
1103 (inst jmp (if not-p :ne :e) target)))
1105 (define-vop (fast-if-eql/unsigned fast-conditional/unsigned)
1106 (:translate eql)
1107 (:generator 6
1108 (inst cmp x y)
1109 (inst jmp (if not-p :ne :e) target)))
1111 (define-vop (fast-if-eql-c/unsigned fast-conditional-c/unsigned)
1112 (:translate eql)
1113 (:generator 5
1114 (cond ((and (sc-is x unsigned-reg) (zerop y))
1115 (inst test x x)) ; smaller instruction
1117 (inst cmp x y)))
1118 (inst jmp (if not-p :ne :e) target)))
1120 ;;; EQL/FIXNUM is funny because the first arg can be of any type, not just a
1121 ;;; known fixnum.
1123 ;;; These versions specify a fixnum restriction on their first arg. We have
1124 ;;; also generic-eql/fixnum VOPs which are the same, but have no restriction on
1125 ;;; the first arg and a higher cost. The reason for doing this is to prevent
1126 ;;; fixnum specific operations from being used on word integers, spuriously
1127 ;;; consing the argument.
1129 (define-vop (fast-eql/fixnum fast-conditional)
1130 (:args (x :scs (any-reg)
1131 :load-if (not (and (sc-is x control-stack)
1132 (sc-is y any-reg))))
1133 (y :scs (any-reg control-stack)))
1134 (:arg-types tagged-num tagged-num)
1135 (:note "inline fixnum comparison")
1136 (:translate eql)
1137 (:generator 4
1138 (inst cmp x y)
1139 (inst jmp (if not-p :ne :e) target)))
1140 (define-vop (generic-eql/fixnum fast-eql/fixnum)
1141 (:args (x :scs (any-reg descriptor-reg)
1142 :load-if (not (and (sc-is x control-stack)
1143 (sc-is y any-reg))))
1144 (y :scs (any-reg control-stack)))
1145 (:arg-types * tagged-num)
1146 (:variant-cost 7))
1149 (define-vop (fast-eql-c/fixnum fast-conditional/fixnum)
1150 (:args (x :scs (any-reg control-stack)))
1151 (:arg-types tagged-num (:constant (signed-byte 29)))
1152 (:info target not-p y)
1153 (:translate eql)
1154 (:generator 2
1155 (cond ((and (sc-is x any-reg) (zerop y))
1156 (inst test x x)) ; smaller instruction
1158 (inst cmp x (fixnumize y))))
1159 (inst jmp (if not-p :ne :e) target)))
1161 (define-vop (generic-eql-c/fixnum fast-eql-c/fixnum)
1162 (:args (x :scs (any-reg descriptor-reg control-stack)))
1163 (:arg-types * (:constant (signed-byte 29)))
1164 (:variant-cost 6))
1166 ;;;; 32-bit logical operations
1168 (define-vop (merge-bits)
1169 (:translate merge-bits)
1170 (:args (shift :scs (signed-reg unsigned-reg) :target ecx)
1171 (prev :scs (unsigned-reg) :target result)
1172 (next :scs (unsigned-reg)))
1173 (:arg-types tagged-num unsigned-num unsigned-num)
1174 (:temporary (:sc signed-reg :offset ecx-offset :from (:argument 0)) ecx)
1175 (:results (result :scs (unsigned-reg) :from (:argument 1)))
1176 (:result-types unsigned-num)
1177 (:policy :fast-safe)
1178 (:generator 4
1179 (move ecx shift)
1180 (move result prev)
1181 (inst shrd result next :cl)))
1183 ;;; Only the lower 6 bits of the shift amount are significant.
1184 (define-vop (shift-towards-someplace)
1185 (:policy :fast-safe)
1186 (:args (num :scs (unsigned-reg) :target r)
1187 (amount :scs (signed-reg) :target ecx))
1188 (:arg-types unsigned-num tagged-num)
1189 (:temporary (:sc signed-reg :offset ecx-offset :from (:argument 1)) ecx)
1190 (:results (r :scs (unsigned-reg) :from (:argument 0)))
1191 (:result-types unsigned-num))
1193 (define-vop (shift-towards-start shift-towards-someplace)
1194 (:translate shift-towards-start)
1195 (:note "SHIFT-TOWARDS-START")
1196 (:generator 1
1197 (move r num)
1198 (move ecx amount)
1199 (inst shr r :cl)))
1201 (define-vop (shift-towards-end shift-towards-someplace)
1202 (:translate shift-towards-end)
1203 (:note "SHIFT-TOWARDS-END")
1204 (:generator 1
1205 (move r num)
1206 (move ecx amount)
1207 (inst shl r :cl)))
1209 ;;;; Modular functions
1211 (defmacro define-mod-binop ((name prototype) function)
1212 `(define-vop (,name ,prototype)
1213 (:args (x :target r :scs (unsigned-reg signed-reg)
1214 :load-if (not (and (or (sc-is x unsigned-stack)
1215 (sc-is x signed-stack))
1216 (or (sc-is y unsigned-reg)
1217 (sc-is y signed-reg))
1218 (or (sc-is r unsigned-stack)
1219 (sc-is r signed-stack))
1220 (location= x r))))
1221 (y :scs (unsigned-reg signed-reg unsigned-stack signed-stack)))
1222 (:arg-types untagged-num untagged-num)
1223 (:results (r :scs (unsigned-reg signed-reg) :from (:argument 0)
1224 :load-if (not (and (or (sc-is x unsigned-stack)
1225 (sc-is x signed-stack))
1226 (or (sc-is y unsigned-reg)
1227 (sc-is y unsigned-reg))
1228 (or (sc-is r unsigned-stack)
1229 (sc-is r unsigned-stack))
1230 (location= x r)))))
1231 (:result-types unsigned-num)
1232 (:translate ,function)))
1233 (defmacro define-mod-binop-c ((name prototype) function)
1234 `(define-vop (,name ,prototype)
1235 (:args (x :target r :scs (unsigned-reg signed-reg)
1236 :load-if (not (and (or (sc-is x unsigned-stack)
1237 (sc-is x signed-stack))
1238 (or (sc-is r unsigned-stack)
1239 (sc-is r signed-stack))
1240 (location= x r)))))
1241 (:info y)
1242 (:arg-types untagged-num (:constant (or (unsigned-byte 31) (signed-byte 32))))
1243 (:results (r :scs (unsigned-reg signed-reg) :from (:argument 0)
1244 :load-if (not (and (or (sc-is x unsigned-stack)
1245 (sc-is x signed-stack))
1246 (or (sc-is r unsigned-stack)
1247 (sc-is r unsigned-stack))
1248 (location= x r)))))
1249 (:result-types unsigned-num)
1250 (:translate ,function)))
1252 (macrolet ((def (name -c-p)
1253 (let ((fun64 (intern (format nil "~S-MOD64" name)))
1254 (vopu (intern (format nil "FAST-~S/UNSIGNED=>UNSIGNED" name)))
1255 (vopcu (intern (format nil "FAST-~S-C/UNSIGNED=>UNSIGNED" name)))
1256 (vopf (intern (format nil "FAST-~S/FIXNUM=>FIXNUM" name)))
1257 (vopcf (intern (format nil "FAST-~S-C/FIXNUM=>FIXNUM" name)))
1258 (vop64u (intern (format nil "FAST-~S-MOD64/WORD=>UNSIGNED" name)))
1259 (vop64f (intern (format nil "FAST-~S-MOD64/FIXNUM=>FIXNUM" name)))
1260 (vop64cu (intern (format nil "FAST-~S-MOD64-C/WORD=>UNSIGNED" name)))
1261 (vop64cf (intern (format nil "FAST-~S-MOD64-C/FIXNUM=>FIXNUM" name)))
1262 (sfun61 (intern (format nil "~S-SMOD61" name)))
1263 (svop61f (intern (format nil "FAST-~S-SMOD61/FIXNUM=>FIXNUM" name)))
1264 (svop61cf (intern (format nil "FAST-~S-SMOD61-C/FIXNUM=>FIXNUM" name))))
1265 `(progn
1266 (define-modular-fun ,fun64 (x y) ,name :unsigned 64)
1267 (define-modular-fun ,sfun61 (x y) ,name :signed 61)
1268 (define-mod-binop (,vop64u ,vopu) ,fun64)
1269 (define-vop (,vop64f ,vopf) (:translate ,fun64))
1270 (define-vop (,svop61f ,vopf) (:translate ,sfun61))
1271 ,@(when -c-p
1272 `((define-mod-binop-c (,vop64cu ,vopcu) ,fun64)
1273 (define-vop (,svop61cf ,vopcf) (:translate ,sfun61))))))))
1274 (def + t)
1275 (def - t)
1276 ;; (no -C variant as x86 MUL instruction doesn't take an immediate)
1277 (def * nil))
1279 (define-vop (fast-ash-left-mod64-c/unsigned=>unsigned
1280 fast-ash-c/unsigned=>unsigned)
1281 (:translate ash-left-mod64))
1282 (define-vop (fast-ash-left-mod64/unsigned=>unsigned
1283 fast-ash-left/unsigned=>unsigned))
1284 (deftransform ash-left-mod64 ((integer count)
1285 ((unsigned-byte 64) (unsigned-byte 6)))
1286 (when (sb!c::constant-lvar-p count)
1287 (sb!c::give-up-ir1-transform))
1288 '(%primitive fast-ash-left-mod64/unsigned=>unsigned integer count))
1290 (define-vop (fast-ash-left-smod61-c/fixnum=>fixnum
1291 fast-ash-c/fixnum=>fixnum)
1292 (:translate ash-left-smod61))
1293 (define-vop (fast-ash-left-smod61/fixnum=>fixnum
1294 fast-ash-left/fixnum=>fixnum))
1295 (deftransform ash-left-smod61 ((integer count)
1296 ((signed-byte 61) (unsigned-byte 6)))
1297 (when (sb!c::constant-lvar-p count)
1298 (sb!c::give-up-ir1-transform))
1299 '(%primitive fast-ash-left-smod61/fixnum=>fixnum integer count))
1301 (in-package "SB!C")
1303 (defknown sb!vm::%lea-mod64 (integer integer (member 1 2 4 8) (signed-byte 64))
1304 (unsigned-byte 64)
1305 (foldable flushable movable))
1306 (defknown sb!vm::%lea-smod61 (integer integer (member 1 2 4 8) (signed-byte 64))
1307 (signed-byte 61)
1308 (foldable flushable movable))
1310 (define-modular-fun-optimizer %lea ((base index scale disp) :unsigned :width width)
1311 (when (and (<= width 64)
1312 (constant-lvar-p scale)
1313 (constant-lvar-p disp))
1314 (cut-to-width base :unsigned width)
1315 (cut-to-width index :unsigned width)
1316 'sb!vm::%lea-mod64))
1317 (define-modular-fun-optimizer %lea ((base index scale disp) :signed :width width)
1318 (when (and (<= width 61)
1319 (constant-lvar-p scale)
1320 (constant-lvar-p disp))
1321 (cut-to-width base :signed width)
1322 (cut-to-width index :signed width)
1323 'sb!vm::%lea-smod61))
1325 #+sb-xc-host
1326 (progn
1327 (defun sb!vm::%lea-mod64 (base index scale disp)
1328 (ldb (byte 64 0) (%lea base index scale disp)))
1329 (defun sb!vm::%lea-smod61 (base index scale disp)
1330 (mask-signed-field 61 (%lea base index scale disp))))
1331 #-sb-xc-host
1332 (progn
1333 (defun sb!vm::%lea-mod64 (base index scale disp)
1334 (let ((base (logand base #xffffffffffffffff))
1335 (index (logand index #xffffffffffffffff)))
1336 ;; can't use modular version of %LEA, as we only have VOPs for
1337 ;; constant SCALE and DISP.
1338 (ldb (byte 64 0) (+ base (* index scale) disp))))
1339 (defun sb!vm::%lea-smod61 (base index scale disp)
1340 (let ((base (mask-signed-field 61 base))
1341 (index (mask-signed-field 61 index)))
1342 ;; can't use modular version of %LEA, as we only have VOPs for
1343 ;; constant SCALE and DISP.
1344 (mask-signed-field 61 (+ base (* index scale) disp)))))
1346 (in-package "SB!VM")
1348 (define-vop (%lea-mod64/unsigned=>unsigned
1349 %lea/unsigned=>unsigned)
1350 (:translate %lea-mod64))
1351 (define-vop (%lea-smod61/fixnum=>fixnum
1352 %lea/fixnum=>fixnum)
1353 (:translate %lea-smod61))
1355 ;;; logical operations
1356 (define-modular-fun lognot-mod64 (x) lognot :unsigned 64)
1357 (define-vop (lognot-mod64/unsigned=>unsigned)
1358 (:translate lognot-mod64)
1359 (:args (x :scs (unsigned-reg unsigned-stack) :target r
1360 :load-if (not (and (sc-is x unsigned-stack)
1361 (sc-is r unsigned-stack)
1362 (location= x r)))))
1363 (:arg-types unsigned-num)
1364 (:results (r :scs (unsigned-reg)
1365 :load-if (not (and (sc-is x unsigned-stack)
1366 (sc-is r unsigned-stack)
1367 (location= x r)))))
1368 (:result-types unsigned-num)
1369 (:policy :fast-safe)
1370 (:generator 1
1371 (move r x)
1372 (inst not r)))
1374 (define-modular-fun logxor-mod64 (x y) logxor :unsigned 64)
1375 (define-mod-binop (fast-logxor-mod64/word=>unsigned
1376 fast-logxor/unsigned=>unsigned)
1377 logxor-mod64)
1378 (define-mod-binop-c (fast-logxor-mod64-c/word=>unsigned
1379 fast-logxor-c/unsigned=>unsigned)
1380 logxor-mod64)
1381 (define-vop (fast-logxor-mod64/fixnum=>fixnum
1382 fast-logxor/fixnum=>fixnum)
1383 (:translate logxor-mod64))
1384 (define-vop (fast-logxor-mod64-c/fixnum=>fixnum
1385 fast-logxor-c/fixnum=>fixnum)
1386 (:translate logxor-mod64))
1388 (define-source-transform logeqv (&rest args)
1389 (if (oddp (length args))
1390 `(logxor ,@args)
1391 `(lognot (logxor ,@args))))
1392 (define-source-transform logandc1 (x y)
1393 `(logand (lognot ,x) ,y))
1394 (define-source-transform logandc2 (x y)
1395 `(logand ,x (lognot ,y)))
1396 (define-source-transform logorc1 (x y)
1397 `(logior (lognot ,x) ,y))
1398 (define-source-transform logorc2 (x y)
1399 `(logior ,x (lognot ,y)))
1400 (define-source-transform lognor (x y)
1401 `(lognot (logior ,x ,y)))
1402 (define-source-transform lognand (x y)
1403 `(lognot (logand ,x ,y)))
1405 ;;;; bignum stuff
1407 (define-vop (bignum-length get-header-data)
1408 (:translate sb!bignum:%bignum-length)
1409 (:policy :fast-safe))
1411 (define-vop (bignum-set-length set-header-data)
1412 (:translate sb!bignum:%bignum-set-length)
1413 (:policy :fast-safe))
1415 (define-full-reffer bignum-ref * bignum-digits-offset other-pointer-lowtag
1416 (unsigned-reg) unsigned-num sb!bignum:%bignum-ref)
1417 (define-full-reffer+offset bignum--ref-with-offset * bignum-digits-offset
1418 other-pointer-lowtag (unsigned-reg) unsigned-num
1419 sb!bignum:%bignum-ref-with-offset)
1420 (define-full-setter bignum-set * bignum-digits-offset other-pointer-lowtag
1421 (unsigned-reg) unsigned-num sb!bignum:%bignum-set)
1423 (define-vop (digit-0-or-plus)
1424 (:translate sb!bignum:%digit-0-or-plusp)
1425 (:policy :fast-safe)
1426 (:args (digit :scs (unsigned-reg)))
1427 (:arg-types unsigned-num)
1428 (:conditional)
1429 (:info target not-p)
1430 (:generator 3
1431 (inst or digit digit)
1432 (inst jmp (if not-p :s :ns) target)))
1435 ;;; For add and sub with carry the sc of carry argument is any-reg so
1436 ;;; that it may be passed as a fixnum or word and thus may be 0, 1, or
1437 ;;; 8. This is easy to deal with and may save a fixnum-word
1438 ;;; conversion.
1439 (define-vop (add-w/carry)
1440 (:translate sb!bignum:%add-with-carry)
1441 (:policy :fast-safe)
1442 (:args (a :scs (unsigned-reg) :target result)
1443 (b :scs (unsigned-reg unsigned-stack) :to :eval)
1444 (c :scs (any-reg) :target temp))
1445 (:arg-types unsigned-num unsigned-num positive-fixnum)
1446 (:temporary (:sc any-reg :from (:argument 2) :to :eval) temp)
1447 (:results (result :scs (unsigned-reg) :from (:argument 0))
1448 (carry :scs (unsigned-reg)))
1449 (:result-types unsigned-num positive-fixnum)
1450 (:generator 4
1451 (move result a)
1452 (move temp c)
1453 (inst neg temp) ; Set the carry flag to 0 if c=0 else to 1
1454 (inst adc result b)
1455 (inst mov carry 0)
1456 (inst adc carry carry)))
1458 ;;; Note: the borrow is 1 for no borrow and 0 for a borrow, the opposite
1459 ;;; of the x86-64 convention.
1460 (define-vop (sub-w/borrow)
1461 (:translate sb!bignum:%subtract-with-borrow)
1462 (:policy :fast-safe)
1463 (:args (a :scs (unsigned-reg) :to :eval :target result)
1464 (b :scs (unsigned-reg unsigned-stack) :to :result)
1465 (c :scs (any-reg control-stack)))
1466 (:arg-types unsigned-num unsigned-num positive-fixnum)
1467 (:results (result :scs (unsigned-reg) :from :eval)
1468 (borrow :scs (unsigned-reg)))
1469 (:result-types unsigned-num positive-fixnum)
1470 (:generator 5
1471 (inst cmp c 1) ; Set the carry flag to 1 if c=0 else to 0
1472 (move result a)
1473 (inst sbb result b)
1474 (inst mov borrow 1)
1475 (inst sbb borrow 0)))
1478 (define-vop (bignum-mult-and-add-3-arg)
1479 (:translate sb!bignum:%multiply-and-add)
1480 (:policy :fast-safe)
1481 (:args (x :scs (unsigned-reg) :target eax)
1482 (y :scs (unsigned-reg unsigned-stack))
1483 (carry-in :scs (unsigned-reg unsigned-stack)))
1484 (:arg-types unsigned-num unsigned-num unsigned-num)
1485 (:temporary (:sc unsigned-reg :offset eax-offset :from (:argument 0)
1486 :to (:result 1) :target lo) eax)
1487 (:temporary (:sc unsigned-reg :offset edx-offset :from (:argument 1)
1488 :to (:result 0) :target hi) edx)
1489 (:results (hi :scs (unsigned-reg))
1490 (lo :scs (unsigned-reg)))
1491 (:result-types unsigned-num unsigned-num)
1492 (:generator 20
1493 (move eax x)
1494 (inst mul eax y)
1495 (inst add eax carry-in)
1496 (inst adc edx 0)
1497 (move hi edx)
1498 (move lo eax)))
1500 (define-vop (bignum-mult-and-add-4-arg)
1501 (:translate sb!bignum:%multiply-and-add)
1502 (:policy :fast-safe)
1503 (:args (x :scs (unsigned-reg) :target eax)
1504 (y :scs (unsigned-reg unsigned-stack))
1505 (prev :scs (unsigned-reg unsigned-stack))
1506 (carry-in :scs (unsigned-reg unsigned-stack)))
1507 (:arg-types unsigned-num unsigned-num unsigned-num unsigned-num)
1508 (:temporary (:sc unsigned-reg :offset eax-offset :from (:argument 0)
1509 :to (:result 1) :target lo) eax)
1510 (:temporary (:sc unsigned-reg :offset edx-offset :from (:argument 1)
1511 :to (:result 0) :target hi) edx)
1512 (:results (hi :scs (unsigned-reg))
1513 (lo :scs (unsigned-reg)))
1514 (:result-types unsigned-num unsigned-num)
1515 (:generator 20
1516 (move eax x)
1517 (inst mul eax y)
1518 (inst add eax prev)
1519 (inst adc edx 0)
1520 (inst add eax carry-in)
1521 (inst adc edx 0)
1522 (move hi edx)
1523 (move lo eax)))
1526 (define-vop (bignum-mult)
1527 (:translate sb!bignum:%multiply)
1528 (:policy :fast-safe)
1529 (:args (x :scs (unsigned-reg) :target eax)
1530 (y :scs (unsigned-reg unsigned-stack)))
1531 (:arg-types unsigned-num unsigned-num)
1532 (:temporary (:sc unsigned-reg :offset eax-offset :from (:argument 0)
1533 :to (:result 1) :target lo) eax)
1534 (:temporary (:sc unsigned-reg :offset edx-offset :from (:argument 1)
1535 :to (:result 0) :target hi) edx)
1536 (:results (hi :scs (unsigned-reg))
1537 (lo :scs (unsigned-reg)))
1538 (:result-types unsigned-num unsigned-num)
1539 (:generator 20
1540 (move eax x)
1541 (inst mul eax y)
1542 (move hi edx)
1543 (move lo eax)))
1545 (define-vop (bignum-lognot lognot-mod64/unsigned=>unsigned)
1546 (:translate sb!bignum:%lognot))
1548 (define-vop (fixnum-to-digit)
1549 (:translate sb!bignum:%fixnum-to-digit)
1550 (:policy :fast-safe)
1551 (:args (fixnum :scs (any-reg control-stack) :target digit))
1552 (:arg-types tagged-num)
1553 (:results (digit :scs (unsigned-reg)
1554 :load-if (not (and (sc-is fixnum control-stack)
1555 (sc-is digit unsigned-stack)
1556 (location= fixnum digit)))))
1557 (:result-types unsigned-num)
1558 (:generator 1
1559 (move digit fixnum)
1560 (inst sar digit 3)))
1562 (define-vop (bignum-floor)
1563 (:translate sb!bignum:%floor)
1564 (:policy :fast-safe)
1565 (:args (div-high :scs (unsigned-reg) :target edx)
1566 (div-low :scs (unsigned-reg) :target eax)
1567 (divisor :scs (unsigned-reg unsigned-stack)))
1568 (:arg-types unsigned-num unsigned-num unsigned-num)
1569 (:temporary (:sc unsigned-reg :offset eax-offset :from (:argument 1)
1570 :to (:result 0) :target quo) eax)
1571 (:temporary (:sc unsigned-reg :offset edx-offset :from (:argument 0)
1572 :to (:result 1) :target rem) edx)
1573 (:results (quo :scs (unsigned-reg))
1574 (rem :scs (unsigned-reg)))
1575 (:result-types unsigned-num unsigned-num)
1576 (:generator 300
1577 (move edx div-high)
1578 (move eax div-low)
1579 (inst div eax divisor)
1580 (move quo eax)
1581 (move rem edx)))
1583 (define-vop (signify-digit)
1584 (:translate sb!bignum:%fixnum-digit-with-correct-sign)
1585 (:policy :fast-safe)
1586 (:args (digit :scs (unsigned-reg unsigned-stack) :target res))
1587 (:arg-types unsigned-num)
1588 (:results (res :scs (any-reg signed-reg)
1589 :load-if (not (and (sc-is digit unsigned-stack)
1590 (sc-is res control-stack signed-stack)
1591 (location= digit res)))))
1592 (:result-types signed-num)
1593 (:generator 1
1594 (move res digit)
1595 (when (sc-is res any-reg control-stack)
1596 (inst shl res 3))))
1598 (define-vop (digit-ashr)
1599 (:translate sb!bignum:%ashr)
1600 (:policy :fast-safe)
1601 (:args (digit :scs (unsigned-reg unsigned-stack) :target result)
1602 (count :scs (unsigned-reg) :target ecx))
1603 (:arg-types unsigned-num positive-fixnum)
1604 (:temporary (:sc unsigned-reg :offset ecx-offset :from (:argument 1)) ecx)
1605 (:results (result :scs (unsigned-reg) :from (:argument 0)
1606 :load-if (not (and (sc-is result unsigned-stack)
1607 (location= digit result)))))
1608 (:result-types unsigned-num)
1609 (:generator 2
1610 (move result digit)
1611 (move ecx count)
1612 (inst sar result :cl)))
1614 (define-vop (digit-ashr/c)
1615 (:translate sb!bignum:%ashr)
1616 (:policy :fast-safe)
1617 (:args (digit :scs (unsigned-reg unsigned-stack) :target result))
1618 (:arg-types unsigned-num (:constant (integer 0 63)))
1619 (:info count)
1620 (:results (result :scs (unsigned-reg) :from (:argument 0)
1621 :load-if (not (and (sc-is result unsigned-stack)
1622 (location= digit result)))))
1623 (:result-types unsigned-num)
1624 (:generator 1
1625 (move result digit)
1626 (inst sar result count)))
1628 (define-vop (digit-lshr digit-ashr)
1629 (:translate sb!bignum:%digit-logical-shift-right)
1630 (:generator 1
1631 (move result digit)
1632 (move ecx count)
1633 (inst shr result :cl)))
1635 (define-vop (digit-ashl digit-ashr)
1636 (:translate sb!bignum:%ashl)
1637 (:generator 1
1638 (move result digit)
1639 (move ecx count)
1640 (inst shl result :cl)))
1642 ;;;; static functions
1644 (define-static-fun two-arg-/ (x y) :translate /)
1646 (define-static-fun two-arg-gcd (x y) :translate gcd)
1647 (define-static-fun two-arg-lcm (x y) :translate lcm)
1649 (define-static-fun two-arg-and (x y) :translate logand)
1650 (define-static-fun two-arg-ior (x y) :translate logior)
1651 (define-static-fun two-arg-xor (x y) :translate logxor)
1654 (in-package "SB!C")
1656 (defun *-transformer (y)
1657 (cond
1658 ((= y (ash 1 (integer-length y)))
1659 ;; there's a generic transform for y = 2^k
1660 (give-up-ir1-transform))
1661 ((member y '(3 5 9))
1662 ;; we can do these multiplications directly using LEA
1663 `(%lea x x ,(1- y) 0))
1665 ;; A normal 64-bit multiplication takes 4 cycles on Athlon 64/Opteron.
1666 ;; Optimizing multiplications (other than the above cases) to
1667 ;; shifts/adds/leas gives a maximum improvement of 1 cycle, but requires
1668 ;; quite a lot of hairy code.
1669 (give-up-ir1-transform))))
1671 (deftransform * ((x y)
1672 ((unsigned-byte 64) (constant-arg (unsigned-byte 64)))
1673 (unsigned-byte 64))
1674 "recode as leas, shifts and adds"
1675 (let ((y (lvar-value y)))
1676 (*-transformer y)))
1677 (deftransform sb!vm::*-mod64
1678 ((x y) ((unsigned-byte 64) (constant-arg (unsigned-byte 64)))
1679 (unsigned-byte 64))
1680 "recode as leas, shifts and adds"
1681 (let ((y (lvar-value y)))
1682 (*-transformer y)))
1684 (deftransform * ((x y)
1685 ((signed-byte 61) (constant-arg (unsigned-byte 64)))
1686 (signed-byte 61))
1687 "recode as leas, shifts and adds"
1688 (let ((y (lvar-value y)))
1689 (*-transformer y)))
1690 (deftransform sb!vm::*-smod61
1691 ((x y) ((signed-byte 61) (constant-arg (unsigned-byte 64)))
1692 (signed-byte 61))
1693 "recode as leas, shifts and adds"
1694 (let ((y (lvar-value y)))
1695 (*-transformer y)))