SSE intrinsics
[sbcl/pkhuong.git] / src / compiler / generic / vm-tran.lisp
blob22f075a2343fa342d5acba806c62734271c9f188
1 ;;;; implementation-dependent transforms
3 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; more information.
5 ;;;;
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB!C")
14 ;;; We need to define these predicates, since the TYPEP source
15 ;;; transform picks whichever predicate was defined last when there
16 ;;; are multiple predicates for equivalent types.
17 (define-source-transform short-float-p (x) `(single-float-p ,x))
18 #!-long-float
19 (define-source-transform long-float-p (x) `(double-float-p ,x))
21 (define-source-transform compiled-function-p (x)
22 #!-sb-eval
23 `(functionp ,x)
24 #!+sb-eval
25 (once-only ((x x))
26 `(and (functionp ,x)
27 (not (sb!eval:interpreted-function-p ,x)))))
29 (define-source-transform char-int (x)
30 `(char-code ,x))
32 (deftransform abs ((x) (rational))
33 '(if (< x 0) (- x) x))
35 ;;; We don't want to clutter the bignum code.
36 #!+(or x86 x86-64)
37 (define-source-transform sb!bignum:%bignum-ref (bignum index)
38 ;; KLUDGE: We use TRULY-THE here because even though the bignum code
39 ;; is (currently) compiled with (SAFETY 0), the compiler insists on
40 ;; inserting CAST nodes to ensure that INDEX is of the correct type.
41 ;; These CAST nodes do not generate any type checks, but they do
42 ;; interfere with the operation of FOLD-INDEX-ADDRESSING, below.
43 ;; This scenario is a problem for the more user-visible case of
44 ;; folding as well. --njf, 2006-12-01
45 `(sb!bignum:%bignum-ref-with-offset ,bignum
46 (truly-the bignum-index ,index) 0))
48 #!+(or x86 x86-64)
49 (defun fold-index-addressing (fun-name element-size lowtag data-offset
50 index offset &optional setter-p)
51 (multiple-value-bind (func index-args) (extract-fun-args index '(+ -) 2)
52 (destructuring-bind (x constant) index-args
53 (declare (ignorable x))
54 (unless (constant-lvar-p constant)
55 (give-up-ir1-transform))
56 (let ((value (lvar-value constant)))
57 (unless (and (integerp value)
58 (sb!vm::foldable-constant-offset-p
59 element-size lowtag data-offset
60 (funcall func value (lvar-value offset))))
61 (give-up-ir1-transform "constant is too large for inlining"))
62 (splice-fun-args index func 2)
63 `(lambda (thing index off1 off2 ,@(when setter-p
64 '(value)))
65 (,fun-name thing index (,func off2 off1) ,@(when setter-p
66 '(value))))))))
68 #!+(or x86 x86-64)
69 (deftransform sb!bignum:%bignum-ref-with-offset
70 ((bignum index offset) * * :node node)
71 (fold-index-addressing 'sb!bignum:%bignum-ref-with-offset
72 sb!vm:n-word-bits sb!vm:other-pointer-lowtag
73 sb!vm:bignum-digits-offset
74 index offset))
76 ;;; The layout is stored in slot 0.
77 (define-source-transform %instance-layout (x)
78 `(truly-the layout (%instance-ref ,x 0)))
79 (define-source-transform %set-instance-layout (x val)
80 `(%instance-set ,x 0 (the layout ,val)))
81 (define-source-transform %funcallable-instance-layout (x)
82 `(truly-the layout (%funcallable-instance-info ,x 0)))
83 (define-source-transform %set-funcallable-instance-layout (x val)
84 `(setf (%funcallable-instance-info ,x 0) (the layout ,val)))
86 ;;;; character support
88 ;;; In our implementation there are really only BASE-CHARs.
89 #+nil
90 (define-source-transform characterp (obj)
91 `(base-char-p ,obj))
93 ;;;; simplifying HAIRY-DATA-VECTOR-REF and HAIRY-DATA-VECTOR-SET
95 (deftransform hairy-data-vector-ref ((string index) (simple-string t))
96 (let ((ctype (lvar-type string)))
97 (if (array-type-p ctype)
98 ;; the other transform will kick in, so that's OK
99 (give-up-ir1-transform)
100 `(etypecase string
101 ((simple-array character (*))
102 (data-vector-ref string index))
103 #!+sb-unicode
104 ((simple-array base-char (*))
105 (data-vector-ref string index))
106 ((simple-array nil (*))
107 (data-vector-ref string index))))))
109 ;;; This and the corresponding -SET transform work equally well on non-simple
110 ;;; arrays, but after benchmarking (on x86), Nikodemus didn't find any cases
111 ;;; where it actually helped with non-simple arrays -- to the contrary, it
112 ;;; only made for bigger and up 1o 100% slower code.
113 (deftransform hairy-data-vector-ref ((array index) (simple-array t) *)
114 "avoid runtime dispatch on array element type"
115 (let ((element-ctype (extract-upgraded-element-type array))
116 (declared-element-ctype (extract-declared-element-type array)))
117 (declare (type ctype element-ctype))
118 (when (eq *wild-type* element-ctype)
119 (give-up-ir1-transform
120 "Upgraded element type of array is not known at compile time."))
121 ;; (The expansion here is basically a degenerate case of
122 ;; WITH-ARRAY-DATA. Since WITH-ARRAY-DATA is implemented as a
123 ;; macro, and macros aren't expanded in transform output, we have
124 ;; to hand-expand it ourselves.)
125 (let* ((element-type-specifier (type-specifier element-ctype)))
126 `(multiple-value-bind (array index)
127 (%data-vector-and-index array index)
128 (declare (type (simple-array ,element-type-specifier 1) array))
129 ,(let ((bare-form '(data-vector-ref array index)))
130 (if (type= element-ctype declared-element-ctype)
131 bare-form
132 `(the ,(type-specifier declared-element-ctype)
133 ,bare-form)))))))
135 ;;; Transform multi-dimensional array to one dimensional data vector
136 ;;; access.
137 (deftransform data-vector-ref ((array index) (simple-array t))
138 (let ((array-type (lvar-type array)))
139 (unless (array-type-p array-type)
140 (give-up-ir1-transform))
141 (let ((dims (array-type-dimensions array-type)))
142 (when (or (atom dims) (= (length dims) 1))
143 (give-up-ir1-transform))
144 (let ((el-type (array-type-specialized-element-type array-type))
145 (total-size (if (member '* dims)
147 (reduce #'* dims))))
148 `(data-vector-ref (truly-the (simple-array ,(type-specifier el-type)
149 (,total-size))
150 (%array-data-vector array))
151 index)))))
153 ;;; Transform data vector access to a form that opens up optimization
154 ;;; opportunities. On platforms that support DATA-VECTOR-REF-WITH-OFFSET
155 ;;; DATA-VECTOR-REF is not supported at all.
156 #!+(or x86 x86-64)
157 (define-source-transform data-vector-ref (array index)
158 `(data-vector-ref-with-offset ,array ,index 0))
160 #!+(or x86 x86-64)
161 (deftransform data-vector-ref-with-offset ((array index offset))
162 (let ((array-type (lvar-type array)))
163 (when (or (not (array-type-p array-type))
164 (eql (array-type-specialized-element-type array-type)
165 *wild-type*))
166 (give-up-ir1-transform))
167 ;; It shouldn't be possible to get here with anything but a non-complex
168 ;; vector.
169 (aver (not (array-type-complexp array-type)))
170 (let* ((element-type (type-specifier (array-type-specialized-element-type array-type)))
171 (saetp (find-saetp element-type)))
172 (when (< (sb!vm:saetp-n-bits saetp) sb!vm:n-byte-bits)
173 (give-up-ir1-transform))
174 (fold-index-addressing 'data-vector-ref-with-offset
175 (sb!vm:saetp-n-bits saetp)
176 sb!vm:other-pointer-lowtag
177 sb!vm:vector-data-offset
178 index offset))))
180 (deftransform hairy-data-vector-set ((string index new-value)
181 (simple-string t t))
182 (let ((ctype (lvar-type string)))
183 (if (array-type-p ctype)
184 ;; the other transform will kick in, so that's OK
185 (give-up-ir1-transform)
186 `(etypecase string
187 ((simple-array character (*))
188 (data-vector-set string index new-value))
189 #!+sb-unicode
190 ((simple-array base-char (*))
191 (data-vector-set string index new-value))
192 ((simple-array nil (*))
193 (data-vector-set string index new-value))))))
195 ;;; This and the corresponding -REF transform work equally well on non-simple
196 ;;; arrays, but after benchmarking (on x86), Nikodemus didn't find any cases
197 ;;; where it actually helped with non-simple arrays -- to the contrary, it
198 ;;; only made for bigger and up 1o 100% slower code.
199 (deftransform hairy-data-vector-set ((array index new-value)
200 (simple-array t t)
202 "avoid runtime dispatch on array element type"
203 (let ((element-ctype (extract-upgraded-element-type array))
204 (declared-element-ctype (extract-declared-element-type array)))
205 (declare (type ctype element-ctype))
206 (when (eq *wild-type* element-ctype)
207 (give-up-ir1-transform
208 "Upgraded element type of array is not known at compile time."))
209 (let ((element-type-specifier (type-specifier element-ctype)))
210 `(multiple-value-bind (array index)
211 (%data-vector-and-index array index)
212 (declare (type (simple-array ,element-type-specifier 1) array)
213 (type ,element-type-specifier new-value))
214 ,(if (type= element-ctype declared-element-ctype)
215 '(data-vector-set array index new-value)
216 `(truly-the ,(type-specifier declared-element-ctype)
217 (data-vector-set array index
218 (the ,(type-specifier declared-element-ctype)
219 new-value))))))))
221 ;;; Transform multi-dimensional array to one dimensional data vector
222 ;;; access.
223 (deftransform data-vector-set ((array index new-value)
224 (simple-array t t))
225 (let ((array-type (lvar-type array)))
226 (unless (array-type-p array-type)
227 (give-up-ir1-transform))
228 (let ((dims (array-type-dimensions array-type)))
229 (when (or (atom dims) (= (length dims) 1))
230 (give-up-ir1-transform))
231 (let ((el-type (array-type-specialized-element-type array-type))
232 (total-size (if (member '* dims)
234 (reduce #'* dims))))
235 `(data-vector-set (truly-the (simple-array ,(type-specifier el-type)
236 (,total-size))
237 (%array-data-vector array))
238 index
239 new-value)))))
241 ;;; Transform data vector access to a form that opens up optimization
242 ;;; opportunities.
243 #!+(or x86 x86-64)
244 (define-source-transform data-vector-set (array index new-value)
245 `(data-vector-set-with-offset ,array ,index 0 ,new-value))
247 #!+(or x86 x86-64)
248 (deftransform data-vector-set-with-offset ((array index offset new-value))
249 (let ((array-type (lvar-type array)))
250 (when (or (not (array-type-p array-type))
251 (eql (array-type-specialized-element-type array-type)
252 *wild-type*))
253 ;; We don't yet know the exact element type, but will get that
254 ;; knowledge after some more type propagation.
255 (give-up-ir1-transform))
256 (aver (not (array-type-complexp array-type)))
257 (let* ((element-type (type-specifier (array-type-specialized-element-type array-type)))
258 (saetp (find-saetp element-type)))
259 (when (< (sb!vm:saetp-n-bits saetp) sb!vm:n-byte-bits)
260 (give-up-ir1-transform))
261 (fold-index-addressing 'data-vector-set-with-offset
262 (sb!vm:saetp-n-bits saetp)
263 sb!vm:other-pointer-lowtag
264 sb!vm:vector-data-offset
265 index offset t))))
267 (defoptimizer (%data-vector-and-index derive-type) ((array index))
268 (let ((atype (lvar-type array)))
269 (when (array-type-p atype)
270 (values-specifier-type
271 `(values (simple-array ,(type-specifier
272 (array-type-specialized-element-type atype))
273 (*))
274 index)))))
276 (deftransform %data-vector-and-index ((%array %index)
277 (simple-array t)
279 ;; KLUDGE: why the percent signs? Well, ARRAY and INDEX are
280 ;; respectively exported from the CL and SB!INT packages, which
281 ;; means that they're visible to all sorts of things. If the
282 ;; compiler can prove that the call to ARRAY-HEADER-P, below, either
283 ;; returns T or NIL, it will delete the irrelevant branch. However,
284 ;; user code might have got here with a variable named CL:ARRAY, and
285 ;; quite often compiler code with a variable named SB!INT:INDEX, so
286 ;; this can generate code deletion notes for innocuous user code:
287 ;; (DEFUN F (ARRAY I) (DECLARE (SIMPLE-VECTOR ARRAY)) (AREF ARRAY I))
288 ;; -- CSR, 2003-04-01
290 ;; We do this solely for the -OR-GIVE-UP side effect, since we want
291 ;; to know that the type can be figured out in the end before we
292 ;; proceed, but we don't care yet what the type will turn out to be.
293 (upgraded-element-type-specifier-or-give-up %array)
295 '(if (array-header-p %array)
296 (values (%array-data-vector %array) %index)
297 (values %array %index)))
299 ;;; transforms for getting at simple arrays of (UNSIGNED-BYTE N) when (< N 8)
301 ;;; FIXME: In CMU CL, these were commented out with #+NIL. Why? Should
302 ;;; we fix them or should we delete them? (Perhaps these definitions
303 ;;; predate the various DATA-VECTOR-REF-FOO VOPs which have
304 ;;; (:TRANSLATE DATA-VECTOR-REF), and are redundant now?)
305 #+nil
306 (macrolet
307 ((frob (type bits)
308 (let ((elements-per-word (truncate sb!vm:n-word-bits bits)))
309 `(progn
310 (deftransform data-vector-ref ((vector index)
311 (,type *))
312 `(multiple-value-bind (word bit)
313 (floor index ,',elements-per-word)
314 (ldb ,(ecase sb!vm:target-byte-order
315 (:little-endian '(byte ,bits (* bit ,bits)))
316 (:big-endian '(byte ,bits (- sb!vm:n-word-bits
317 (* (1+ bit) ,bits)))))
318 (%vector-raw-bits vector word))))
319 (deftransform data-vector-set ((vector index new-value)
320 (,type * *))
321 `(multiple-value-bind (word bit)
322 (floor index ,',elements-per-word)
323 (setf (ldb ,(ecase sb!vm:target-byte-order
324 (:little-endian '(byte ,bits (* bit ,bits)))
325 (:big-endian
326 '(byte ,bits (- sb!vm:n-word-bits
327 (* (1+ bit) ,bits)))))
328 (%vector-raw-bits vector word))
329 new-value)))))))
330 (frob simple-bit-vector 1)
331 (frob (simple-array (unsigned-byte 2) (*)) 2)
332 (frob (simple-array (unsigned-byte 4) (*)) 4))
334 ;;;; BIT-VECTOR hackery
336 ;;; SIMPLE-BIT-VECTOR bit-array operations are transformed to a word
337 ;;; loop that does 32 bits at a time.
339 ;;; FIXME: This is a lot of repeatedly macroexpanded code. It should
340 ;;; be a function call instead.
341 (macrolet ((def (bitfun wordfun)
342 `(deftransform ,bitfun ((bit-array-1 bit-array-2 result-bit-array)
343 (simple-bit-vector
344 simple-bit-vector
345 simple-bit-vector)
347 :node node :policy (>= speed space))
348 `(progn
349 ,@(unless (policy node (zerop safety))
350 '((unless (= (length bit-array-1)
351 (length bit-array-2)
352 (length result-bit-array))
353 (error "Argument and/or result bit arrays are not the same length:~
354 ~% ~S~% ~S ~% ~S"
355 bit-array-1
356 bit-array-2
357 result-bit-array))))
358 (let ((length (length result-bit-array)))
359 (if (= length 0)
360 ;; We avoid doing anything to 0-length
361 ;; bit-vectors, or rather, the memory that
362 ;; follows them. Other divisible-by-32 cases
363 ;; are handled by the (1- length), below.
364 ;; CSR, 2002-04-24
365 result-bit-array
366 (do ((index 0 (1+ index))
367 ;; bit-vectors of length 1-32 need
368 ;; precisely one (SETF %VECTOR-RAW-BITS),
369 ;; done here in the epilogue. - CSR,
370 ;; 2002-04-24
371 (end-1 (truncate (truly-the index (1- length))
372 sb!vm:n-word-bits)))
373 ((>= index end-1)
374 (setf (%vector-raw-bits result-bit-array index)
375 (,',wordfun (%vector-raw-bits bit-array-1 index)
376 (%vector-raw-bits bit-array-2 index)))
377 result-bit-array)
378 (declare (optimize (speed 3) (safety 0))
379 (type index index end-1))
380 (setf (%vector-raw-bits result-bit-array index)
381 (,',wordfun (%vector-raw-bits bit-array-1 index)
382 (%vector-raw-bits bit-array-2 index))))))))))
383 (def bit-and word-logical-and)
384 (def bit-ior word-logical-or)
385 (def bit-xor word-logical-xor)
386 (def bit-eqv word-logical-eqv)
387 (def bit-nand word-logical-nand)
388 (def bit-nor word-logical-nor)
389 (def bit-andc1 word-logical-andc1)
390 (def bit-andc2 word-logical-andc2)
391 (def bit-orc1 word-logical-orc1)
392 (def bit-orc2 word-logical-orc2))
394 (deftransform bit-not
395 ((bit-array result-bit-array)
396 (simple-bit-vector simple-bit-vector) *
397 :node node :policy (>= speed space))
398 `(progn
399 ,@(unless (policy node (zerop safety))
400 '((unless (= (length bit-array)
401 (length result-bit-array))
402 (error "Argument and result bit arrays are not the same length:~
403 ~% ~S~% ~S"
404 bit-array result-bit-array))))
405 (let ((length (length result-bit-array)))
406 (if (= length 0)
407 ;; We avoid doing anything to 0-length bit-vectors, or rather,
408 ;; the memory that follows them. Other divisible-by
409 ;; n-word-bits cases are handled by the (1- length), below.
410 ;; CSR, 2002-04-24
411 result-bit-array
412 (do ((index 0 (1+ index))
413 ;; bit-vectors of length 1 to n-word-bits need precisely
414 ;; one (SETF %VECTOR-RAW-BITS), done here in the
415 ;; epilogue. - CSR, 2002-04-24
416 (end-1 (truncate (truly-the index (1- length))
417 sb!vm:n-word-bits)))
418 ((>= index end-1)
419 (setf (%vector-raw-bits result-bit-array index)
420 (word-logical-not (%vector-raw-bits bit-array index)))
421 result-bit-array)
422 (declare (optimize (speed 3) (safety 0))
423 (type index index end-1))
424 (setf (%vector-raw-bits result-bit-array index)
425 (word-logical-not (%vector-raw-bits bit-array index))))))))
427 (deftransform bit-vector-= ((x y) (simple-bit-vector simple-bit-vector))
428 `(and (= (length x) (length y))
429 (let ((length (length x)))
430 (or (= length 0)
431 (do* ((i 0 (+ i 1))
432 (end-1 (floor (1- length) sb!vm:n-word-bits)))
433 ((>= i end-1)
434 (let* ((extra (1+ (mod (1- length) sb!vm:n-word-bits)))
435 (mask (ash #.(1- (ash 1 sb!vm:n-word-bits))
436 (- extra sb!vm:n-word-bits)))
437 (numx
438 (logand
439 (ash mask
440 ,(ecase sb!c:*backend-byte-order*
441 (:little-endian 0)
442 (:big-endian
443 '(- sb!vm:n-word-bits extra))))
444 (%vector-raw-bits x i)))
445 (numy
446 (logand
447 (ash mask
448 ,(ecase sb!c:*backend-byte-order*
449 (:little-endian 0)
450 (:big-endian
451 '(- sb!vm:n-word-bits extra))))
452 (%vector-raw-bits y i))))
453 (declare (type (integer 1 #.sb!vm:n-word-bits) extra)
454 (type sb!vm:word mask numx numy))
455 (= numx numy)))
456 (declare (type index i end-1))
457 (let ((numx (%vector-raw-bits x i))
458 (numy (%vector-raw-bits y i)))
459 (declare (type sb!vm:word numx numy))
460 (unless (= numx numy)
461 (return nil))))))))
463 (deftransform count ((item sequence) (bit simple-bit-vector) *
464 :policy (>= speed space))
465 `(let ((length (length sequence)))
466 (if (zerop length)
468 (do ((index 0 (1+ index))
469 (count 0)
470 (end-1 (truncate (truly-the index (1- length))
471 sb!vm:n-word-bits)))
472 ((>= index end-1)
473 (let* ((extra (1+ (mod (1- length) sb!vm:n-word-bits)))
474 (mask (ash #.(1- (ash 1 sb!vm:n-word-bits))
475 (- extra sb!vm:n-word-bits)))
476 (bits (logand (ash mask
477 ,(ecase sb!c:*backend-byte-order*
478 (:little-endian 0)
479 (:big-endian
480 '(- sb!vm:n-word-bits extra))))
481 (%vector-raw-bits sequence index))))
482 (declare (type (integer 1 #.sb!vm:n-word-bits) extra))
483 (declare (type sb!vm:word mask bits))
484 (incf count (logcount bits))
485 ,(if (constant-lvar-p item)
486 (if (zerop (lvar-value item))
487 '(- length count)
488 'count)
489 '(if (zerop item)
490 (- length count)
491 count))))
492 (declare (type index index count end-1)
493 (optimize (speed 3) (safety 0)))
494 (incf count (logcount (%vector-raw-bits sequence index)))))))
496 (deftransform fill ((sequence item) (simple-bit-vector bit) *
497 :policy (>= speed space))
498 (let ((value (if (constant-lvar-p item)
499 (if (= (lvar-value item) 0)
501 #.(1- (ash 1 sb!vm:n-word-bits)))
502 `(if (= item 0) 0 #.(1- (ash 1 sb!vm:n-word-bits))))))
503 `(let ((length (length sequence))
504 (value ,value))
505 (if (= length 0)
506 sequence
507 (do ((index 0 (1+ index))
508 ;; bit-vectors of length 1 to n-word-bits need precisely
509 ;; one (SETF %VECTOR-RAW-BITS), done here in the
510 ;; epilogue. - CSR, 2002-04-24
511 (end-1 (truncate (truly-the index (1- length))
512 sb!vm:n-word-bits)))
513 ((>= index end-1)
514 (setf (%vector-raw-bits sequence index) value)
515 sequence)
516 (declare (optimize (speed 3) (safety 0))
517 (type index index end-1))
518 (setf (%vector-raw-bits sequence index) value))))))
520 (deftransform fill ((sequence item) (simple-base-string base-char) *
521 :policy (>= speed space))
522 (let ((value (if (constant-lvar-p item)
523 (let* ((char (lvar-value item))
524 (code (sb!xc:char-code char))
525 (accum 0))
526 (dotimes (i sb!vm:n-word-bytes accum)
527 (setf accum (logior accum (ash code (* 8 i))))))
528 `(let ((code (sb!xc:char-code item)))
529 (logior ,@(loop for i from 0 below sb!vm:n-word-bytes
530 collect `(ash code ,(* 8 i))))))))
531 `(let ((length (length sequence))
532 (value ,value))
533 (multiple-value-bind (times rem)
534 (truncate length sb!vm:n-word-bytes)
535 (do ((index 0 (1+ index))
536 (end times))
537 ((>= index end)
538 (let ((place (* times sb!vm:n-word-bytes)))
539 (declare (fixnum place))
540 (dotimes (j rem sequence)
541 (declare (index j))
542 (setf (schar sequence (the index (+ place j))) item))))
543 (declare (optimize (speed 3) (safety 0))
544 (type index index))
545 (setf (%vector-raw-bits sequence index) value))))))
547 ;;;; %BYTE-BLT
549 ;;; FIXME: The old CMU CL code used various COPY-TO/FROM-SYSTEM-AREA
550 ;;; stuff (with all the associated bit-index cruft and overflow
551 ;;; issues) even for byte moves. In SBCL, we're converting to byte
552 ;;; moves as problems are discovered with the old code, and this is
553 ;;; currently (ca. sbcl-0.6.12.30) the main interface for code in
554 ;;; SB!KERNEL and SB!SYS (e.g. i/o code). It's not clear that it's the
555 ;;; ideal interface, though, and it probably deserves some thought.
556 (deftransform %byte-blt ((src src-start dst dst-start dst-end)
557 ((or (simple-unboxed-array (*)) system-area-pointer)
558 index
559 (or (simple-unboxed-array (*)) system-area-pointer)
560 index
561 index))
562 ;; FIXME: CMU CL had a hairier implementation of this (back when it
563 ;; was still called (%PRIMITIVE BYTE-BLT). It had the small problem
564 ;; that it didn't work for large (>16M) values of SRC-START or
565 ;; DST-START. However, it might have been more efficient. In
566 ;; particular, I don't really know how much the foreign function
567 ;; call costs us here. My guess is that if the overhead is
568 ;; acceptable for SQRT and COS, it's acceptable here, but this
569 ;; should probably be checked. -- WHN
570 '(flet ((sapify (thing)
571 (etypecase thing
572 (system-area-pointer thing)
573 ;; FIXME: The code here rather relies on the simple
574 ;; unboxed array here having byte-sized entries. That
575 ;; should be asserted explicitly, I just haven't found
576 ;; a concise way of doing it. (It would be nice to
577 ;; declare it in the DEFKNOWN too.)
578 ((simple-unboxed-array (*)) (vector-sap thing)))))
579 (declare (inline sapify))
580 (with-pinned-objects (dst src)
581 (memmove (sap+ (sapify dst) dst-start)
582 (sap+ (sapify src) src-start)
583 (- dst-end dst-start)))
584 (values)))
586 ;;;; transforms for EQL of floating point values
587 #!-float-eql-vops
588 (deftransform eql ((x y) (single-float single-float))
589 '(= (single-float-bits x) (single-float-bits y)))
591 #!-float-eql-vops
592 (deftransform eql ((x y) (double-float double-float))
593 '(and (= (double-float-low-bits x) (double-float-low-bits y))
594 (= (double-float-high-bits x) (double-float-high-bits y))))
597 ;;;; modular functions
599 ;;; FIXME: I think that the :GOODness of a modular function boils down
600 ;;; to whether the normal definition can be used in the middle of a
601 ;;; modular arrangement. LOGAND and LOGIOR can be for all unsigned
602 ;;; modular implementations, I believe, because for all unsigned
603 ;;; arguments of a given size the result of the ordinary definition is
604 ;;; the right one. This should follow through to other logical
605 ;;; functions, such as LOGXOR, should it not? -- CSR, 2007-12-29,
606 ;;; trying to understand a comment he wrote over four years
607 ;;; previously: "FIXME: XOR? ANDC1, ANDC2? -- CSR, 2003-09-16"
608 (define-good-modular-fun logand :untagged nil)
609 (define-good-modular-fun logior :untagged nil)
610 (define-good-modular-fun logxor :untagged nil)
611 (macrolet ((define-good-signed-modular-funs (&rest funs)
612 (let (result)
613 `(progn
614 ,@(dolist (fun funs (nreverse result))
615 (push `(define-good-modular-fun ,fun :untagged t) result)
616 (push `(define-good-modular-fun ,fun :tagged t) result))))))
617 (define-good-signed-modular-funs
618 logand logandc1 logandc2 logeqv logior lognand lognor lognot
619 logorc1 logorc2 logxor))
621 (macrolet
622 ((def (name kind width signedp)
623 (let ((type (ecase signedp
624 ((nil) 'unsigned-byte)
625 ((t) 'signed-byte))))
626 `(progn
627 (defknown ,name (integer (integer 0)) (,type ,width)
628 (foldable flushable movable))
629 (define-modular-fun-optimizer ash ((integer count) ,kind ,signedp :width width)
630 (when (and (<= width ,width)
631 (or (and (constant-lvar-p count)
632 (plusp (lvar-value count)))
633 (csubtypep (lvar-type count)
634 (specifier-type '(and unsigned-byte fixnum)))))
635 (cut-to-width integer ,kind width ,signedp)
636 ',name))
637 (setf (gethash ',name (modular-class-versions (find-modular-class ',kind ',signedp)))
638 `(ash ,',width))))))
639 ;; This should really be dependent on SB!VM:N-WORD-BITS, but since we
640 ;; don't have a true Alpha64 port yet, we'll have to stick to
641 ;; SB!VM:N-MACHINE-WORD-BITS for the time being. --njf, 2004-08-14
642 #!+#.(cl:if (cl:= 32 sb!vm:n-machine-word-bits) '(and) '(or))
643 (progn
644 #!+x86 (def sb!vm::ash-left-smod30 :tagged 30 t)
645 (def sb!vm::ash-left-mod32 :untagged 32 nil))
646 #!+#.(cl:if (cl:= 64 sb!vm:n-machine-word-bits) '(and) '(or))
647 (progn
648 #!+x86-64 (def sb!vm::ash-left-smod61 :tagged 61 t)
649 (def sb!vm::ash-left-mod64 :untagged 64 nil)))
651 ;;;; word-wise logical operations
653 ;;; These transforms assume the presence of modular arithmetic to
654 ;;; generate efficient code.
656 (define-source-transform word-logical-not (x)
657 `(logand (lognot (the sb!vm:word ,x)) #.(1- (ash 1 sb!vm:n-word-bits))))
659 (deftransform word-logical-and ((x y))
660 '(logand x y))
662 (deftransform word-logical-nand ((x y))
663 '(logand (lognand x y) #.(1- (ash 1 sb!vm:n-word-bits))))
665 (deftransform word-logical-or ((x y))
666 '(logior x y))
668 (deftransform word-logical-nor ((x y))
669 '(logand (lognor x y) #.(1- (ash 1 sb!vm:n-word-bits))))
671 (deftransform word-logical-xor ((x y))
672 '(logxor x y))
674 (deftransform word-logical-eqv ((x y))
675 '(logand (logeqv x y) #.(1- (ash 1 sb!vm:n-word-bits))))
677 (deftransform word-logical-orc1 ((x y))
678 '(logand (logorc1 x y) #.(1- (ash 1 sb!vm:n-word-bits))))
680 (deftransform word-logical-orc2 ((x y))
681 '(logand (logorc2 x y) #.(1- (ash 1 sb!vm:n-word-bits))))
683 (deftransform word-logical-andc1 ((x y))
684 '(logand (logandc1 x y) #.(1- (ash 1 sb!vm:n-word-bits))))
686 (deftransform word-logical-andc2 ((x y))
687 '(logand (logandc2 x y) #.(1- (ash 1 sb!vm:n-word-bits))))
690 ;;; There are two different ways the multiplier can be recoded. The
691 ;;; more obvious is to shift X by the correct amount for each bit set
692 ;;; in Y and to sum the results. But if there is a string of bits that
693 ;;; are all set, you can add X shifted by one more then the bit
694 ;;; position of the first set bit and subtract X shifted by the bit
695 ;;; position of the last set bit. We can't use this second method when
696 ;;; the high order bit is bit 31 because shifting by 32 doesn't work
697 ;;; too well.
698 (defun ub32-strength-reduce-constant-multiply (arg num)
699 (declare (type (unsigned-byte 32) num))
700 (let ((adds 0) (shifts 0)
701 (result nil) first-one)
702 (labels ((add (next-factor)
703 (setf result
704 (if result
705 (progn (incf adds) `(+ ,result ,next-factor))
706 next-factor))))
707 (declare (inline add))
708 (dotimes (bitpos 32)
709 (if first-one
710 (when (not (logbitp bitpos num))
711 (add (if (= (1+ first-one) bitpos)
712 ;; There is only a single bit in the string.
713 (progn (incf shifts) `(ash ,arg ,first-one))
714 ;; There are at least two.
715 (progn
716 (incf adds)
717 (incf shifts 2)
718 `(- (ash ,arg ,bitpos)
719 (ash ,arg ,first-one)))))
720 (setf first-one nil))
721 (when (logbitp bitpos num)
722 (setf first-one bitpos))))
723 (when first-one
724 (cond ((= first-one 31))
725 ((= first-one 30) (incf shifts) (add `(ash ,arg 30)))
727 (incf shifts 2)
728 (incf adds)
729 (add `(- (ash ,arg 31)
730 (ash ,arg ,first-one)))))
731 (incf shifts)
732 (add `(ash ,arg 31))))
733 (values (if (plusp adds)
734 `(logand ,result #.(1- (ash 1 32))) ; using modular arithmetic
735 result)
736 adds
737 shifts)))
740 ;;; Transform GET-LISP-OBJ-ADDRESS for constant immediates, since the normal
741 ;;; VOP can't handle them.
743 (deftransform sb!vm::get-lisp-obj-address ((obj) ((constant-arg fixnum)))
744 (ash (lvar-value obj) sb!vm::n-fixnum-tag-bits))
746 (deftransform sb!vm::get-lisp-obj-address ((obj) ((constant-arg character)))
747 (logior sb!vm::character-widetag
748 (ash (char-code (lvar-value obj)) sb!vm::n-widetag-bits)))