1 ;;;; functions to implement bitblt-ish operations
3 ;;;; This software is part of the SBCL system. See the README file for
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
16 (declaim (inline start-mask end-mask
))
18 ;;; Produce a mask that contains 1's for the COUNT "start" bits and
19 ;;; 0's for the remaining "end" bits. Only the lower 5 bits of COUNT
20 ;;; are significant (KLUDGE: because of hardwired implicit dependence
21 ;;; on 32-bit word size -- WHN 2001-03-19).
22 (defun start-mask (count)
23 (declare (fixnum count
))
24 (shift-towards-start most-positive-word
(- count
)))
26 ;;; Produce a mask that contains 1's for the COUNT "end" bits and 0's
27 ;;; for the remaining "start" bits. Only the lower 5 bits of COUNT are
28 ;;; significant (KLUDGE: because of hardwired implicit dependence on
29 ;;; 32-bit word size -- WHN 2001-03-19).
30 (defun end-mask (count)
31 (declare (fixnum count
))
32 (shift-towards-end most-positive-word
(- count
)))
35 ;;; the actual bashers and common uses of same
37 (defconstant min-bytes-c-call-threshold
38 ;; mostly just guessing here
39 #+(or x86 x86-64 ppc ppc64
) 128
40 #-
(or x86 x86-64 ppc ppc64
) 256)
42 (defmacro verify-src
/dst-bits-per-elt
(source destination expect-bits-per-element
)
43 (declare (ignorable source destination expect-bits-per-element
))
44 #+(and sb-devel
(not sb-devel-no-errors
))
45 `(let ((src-bits-per-element
46 (ash 1 (aref #.%%simple-array-n-bits-shifts%%
47 (%other-pointer-widetag
,source
))))
49 (ash 1 (aref #.%%simple-array-n-bits-shifts%%
50 (%other-pointer-widetag
,destination
)))))
51 (when (or (/= src-bits-per-element
,expect-bits-per-element
)
52 (/= dst-bits-per-element
,expect-bits-per-element
))
53 ;; Why enforce this: because since the arrays are lisp objects
54 ;; maybe we can be clever "somehow" (I'm not sure how)
55 ;; and/or maybe we have to unpoison the memory for #+ubsan.
56 ;; Whereas BYTE-BLT takes SAPs (and/or arrays) and so it has to
57 ;; be more strictly like memmove(). Because it is exactly that.
58 (error "Misuse of bash-copy: bits-per-elt=~D but src=~d and dst=~d"
59 ,expect-bits-per-element src-bits-per-element dst-bits-per-element
))))
61 ;;; 1, 2, 4, and 8 bytes per element can be handled with memmove()
62 ;;; or, if it's easy enough, a loop over VECTOR-RAW-BITS.
63 (defmacro define-byte-blt-copier
65 &aux
(bits-per-element (* bytes-per-element
8))
66 (vtype `(simple-array (unsigned-byte ,bits-per-element
) (*)))
67 (elements-per-word (/ n-word-bytes bytes-per-element
))
68 (always-call-out-p ; memmove() is _always_ asymptotically faster than this
69 ;; code, which can't make any use of vectorization that C libraries
70 ;; typically do. It's a question of the overhead of a C call.
71 `(>= nelements
,(/ min-bytes-c-call-threshold bytes-per-element
))))
73 ;; Iterate backwards if there is overlap and byte transfer is toward higher
74 ;; addresses. Technically (> dst-start src-start) is a necessary
75 ;; but not sufficient condition for overlap, but it's fine.
76 '(and (eq src dst
) (> dst-start src-start
)))
78 ;; We could reduce the number of loop variables by 1 by computing
79 ;; the distance between src-start and dst-start, and adding it in
80 ;; to each array reference. Probably it would be worse though.
81 '(do ((dst-index (the (or (eql -
1) index
) (+ dst-start nwords -
1))
83 (src-index (the (or (eql -
1) index
) (+ src-start nwords -
1))
85 ((< dst-index dst-start
))
86 (declare (type (or (eql -
1) index
) dst-index src-index
))
87 ;; Assigning into SRC is right, because DST and SRC are the same array.
88 ;; We don't need "both" arrays to be in registers.
89 (%set-vector-raw-bits src dst-index
90 (%vector-raw-bits src
(the index src-index
)))))
92 '(do ((dst-index dst-start
(the index
(1+ dst-index
)))
93 (src-index src-start
(the index
(1+ src-index
))))
94 ((>= dst-index dst-end
))
95 (%set-vector-raw-bits dst dst-index
(%vector-raw-bits src src-index
))))
97 ;; %BYTE-BLT wants the end as an index, which it converts back to a count
98 ;; by subtracting the start. Regardless, the args are way too confusing,
99 ;; so let's go directly to memmove. Cribbed from (DEFTRANSFORM %BYTE-BLT)
100 `(with-pinned-objects (dst src
)
101 (memmove (sap+ (vector-sap (the ,vtype dst
))
102 (the signed-word
(* dst-start
,bytes-per-element
)))
103 (sap+ (vector-sap (the ,vtype src
))
104 (the signed-word
(* src-start
,bytes-per-element
)))
105 (the word
(* nelements
,bytes-per-element
))))))
106 ;; The arguments are array element indices.
107 `(defun ,(intern (format nil
"UB~D-BASH-COPY" bits-per-element
)
108 (find-package "SB-KERNEL"))
109 (src src-start dst dst-start nelements
)
110 (declare (type index src-start dst-start nelements
))
111 (verify-src/dst-bits-per-elt src dst
,bits-per-element
)
113 (declare (optimize (safety 0)
114 (sb-c::alien-funcall-saves-fp-and-pc
0)))
115 #+cheneygc
(when (> nelements
0)
116 ;; cheneygc can't handle a WP fault in memcpy()
117 ;; because "if(!foreign_function_call_active ..."
118 (let ((last (truly-the index
(+ dst-start
(1- nelements
)))))
119 (data-vector-set (truly-the ,vtype dst
) last
120 (data-vector-ref (truly-the ,vtype dst
) last
))))
121 ,(if (= bytes-per-element sb-vm
:n-word-bytes
)
122 `(if ,always-call-out-p
124 (let ((nwords nelements
))
127 (let ((dst-end (the index
(+ dst-start nelements
))))
129 `(let ((dst-subword (mod dst-start
,elements-per-word
))
130 (src-subword (mod src-start
,elements-per-word
))
131 (dst (truly-the ,vtype dst
))
132 (src (truly-the ,vtype src
)))
133 (cond ((or ,always-call-out-p
134 (/= dst-subword src-subword
)) ; too complicated
137 ;; Using the primitive-type-specific data-vector-set,
138 ;; process at most (1- ELEMENTS-PER-WORD) elements
139 ;; until aligned to a word.
140 (let ((dst-end (+ dst-start nelements
))
141 (src-end (+ src-start nelements
))
142 (original-nelements nelements
))
144 (loop for i downfrom
(- elements-per-word
1)
145 repeat
(1- elements-per-word
)
147 ;; Test NELEMENTS first because it should be in a register
148 ;; from the preceding DECF.
149 `((when (and (/= nelements
0)
150 (logtest dst-end
,(1- elements-per-word
)))
151 (data-vector-set dst
(1- dst-end
)
152 (data-vector-ref src
(- src-end
,i
)))
153 (decf (the index dst-end
))
154 (decf (the index nelements
))
157 (decf src-end
(the (mod 8) (- original-nelements nelements
)))
158 ;; Now DST-END and SRC-END are element indices that start a word.
159 ;; Scan backwards by whole words.
160 (let ((nwords (truncate nelements
,elements-per-word
)))
162 ;; Convert to word indices
163 (let* ((dst-start (- (truncate dst-end
,elements-per-word
) nwords
))
164 (src-start (- (truncate src-end
,elements-per-word
) nwords
)))
166 (decf (the index dst-end
) (* nwords
,elements-per-word
))
167 (decf (the index src-end
) (* nwords
,elements-per-word
))
168 (decf nelements
(* nwords
,elements-per-word
))))
169 ;; If there are elements remaining after the last full word copied,
170 ;; process element by element.
172 (loop for i from
(1- elements-per-word
) downto
1
174 `((unless (= nelements
0)
177 (data-vector-ref src
(- src-end
,i
)))
178 ,@(unless (= i
(1- elements-per-word
))
179 '((decf (the index nelements
))))
184 (let ((original-nelements nelements
))
186 (loop for i downfrom
(- elements-per-word
2)
187 repeat
(1- elements-per-word
)
189 `((when (and (/= nelements
0)
190 (logtest dst-start
,(1- elements-per-word
)))
193 (data-vector-ref src
(+ src-start
,i
)))
194 (incf (the index dst-start
))
195 (decf (the index nelements
))
198 (incf (the index src-start
) (- original-nelements nelements
)))
199 (let ((nwords (truncate nelements
,elements-per-word
)))
201 (let* ((src-start (truncate src-start
,elements-per-word
))
202 (dst-start (truncate dst-start
,elements-per-word
))
203 (dst-end (the index
(+ dst-start nwords
))))
205 (incf dst-start
(* nwords
,elements-per-word
))
206 (incf src-start
(* nwords
,elements-per-word
))
207 (decf nelements
(* nwords
,elements-per-word
))))
210 (loop for i from
(- elements-per-word
2) downto
0
212 `((unless (= nelements
0)
215 (data-vector-ref src
(+ src-start
,i
)))
216 ,@(unless (= i
(- elements-per-word
2))
217 '((decf (the index nelements
))))
222 (define-byte-blt-copier 1)
223 (define-byte-blt-copier 2)
224 (define-byte-blt-copier 4)
225 #+64-bit
(define-byte-blt-copier 8)
227 ;;; We cheat a little bit by using TRULY-THE in the copying function to
228 ;;; force the compiler to generate good code in the (= BITSIZE
229 ;;; N-WORD-BITS) case. We don't use TRULY-THE in the other cases
230 ;;; to give the compiler freedom to generate better code.
231 (defmacro !define-byte-bashers
(bitsize)
232 (let* ((bytes-per-word (/ n-word-bits bitsize
))
233 (byte-offset `(integer 0 (,bytes-per-word
)))
234 (word-offset `(integer 0 ,(ceiling array-dimension-limit bytes-per-word
)))
235 (constant-bash-name (intern (format nil
"CONSTANT-UB~D-BASH" bitsize
) (find-package "SB-KERNEL")))
236 (array-fill-name (intern (format nil
"UB~D-BASH-FILL" bitsize
) (find-package "SB-KERNEL")))
237 (unary-bash-name (intern (format nil
"UNARY-UB~D-BASH" bitsize
) (find-package "SB-KERNEL")))
238 (array-copy-name (intern (format nil
"UB~D-BASH-COPY" bitsize
) (find-package "SB-KERNEL"))))
240 (declaim (inline ,constant-bash-name
))
241 ;; Fill DST with VALUE starting at DST-OFFSET and continuing
242 ;; for LENGTH bytes (however bytes are defined).
243 (defun ,constant-bash-name
(dst dst-offset length value
)
244 (declare (type word value
) (type index dst-offset length
))
245 (multiple-value-bind (dst-word-offset dst-byte-offset
)
246 (floor dst-offset
,bytes-per-word
)
247 (declare (type ,word-offset dst-word-offset
)
248 (type ,byte-offset dst-byte-offset
))
249 (multiple-value-bind (n-words final-bytes
)
250 (floor (+ dst-byte-offset length
) ,bytes-per-word
)
251 (declare (type ,word-offset n-words
)
252 (type ,byte-offset final-bytes
))
254 ,(unless (= bytes-per-word
1)
255 `(unless (zerop length
)
256 (%set-vector-raw-bits dst dst-word-offset
257 (if (>= length
,bytes-per-word
)
259 (let ((mask (shift-towards-end
260 (start-mask (* length
,bitsize
))
261 (* dst-byte-offset
,bitsize
))))
262 (word-logical-or (word-logical-and value mask
)
263 (word-logical-andc2 (%vector-raw-bits dst dst-word-offset
)
265 (let ((interior (floor (- length final-bytes
) ,bytes-per-word
)))
266 ,@(unless (= bytes-per-word
1)
267 `((unless (zerop dst-byte-offset
)
268 (let ((mask (end-mask (* (- dst-byte-offset
) ,bitsize
))))
269 (%set-vector-raw-bits dst dst-word-offset
270 (word-logical-or (word-logical-and value mask
)
271 (word-logical-andc2 (%vector-raw-bits dst dst-word-offset
)
273 (incf dst-word-offset
))))
274 (let ((end (+ dst-word-offset interior
)))
275 (declare (type ,word-offset end
))
277 ((>= dst-word-offset end
))
278 (%set-vector-raw-bits dst dst-word-offset value
)
279 (incf dst-word-offset
)))
281 (dotimes (i interior
)
282 (%set-vector-raw-bits dst dst-word-offset value
)
283 (incf dst-word-offset
))
284 ,@(unless (= bytes-per-word
1)
285 `((unless (zerop final-bytes
)
286 (let ((mask (start-mask (* final-bytes
,bitsize
))))
287 (%set-vector-raw-bits dst dst-word-offset
288 (word-logical-or (word-logical-and value mask
)
289 (word-logical-andc2 (%vector-raw-bits dst dst-word-offset
)
293 ;; common uses for constant-byte-bashing
294 (defknown ,array-fill-name
(word simple-unboxed-array index index
)
298 :derive-type
(sb-c::result-type-nth-arg
1))
299 (defun ,array-fill-name
(value dst dst-offset length
)
300 (declare (type word value
) (type index dst-offset length
))
301 (declare (optimize (speed 3) (safety 1)))
302 (,constant-bash-name dst dst-offset length value
)
305 ;; Copying. Never use this for 8, 16, 32, 64
306 ,@(when (member bitsize
'(1 2 4))
307 `((declaim (inline ,unary-bash-name
))
308 (defun ,unary-bash-name
(src src-offset dst dst-offset length
)
309 (declare (type index src-offset dst-offset length
))
310 (verify-src/dst-bits-per-elt src dst
,bitsize
)
311 (multiple-value-bind (dst-word-offset dst-byte-offset
)
312 (floor dst-offset
,bytes-per-word
)
313 (declare (type ,word-offset dst-word-offset
)
314 (type ,byte-offset dst-byte-offset
))
315 (multiple-value-bind (src-word-offset src-byte-offset
)
316 (floor src-offset
,bytes-per-word
)
317 (declare (type ,word-offset src-word-offset
)
318 (type ,byte-offset src-byte-offset
))
320 ((<= (+ dst-byte-offset length
) ,bytes-per-word
)
321 ;; We are only writing one word, so it doesn't matter what
322 ;; order we do it in. But we might be reading from
323 ;; multiple words, so take care.
326 ;; We're not writing anything. This is really easy.
328 ((>= length
,bytes-per-word
)
329 ;; DST-BYTE-OFFSET must be equal to zero, or we would be
330 ;; writing multiple words. If SRC-BYTE-OFFSET is also zero,
331 ;; the we just transfer the single word. Otherwise we have
332 ;; to extract bytes from two source words.
333 (%set-vector-raw-bits dst dst-word-offset
335 ((zerop src-byte-offset
)
336 (%vector-raw-bits src src-word-offset
))
337 ,@(unless (= bytes-per-word
1)
338 `((t (word-logical-or (shift-towards-start
339 (%vector-raw-bits src src-word-offset
)
340 (* src-byte-offset
,bitsize
))
342 (%vector-raw-bits src
(1+ src-word-offset
))
343 (* (- src-byte-offset
) ,bitsize
)))))))))
344 ,@(unless (= bytes-per-word
1)
346 ;; We are only writing some portion of the destination word.
347 ;; We still don't know whether we need one or two source words.
348 (let ((mask (shift-towards-end (start-mask (* length
,bitsize
))
349 (* dst-byte-offset
,bitsize
)))
350 (orig (%vector-raw-bits dst dst-word-offset
))
351 (value (if (> src-byte-offset dst-byte-offset
)
352 ;; The source starts further
353 ;; into the word than does the
354 ;; destination, so the source
355 ;; could extend into the next
356 ;; word. If it does, we have
357 ;; to merge the two words, and
358 ;; it not, we can just shift
360 (let ((src-byte-shift (- src-byte-offset
362 (if (> (+ src-byte-offset length
) ,bytes-per-word
)
365 (%vector-raw-bits src src-word-offset
)
366 (* src-byte-shift
,bitsize
))
368 (%vector-raw-bits src
(1+ src-word-offset
))
369 (* (- src-byte-shift
) ,bitsize
)))
370 (shift-towards-start (%vector-raw-bits src src-word-offset
)
371 (* src-byte-shift
,bitsize
))))
372 ;; The destination starts further
373 ;; into the word than does the
374 ;; source, so we know the source
375 ;; cannot extend into a second
376 ;; word (or else the destination
377 ;; would too, and we wouldn't be
380 (%vector-raw-bits src src-word-offset
)
381 (* (- dst-byte-offset src-byte-offset
) ,bitsize
)))))
382 (declare (type word mask orig value
))
383 (%set-vector-raw-bits dst dst-word-offset
384 (word-logical-or (word-logical-and value mask
)
385 (word-logical-andc2 orig mask
)))))))))
386 ((= src-byte-offset dst-byte-offset
)
387 ;; The source and destination are aligned, so shifting
388 ;; is unnecessary. But we have to pick the direction
389 ;; of the copy in case the source and destination are
390 ;; really the same object.
391 (multiple-value-bind (words final-bytes
)
392 (floor (+ dst-byte-offset length
) ,bytes-per-word
)
393 (declare (type ,word-offset words
)
394 (type ,byte-offset final-bytes
))
395 (let ((interior (floor (- length final-bytes
) ,bytes-per-word
)))
396 (declare (type ,word-offset interior
))
398 ((<= dst-offset src-offset
)
399 ;; We need to loop from left to right.
400 ,@(unless (= bytes-per-word
1)
401 `((unless (zerop dst-byte-offset
)
402 ;; We are only writing part of the first word, so mask
403 ;; off the bytes we want to preserve.
404 (let ((mask (end-mask (* (- dst-byte-offset
) ,bitsize
)))
405 (orig (%vector-raw-bits dst dst-word-offset
))
406 (value (%vector-raw-bits src src-word-offset
)))
407 (declare (type word mask orig value
))
408 (%set-vector-raw-bits dst dst-word-offset
409 (word-logical-or (word-logical-and value mask
)
410 (word-logical-andc2 orig mask
))))
411 (incf src-word-offset
)
412 (incf dst-word-offset
))))
413 ;; Copy the interior words.
414 (let ((end ,(if (= bytes-per-word
1)
415 `(truly-the ,word-offset
416 (+ dst-word-offset interior
))
417 `(+ dst-word-offset interior
))))
418 (declare (type ,word-offset end
))
420 ((>= dst-word-offset end
))
421 (%set-vector-raw-bits dst dst-word-offset
422 (%vector-raw-bits src src-word-offset
))
423 ,(if (= bytes-per-word
1)
424 `(setf src-word-offset
(truly-the ,word-offset
(+ src-word-offset
1)))
425 `(incf src-word-offset
))
426 (incf dst-word-offset
)))
427 ,@(unless (= bytes-per-word
1)
428 `((unless (zerop final-bytes
)
429 ;; We are only writing part of the last word.
430 (let ((mask (start-mask (* final-bytes
,bitsize
)))
431 (orig (%vector-raw-bits dst dst-word-offset
))
432 (value (%vector-raw-bits src src-word-offset
)))
433 (declare (type word mask orig value
))
434 (%set-vector-raw-bits dst dst-word-offset
435 (word-logical-or (word-logical-and value mask
)
436 (word-logical-andc2 orig mask
))))))))
438 ;; We need to loop from right to left.
439 ,(if (= bytes-per-word
1)
440 `(setf dst-word-offset
(truly-the ,word-offset
441 (+ dst-word-offset words
)))
442 `(incf dst-word-offset words
))
443 ,(if (= bytes-per-word
1)
444 `(setf src-word-offset
(truly-the ,word-offset
445 (+ src-word-offset words
)))
446 `(incf src-word-offset words
))
447 ,@(unless (= bytes-per-word
1)
448 `((unless (zerop final-bytes
)
449 (let ((mask (start-mask (* final-bytes
,bitsize
)))
450 (orig (%vector-raw-bits dst dst-word-offset
))
451 (value (%vector-raw-bits src src-word-offset
)))
452 (declare (type word mask orig value
))
453 (%set-vector-raw-bits dst dst-word-offset
454 (word-logical-or (word-logical-and value mask
)
455 (word-logical-andc2 orig mask
)))))))
456 (let ((end (- dst-word-offset interior
)))
458 ((<= dst-word-offset end
))
459 (decf src-word-offset
)
460 (decf dst-word-offset
)
461 (%set-vector-raw-bits dst dst-word-offset
462 (%vector-raw-bits src src-word-offset
))))
463 ,@(unless (= bytes-per-word
1)
464 `((unless (zerop dst-byte-offset
)
465 ;; We are only writing part of the last word.
466 (decf src-word-offset
)
467 (decf dst-word-offset
)
468 (let ((mask (end-mask (* (- dst-byte-offset
) ,bitsize
)))
469 (orig (%vector-raw-bits dst dst-word-offset
))
470 (value (%vector-raw-bits src src-word-offset
)))
471 (declare (type word mask orig value
))
472 (%set-vector-raw-bits dst dst-word-offset
473 (word-logical-or (word-logical-and value mask
)
474 (word-logical-andc2 orig mask
))))))))))))
476 ;; Source and destination are not aligned.
477 (multiple-value-bind (words final-bytes
)
478 (floor (+ dst-byte-offset length
) ,bytes-per-word
)
479 (declare (type ,word-offset words
)
480 (type ,byte-offset final-bytes
))
481 (let ((src-shift (mod (- src-byte-offset dst-byte-offset
)
483 (interior (floor (- length final-bytes
) ,bytes-per-word
)))
484 (declare (type ,word-offset interior
)
485 (type ,byte-offset src-shift
))
487 ((<= dst-offset src-offset
)
488 ;; We need to loop from left to right.
490 (next (%vector-raw-bits src src-word-offset
)))
491 (declare (type word prev next
))
492 (flet ((get-next-src ()
494 (setf next
(%vector-raw-bits src
495 (incf src-word-offset
)))))
496 (declare (inline get-next-src
))
497 ,@(unless (= bytes-per-word
1)
498 `((unless (zerop dst-byte-offset
)
499 (when (> src-byte-offset dst-byte-offset
)
501 (let ((mask (end-mask (* (- dst-byte-offset
) ,bitsize
)))
502 (orig (%vector-raw-bits dst dst-word-offset
))
503 (value (word-logical-or (shift-towards-start prev
(* src-shift
,bitsize
))
504 (shift-towards-end next
(* (- src-shift
) ,bitsize
)))))
505 (declare (type word mask orig value
))
506 (%set-vector-raw-bits dst dst-word-offset
507 (word-logical-or (word-logical-and value mask
)
508 (word-logical-andc2 orig mask
))))
509 (incf dst-word-offset
))))
510 (let ((end (+ dst-word-offset interior
)))
511 (declare (type ,word-offset end
))
513 ((>= dst-word-offset end
))
515 (let ((value (word-logical-or
516 (shift-towards-end next
(* (- src-shift
) ,bitsize
))
517 (shift-towards-start prev
(* src-shift
,bitsize
)))))
518 (declare (type word value
))
519 (%set-vector-raw-bits dst dst-word-offset value
)
520 (incf dst-word-offset
))))
521 ,@(unless (= bytes-per-word
1)
522 `((unless (zerop final-bytes
)
524 (if (> (+ final-bytes src-shift
) ,bytes-per-word
)
528 (shift-towards-end next
(* (- src-shift
) ,bitsize
))
529 (shift-towards-start prev
(* src-shift
,bitsize
))))
530 (shift-towards-start next
(* src-shift
,bitsize
))))
531 (mask (start-mask (* final-bytes
,bitsize
)))
532 (orig (%vector-raw-bits dst dst-word-offset
)))
533 (declare (type word mask orig value
))
534 (%set-vector-raw-bits dst dst-word-offset
535 (word-logical-or (word-logical-and value mask
)
536 (word-logical-andc2 orig mask
))))))))))
538 ;; We need to loop from right to left.
539 (incf dst-word-offset words
)
540 (incf src-word-offset
(1- (ceiling (+ src-byte-offset length
) ,bytes-per-word
)))
542 (prev (%vector-raw-bits src src-word-offset
)))
543 (declare (type word prev next
))
544 (flet ((get-next-src ()
546 (setf prev
(%vector-raw-bits src
(decf src-word-offset
)))))
547 (declare (inline get-next-src
))
548 ,@(unless (= bytes-per-word
1)
549 `((unless (zerop final-bytes
)
550 (when (> final-bytes
(- ,bytes-per-word src-shift
))
552 (let ((value (word-logical-or
553 (shift-towards-end next
(* (- src-shift
) ,bitsize
))
554 (shift-towards-start prev
(* src-shift
,bitsize
))))
555 (mask (start-mask (* final-bytes
,bitsize
)))
556 (orig (%vector-raw-bits dst dst-word-offset
)))
557 (declare (type word mask orig value
))
558 (%set-vector-raw-bits dst dst-word-offset
559 (word-logical-or (word-logical-and value mask
)
560 (word-logical-andc2 orig mask
)))))))
561 (decf dst-word-offset
)
562 (let ((end (- dst-word-offset interior
)))
564 ((<= dst-word-offset end
))
566 (let ((value (word-logical-or
567 (shift-towards-end next
(* (- src-shift
) ,bitsize
))
568 (shift-towards-start prev
(* src-shift
,bitsize
)))))
569 (declare (type word value
))
570 (%set-vector-raw-bits dst dst-word-offset value
)
571 (decf dst-word-offset
))))
572 ,@(unless (= bytes-per-word
1)
573 `((unless (zerop dst-byte-offset
)
574 (if (> src-byte-offset dst-byte-offset
)
576 (setf next prev prev
0))
577 (let ((mask (end-mask (* (- dst-byte-offset
) ,bitsize
)))
578 (orig (%vector-raw-bits dst dst-word-offset
))
579 (value (word-logical-or
580 (shift-towards-start prev
(* src-shift
,bitsize
))
581 (shift-towards-end next
(* (- src-shift
) ,bitsize
)))))
582 (declare (type word mask orig value
))
583 (%set-vector-raw-bits dst dst-word-offset
584 (word-logical-or (word-logical-and value mask
)
585 (word-logical-andc2 orig mask
)))))))))))))))))
588 ;; common uses for unary-byte-bashing
589 (defun ,array-copy-name
(src src-offset dst dst-offset length
)
590 (declare (type index src-offset dst-offset length
))
591 (locally (declare (optimize (speed 3) (safety 1)))
592 (,unary-bash-name src src-offset dst dst-offset length
))))))))
594 ;;; We would normally do this with a MACROLET, but then we run into
595 ;;; problems with the lexical environment being too hairy for the
596 ;;; cross-compiler and it cannot inline the basic basher functions.
597 #.
(loop for i
= 1 then
(* i
2)
598 collect
`(!define-byte-bashers
,i
) into bashers
599 until
(= i n-word-bits
)
600 finally
(return `(progn ,@bashers
)))
602 (defmacro !define-constant-byte-bashers
(bitsize type value-transformer
&optional
(name type
))
603 (let ((constant-bash-name (intern (format nil
"CONSTANT-UB~D-BASH" bitsize
) (find-package "SB-KERNEL")))
604 (array-fill-name (intern (format nil
"UB~D-BASH-FILL-WITH-~A" bitsize name
) (find-package "SB-KERNEL"))))
606 (defknown ,array-fill-name
(,type simple-unboxed-array index index
)
610 :derive-type
(sb-c::result-type-nth-arg
1))
611 (defun ,array-fill-name
(value dst dst-offset length
)
612 (declare (type ,type value
) (type index dst-offset length
))
613 (declare (optimize (speed 3) (safety 1)))
614 (,constant-bash-name dst dst-offset length
(,value-transformer value
))
619 ,@(loop for n-bits
= 1 then
(* n-bits
2)
620 until
(= n-bits n-word-bits
)
622 `(!define-constant-byte-bashers
,n-bits
623 (unsigned-byte ,n-bits
)
625 ,@(loop for i
= n-bits then
(* 2 i
)
626 until
(= i sb-vm
:n-word-bits
)
628 `(setf value
(dpb value
(byte ,i
,i
) value
))))
629 ,(format nil
"UB~A" n-bits
))
631 `(!define-constant-byte-bashers
,n-bits
632 (signed-byte ,n-bits
)
634 (let ((value (ldb (byte ,n-bits
0) value
)))
635 ,@(loop for i
= n-bits then
(* 2 i
)
636 until
(= i sb-vm
:n-word-bits
)
638 `(setf value
(dpb value
(byte ,i
,i
) value
)))))
639 ,(format nil
"SB~A" n-bits
)))
640 (!define-constant-byte-bashers
,n-word-bits
641 (signed-byte ,n-word-bits
)
643 (ldb (byte ,n-word-bits
0) value
))
644 ,(format nil
"SB~A" n-word-bits
)))))
647 (!define-constant-byte-bashers
#.n-word-bits
650 (ldb (byte #.n-word-bits
0) (ash value n-fixnum-tag-bits
))))
652 (!define-constant-byte-bashers
32
655 (let ((bits (ldb (byte 32 0) (single-float-bits value
))))
657 (dpb bits
(byte 32 32) bits
)
662 (!define-constant-byte-bashers
64
665 (ldb (byte 64 0) (double-float-bits value
))))
668 (!define-constant-byte-bashers
64
669 (complex single-float
)
672 (logior (ash (ldb (byte 32 0)
673 (single-float-bits (realpart item
))) 32)
675 (single-float-bits (imagpart item
))))
677 (logior (ash (ldb (byte 32 0)
678 (single-float-bits (imagpart item
))) 32)
680 (single-float-bits (realpart item
)))))
681 complex-single-float
)
684 ;;;; Bashing-Style search for bits
686 ;;;; Similar search would work well for base-strings as well.
687 ;;;; (Technically for all unboxed sequences of sub-word size elements,
688 ;;;; but somehow I doubt eg. octet vectors get POSITION or FIND used
689 ;;;; as much on them.)
690 (defconstant +bit-position-base-mask
+ (1- n-word-bits
))
691 (defconstant +bit-position-base-shift
+ (integer-length +bit-position-base-mask
+))
692 (macrolet ((compute-start-mask (index)
693 `(let ((first-bits (logand ,index
+bit-position-base-mask
+)))
694 #+little-endian
(ash -
1 first-bits
)
695 #+big-endian
(lognot (ash -
1 (- n-word-bits first-bits
)))))
696 (compute-end-mask (index)
697 `(let ((last-bits (logand ,index
+bit-position-base-mask
+)))
698 #+little-endian
(lognot (ash -
1 last-bits
))
699 #+big-endian
(logand (ash -
1 (- n-word-bits last-bits
))
700 most-positive-word
)))
701 (calc-index (bit-index)
702 `(logior (the index
,bit-index
)
704 (ash word-index
+bit-position-base-shift
+))))
705 (def (name from-end frob
)
706 `(defun ,name
(vector start end
)
707 (declare (simple-bit-vector vector
)
709 (optimize (speed 3) (safety 0)))
710 ;; The END parameter is an exclusive limit as is customary.
711 ;; It's somewhat subjective whether the algorithm below
712 ;; would become simpler by subtracting 1 from END initially.
713 (let* ((first-word (ash start
(- +bit-position-base-shift
+)))
714 (last-word (ash end
(- +bit-position-base-shift
+)))
715 ;; These mask out everything but the interesting parts.
716 (start-mask (compute-start-mask start
))
717 (end-mask (compute-end-mask end
)))
718 (declare (index last-word first-word
))
719 (flet ((#+little-endian start-bit
#+big-endian end-bit
(x)
722 (truly-the (mod #.n-word-bits
)
723 (%primitive unsigned-word-find-first-bit x
))
725 (- #+big-endian n-word-bits
726 (integer-length (logand x
(- x
)))
728 (#+little-endian end-bit
#+big-endian start-bit
(x)
730 (- #+big-endian n-word-bits
734 (,@frob
(%vector-raw-bits vector offset
))))
735 (declare (inline start-bit end-bit get-word
))
737 (unless (< first-word last-word
)
738 ;; Both masks pertain to a single word. This also catches
739 ;; START = END. In that case the masks have no bits in common.
741 (let ((mask (logand start-mask end-mask
)))
743 (let ((word (logand mask
(get-word first-word
))))
745 (let ((word-index first-word
)) ; for the macro to see
747 `(calc-index (end-bit word
))
748 `(calc-index (start-bit word
))))))))))
750 ;; Since the start and end words differ, there is no word
751 ;; to which both masks pertain.
752 ;; We use a fairly traditional algorithm:
753 ;; (1) scan some number (0 <= N <= n-word-bits) of bits initially,
754 ;; (2) then a whole number of intervening words,
755 ;; (3) then some number (0 < N < n-word-bits) of trailing bits
756 ;; Steps (1) and (3) use the START and END masks respectively.
757 ;; The START mask has between 1 and N-WORD-BITS (inclusive) consecutive
758 ;; 1s, starting from the appropriate end.
759 ;; END-MASK instead of getting all 1s in the limiting case,
760 ;; gets all 0s, and a LAST-WORD value that is 1 too high
761 ;; which is semantically correct - it is an "inclusive" limit
762 ;; of a word in which no bits should be examined.
763 ;; When that occurs, we avoid reading the final word
764 ;; to avoid a buffer overrun bug.
768 `(let ((word-index last-word
)) ; trailing chunk
769 (declare (index word-index
))
770 (unless (zerop end-mask
)
771 ;; If no bits are set, then this is off the end of the subsequence.
772 ;; Do not read the word at all.
773 (let ((word (logand end-mask
(get-word word-index
))))
775 (return-from ,name
(calc-index (end-bit word
))))))
778 (loop while
(> word-index first-word
) ; might execute 0 times
779 do
(let ((word (get-word word-index
)))
781 (return-from ,name
(calc-index (end-bit word
)))))
783 ;; leading chunk - always executed
784 (let ((word (logand start-mask
(get-word first-word
))))
786 (calc-index (end-bit word
)))))
789 `(let* ((word-index first-word
)
790 (word (logand start-mask
(get-word word-index
))))
791 (declare (index word-index
))
793 (return-from ,name
(calc-index (start-bit word
))))
795 ;; Scan full words up to but excluding LAST-WORD
796 (loop while
(< word-index last-word
) ; might execute 0 times
797 do
(let ((word (get-word word-index
)))
799 (return-from ,name
(calc-index (start-bit word
)))))
801 ;; Scan last word unless no bits in mask
802 (unless (zerop end-mask
)
803 (let ((word (logand end-mask
(get-word word-index
))))
805 (calc-index (start-bit word
))))))))))))
807 (defun run-bit-position-assertions ()
808 ;; Check the claim in the comment at "(unless (< first-word last-word)"
809 (loop for i from
0 to
(* 2 n-word-bits
)
810 do
(let ((start-mask (compute-start-mask i
))
811 (end-mask (compute-end-mask i
)))
812 (assert (= (logand start-mask end-mask
) 0)))))
814 (def %bit-pos-fwd
/1 nil
(identity))
815 (def %bit-pos-rev
/1 t
(identity))
816 (def %bit-pos-fwd
/0 nil
(logandc2 most-positive-word
))
817 (def %bit-pos-rev
/0 t
(logandc2 most-positive-word
)))
819 ;; Known direction, unknown item to find
820 (defun %bit-pos-fwd
(bit vector start end
)
822 (0 (%bit-pos-fwd
/0 vector start end
))
823 (1 (%bit-pos-fwd
/1 vector start end
))
825 (defun %bit-pos-rev
(bit vector start end
)
827 (0 (%bit-pos-rev
/0 vector start end
))
828 (1 (%bit-pos-rev
/1 vector start end
))
831 ;; Known item to find, unknown direction
832 (declaim (maybe-inline %bit-position
/0 %bit-position
/1))
833 (defun %bit-position
/0 (vector from-end start end
)
835 (%bit-pos-rev
/0 vector start end
)
836 (%bit-pos-fwd
/0 vector start end
)))
837 (defun %bit-position
/1 (vector from-end start end
)
839 (%bit-pos-rev
/1 vector start end
)
840 (%bit-pos-fwd
/1 vector start end
)))
842 (defun %bit-position
(bit vector from-end start end
)
843 (declare (inline %bit-position
/0 %bit-position
/1))
845 (0 (%bit-position
/0 vector from-end start end
))
846 (1 (%bit-position
/1 vector from-end start end
))
848 (clear-info :function
:inlinep
'%bit-position
/0)
849 (clear-info :function
:inlinep
'%bit-position
/1)
851 ;;; These are needed ASAP (in target-unicode)
852 (defun shift-towards-start (number count
) (shift-towards-start number count
))
853 (defun shift-towards-end (number count
) (shift-towards-end number count
))
855 (run-bit-position-assertions)