Fix cross build.
[sbcl.git] / src / code / bit-bash.lisp
blobc2ee332f09ac4dde9e1cc47218857aee1603cd10
1 ;;;; functions to implement bitblt-ish operations
3 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; more information.
5 ;;;;
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB-VM")
14 ;;;; support routines
16 (declaim (inline start-mask end-mask))
18 ;;; Produce a mask that contains 1's for the COUNT "start" bits and
19 ;;; 0's for the remaining "end" bits. Only the lower 5 bits of COUNT
20 ;;; are significant (KLUDGE: because of hardwired implicit dependence
21 ;;; on 32-bit word size -- WHN 2001-03-19).
22 (defun start-mask (count)
23 (declare (fixnum count))
24 (shift-towards-start most-positive-word (- count)))
26 ;;; Produce a mask that contains 1's for the COUNT "end" bits and 0's
27 ;;; for the remaining "start" bits. Only the lower 5 bits of COUNT are
28 ;;; significant (KLUDGE: because of hardwired implicit dependence on
29 ;;; 32-bit word size -- WHN 2001-03-19).
30 (defun end-mask (count)
31 (declare (fixnum count))
32 (shift-towards-end most-positive-word (- count)))
35 ;;; the actual bashers and common uses of same
37 (defconstant min-bytes-c-call-threshold
38 ;; mostly just guessing here
39 #+(or x86 x86-64 ppc ppc64) 128
40 #-(or x86 x86-64 ppc ppc64) 256)
42 (defmacro verify-src/dst-bits-per-elt (source destination expect-bits-per-element)
43 (declare (ignorable source destination expect-bits-per-element))
44 #+(and sb-devel (not sb-devel-no-errors))
45 `(let ((src-bits-per-element
46 (ash 1 (aref #.%%simple-array-n-bits-shifts%%
47 (%other-pointer-widetag ,source))))
48 (dst-bits-per-element
49 (ash 1 (aref #.%%simple-array-n-bits-shifts%%
50 (%other-pointer-widetag ,destination)))))
51 (when (or (/= src-bits-per-element ,expect-bits-per-element)
52 (/= dst-bits-per-element ,expect-bits-per-element))
53 ;; Why enforce this: because since the arrays are lisp objects
54 ;; maybe we can be clever "somehow" (I'm not sure how)
55 ;; and/or maybe we have to unpoison the memory for #+ubsan.
56 ;; Whereas BYTE-BLT takes SAPs (and/or arrays) and so it has to
57 ;; be more strictly like memmove(). Because it is exactly that.
58 (error "Misuse of bash-copy: bits-per-elt=~D but src=~d and dst=~d"
59 ,expect-bits-per-element src-bits-per-element dst-bits-per-element))))
61 ;;; 1, 2, 4, and 8 bytes per element can be handled with memmove()
62 ;;; or, if it's easy enough, a loop over VECTOR-RAW-BITS.
63 (defmacro define-byte-blt-copier
64 (bytes-per-element
65 &aux (bits-per-element (* bytes-per-element 8))
66 (vtype `(simple-array (unsigned-byte ,bits-per-element) (*)))
67 (elements-per-word (/ n-word-bytes bytes-per-element))
68 (always-call-out-p ; memmove() is _always_ asymptotically faster than this
69 ;; code, which can't make any use of vectorization that C libraries
70 ;; typically do. It's a question of the overhead of a C call.
71 `(>= nelements ,(/ min-bytes-c-call-threshold bytes-per-element))))
72 (flet ((backward-p ()
73 ;; Iterate backwards if there is overlap and byte transfer is toward higher
74 ;; addresses. Technically (> dst-start src-start) is a necessary
75 ;; but not sufficient condition for overlap, but it's fine.
76 '(and (eq src dst) (> dst-start src-start)))
77 (down ()
78 ;; We could reduce the number of loop variables by 1 by computing
79 ;; the distance between src-start and dst-start, and adding it in
80 ;; to each array reference. Probably it would be worse though.
81 '(do ((dst-index (the (or (eql -1) index) (+ dst-start nwords -1))
82 (1- dst-index))
83 (src-index (the (or (eql -1) index) (+ src-start nwords -1))
84 (1- src-index)))
85 ((< dst-index dst-start))
86 (declare (type (or (eql -1) index) dst-index src-index))
87 ;; Assigning into SRC is right, because DST and SRC are the same array.
88 ;; We don't need "both" arrays to be in registers.
89 (%set-vector-raw-bits src dst-index
90 (%vector-raw-bits src (the index src-index)))))
91 (up ()
92 '(do ((dst-index dst-start (the index (1+ dst-index)))
93 (src-index src-start (the index (1+ src-index))))
94 ((>= dst-index dst-end))
95 (%set-vector-raw-bits dst dst-index (%vector-raw-bits src src-index))))
96 (use-memmove ()
97 ;; %BYTE-BLT wants the end as an index, which it converts back to a count
98 ;; by subtracting the start. Regardless, the args are way too confusing,
99 ;; so let's go directly to memmove. Cribbed from (DEFTRANSFORM %BYTE-BLT)
100 `(with-pinned-objects (dst src)
101 (memmove (sap+ (vector-sap (the ,vtype dst))
102 (the signed-word (* dst-start ,bytes-per-element)))
103 (sap+ (vector-sap (the ,vtype src))
104 (the signed-word (* src-start ,bytes-per-element)))
105 (the word (* nelements ,bytes-per-element))))))
106 ;; The arguments are array element indices.
107 `(defun ,(intern (format nil "UB~D-BASH-COPY" bits-per-element)
108 (find-package "SB-KERNEL"))
109 (src src-start dst dst-start nelements)
110 (declare (type index src-start dst-start nelements))
111 (verify-src/dst-bits-per-elt src dst ,bits-per-element)
112 (locally
113 (declare (optimize (safety 0)
114 (sb-c::alien-funcall-saves-fp-and-pc 0)))
115 #+cheneygc (when (> nelements 0)
116 ;; cheneygc can't handle a WP fault in memcpy()
117 ;; because "if(!foreign_function_call_active ..."
118 (let ((last (truly-the index (+ dst-start (1- nelements)))))
119 (data-vector-set (truly-the ,vtype dst) last
120 (data-vector-ref (truly-the ,vtype dst) last))))
121 ,(if (= bytes-per-element sb-vm:n-word-bytes)
122 `(if ,always-call-out-p
123 ,(use-memmove)
124 (let ((nwords nelements))
125 (if ,(backward-p)
126 ,(down)
127 (let ((dst-end (the index (+ dst-start nelements))))
128 ,(up)))))
129 `(let ((dst-subword (mod dst-start ,elements-per-word))
130 (src-subword (mod src-start ,elements-per-word))
131 (dst (truly-the ,vtype dst))
132 (src (truly-the ,vtype src)))
133 (cond ((or ,always-call-out-p
134 (/= dst-subword src-subword)) ; too complicated
135 ,(use-memmove))
136 (,(backward-p)
137 ;; Using the primitive-type-specific data-vector-set,
138 ;; process at most (1- ELEMENTS-PER-WORD) elements
139 ;; until aligned to a word.
140 (let ((dst-end (+ dst-start nelements))
141 (src-end (+ src-start nelements))
142 (original-nelements nelements))
143 ,@(let (initial)
144 (loop for i downfrom (- elements-per-word 1)
145 repeat (1- elements-per-word)
146 do (setq initial
147 ;; Test NELEMENTS first because it should be in a register
148 ;; from the preceding DECF.
149 `((when (and (/= nelements 0)
150 (logtest dst-end ,(1- elements-per-word)))
151 (data-vector-set dst (1- dst-end)
152 (data-vector-ref src (- src-end ,i)))
153 (decf (the index dst-end))
154 (decf (the index nelements))
155 ,@initial))))
156 initial)
157 (decf src-end (the (mod 8) (- original-nelements nelements)))
158 ;; Now DST-END and SRC-END are element indices that start a word.
159 ;; Scan backwards by whole words.
160 (let ((nwords (truncate nelements ,elements-per-word)))
161 (when (plusp nwords)
162 ;; Convert to word indices
163 (let* ((dst-start (- (truncate dst-end ,elements-per-word) nwords))
164 (src-start (- (truncate src-end ,elements-per-word) nwords)))
165 ,(down))
166 (decf (the index dst-end) (* nwords ,elements-per-word))
167 (decf (the index src-end) (* nwords ,elements-per-word))
168 (decf nelements (* nwords ,elements-per-word))))
169 ;; If there are elements remaining after the last full word copied,
170 ;; process element by element.
171 ,@(let (final)
172 (loop for i from (1- elements-per-word) downto 1
173 do (setq final
174 `((unless (= nelements 0)
175 (data-vector-set
176 dst (- dst-end ,i)
177 (data-vector-ref src (- src-end ,i)))
178 ,@(unless (= i (1- elements-per-word))
179 '((decf (the index nelements))))
180 ,@final))))
181 final)))
183 ;; Same as above
184 (let ((original-nelements nelements))
185 ,@(let (initial)
186 (loop for i downfrom (- elements-per-word 2)
187 repeat (1- elements-per-word)
188 do (setq initial
189 `((when (and (/= nelements 0)
190 (logtest dst-start ,(1- elements-per-word)))
191 (data-vector-set
192 dst dst-start
193 (data-vector-ref src (+ src-start ,i)))
194 (incf (the index dst-start))
195 (decf (the index nelements))
196 ,@initial))))
197 initial)
198 (incf (the index src-start) (- original-nelements nelements)))
199 (let ((nwords (truncate nelements ,elements-per-word)))
200 (when (plusp nwords)
201 (let* ((src-start (truncate src-start ,elements-per-word))
202 (dst-start (truncate dst-start ,elements-per-word))
203 (dst-end (the index (+ dst-start nwords))))
204 ,(up))
205 (incf dst-start (* nwords ,elements-per-word))
206 (incf src-start (* nwords ,elements-per-word))
207 (decf nelements (* nwords ,elements-per-word))))
208 ;; Same as above
209 ,@(let (final)
210 (loop for i from (- elements-per-word 2) downto 0
211 do (setq final
212 `((unless (= nelements 0)
213 (data-vector-set
214 dst (+ dst-start ,i)
215 (data-vector-ref src (+ src-start ,i)))
216 ,@(unless (= i (- elements-per-word 2))
217 '((decf (the index nelements))))
218 ,@final))))
219 final)))))
220 (values)))))
222 (define-byte-blt-copier 1)
223 (define-byte-blt-copier 2)
224 (define-byte-blt-copier 4)
225 #+64-bit (define-byte-blt-copier 8)
227 ;;; We cheat a little bit by using TRULY-THE in the copying function to
228 ;;; force the compiler to generate good code in the (= BITSIZE
229 ;;; N-WORD-BITS) case. We don't use TRULY-THE in the other cases
230 ;;; to give the compiler freedom to generate better code.
231 (defmacro !define-byte-bashers (bitsize)
232 (let* ((bytes-per-word (/ n-word-bits bitsize))
233 (byte-offset `(integer 0 (,bytes-per-word)))
234 (word-offset `(integer 0 ,(ceiling array-dimension-limit bytes-per-word)))
235 (constant-bash-name (intern (format nil "CONSTANT-UB~D-BASH" bitsize) (find-package "SB-KERNEL")))
236 (array-fill-name (intern (format nil "UB~D-BASH-FILL" bitsize) (find-package "SB-KERNEL")))
237 (unary-bash-name (intern (format nil "UNARY-UB~D-BASH" bitsize) (find-package "SB-KERNEL")))
238 (array-copy-name (intern (format nil "UB~D-BASH-COPY" bitsize) (find-package "SB-KERNEL"))))
239 `(progn
240 (declaim (inline ,constant-bash-name))
241 ;; Fill DST with VALUE starting at DST-OFFSET and continuing
242 ;; for LENGTH bytes (however bytes are defined).
243 (defun ,constant-bash-name (dst dst-offset length value)
244 (declare (type word value) (type index dst-offset length))
245 (multiple-value-bind (dst-word-offset dst-byte-offset)
246 (floor dst-offset ,bytes-per-word)
247 (declare (type ,word-offset dst-word-offset)
248 (type ,byte-offset dst-byte-offset))
249 (multiple-value-bind (n-words final-bytes)
250 (floor (+ dst-byte-offset length) ,bytes-per-word)
251 (declare (type ,word-offset n-words)
252 (type ,byte-offset final-bytes))
253 (if (zerop n-words)
254 ,(unless (= bytes-per-word 1)
255 `(unless (zerop length)
256 (%set-vector-raw-bits dst dst-word-offset
257 (if (>= length ,bytes-per-word)
258 value
259 (let ((mask (shift-towards-end
260 (start-mask (* length ,bitsize))
261 (* dst-byte-offset ,bitsize))))
262 (word-logical-or (word-logical-and value mask)
263 (word-logical-andc2 (%vector-raw-bits dst dst-word-offset)
264 mask)))))))
265 (let ((interior (floor (- length final-bytes) ,bytes-per-word)))
266 ,@(unless (= bytes-per-word 1)
267 `((unless (zerop dst-byte-offset)
268 (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize))))
269 (%set-vector-raw-bits dst dst-word-offset
270 (word-logical-or (word-logical-and value mask)
271 (word-logical-andc2 (%vector-raw-bits dst dst-word-offset)
272 mask))))
273 (incf dst-word-offset))))
274 (let ((end (+ dst-word-offset interior)))
275 (declare (type ,word-offset end))
276 (do ()
277 ((>= dst-word-offset end))
278 (%set-vector-raw-bits dst dst-word-offset value)
279 (incf dst-word-offset)))
280 #+nil
281 (dotimes (i interior)
282 (%set-vector-raw-bits dst dst-word-offset value)
283 (incf dst-word-offset))
284 ,@(unless (= bytes-per-word 1)
285 `((unless (zerop final-bytes)
286 (let ((mask (start-mask (* final-bytes ,bitsize))))
287 (%set-vector-raw-bits dst dst-word-offset
288 (word-logical-or (word-logical-and value mask)
289 (word-logical-andc2 (%vector-raw-bits dst dst-word-offset)
290 mask)))))))))))
291 (values))
293 ;; common uses for constant-byte-bashing
294 (defknown ,array-fill-name (word simple-unboxed-array index index)
295 simple-unboxed-array
297 :result-arg 1
298 :derive-type (sb-c::result-type-nth-arg 1))
299 (defun ,array-fill-name (value dst dst-offset length)
300 (declare (type word value) (type index dst-offset length))
301 (declare (optimize (speed 3) (safety 1)))
302 (,constant-bash-name dst dst-offset length value)
303 dst)
305 ;; Copying. Never use this for 8, 16, 32, 64
306 ,@(when (member bitsize '(1 2 4))
307 `((declaim (inline ,unary-bash-name))
308 (defun ,unary-bash-name (src src-offset dst dst-offset length)
309 (declare (type index src-offset dst-offset length))
310 (verify-src/dst-bits-per-elt src dst ,bitsize)
311 (multiple-value-bind (dst-word-offset dst-byte-offset)
312 (floor dst-offset ,bytes-per-word)
313 (declare (type ,word-offset dst-word-offset)
314 (type ,byte-offset dst-byte-offset))
315 (multiple-value-bind (src-word-offset src-byte-offset)
316 (floor src-offset ,bytes-per-word)
317 (declare (type ,word-offset src-word-offset)
318 (type ,byte-offset src-byte-offset))
319 (cond
320 ((<= (+ dst-byte-offset length) ,bytes-per-word)
321 ;; We are only writing one word, so it doesn't matter what
322 ;; order we do it in. But we might be reading from
323 ;; multiple words, so take care.
324 (cond
325 ((zerop length)
326 ;; We're not writing anything. This is really easy.
328 ((>= length ,bytes-per-word)
329 ;; DST-BYTE-OFFSET must be equal to zero, or we would be
330 ;; writing multiple words. If SRC-BYTE-OFFSET is also zero,
331 ;; the we just transfer the single word. Otherwise we have
332 ;; to extract bytes from two source words.
333 (%set-vector-raw-bits dst dst-word-offset
334 (cond
335 ((zerop src-byte-offset)
336 (%vector-raw-bits src src-word-offset))
337 ,@(unless (= bytes-per-word 1)
338 `((t (word-logical-or (shift-towards-start
339 (%vector-raw-bits src src-word-offset)
340 (* src-byte-offset ,bitsize))
341 (shift-towards-end
342 (%vector-raw-bits src (1+ src-word-offset))
343 (* (- src-byte-offset) ,bitsize)))))))))
344 ,@(unless (= bytes-per-word 1)
345 `((t
346 ;; We are only writing some portion of the destination word.
347 ;; We still don't know whether we need one or two source words.
348 (let ((mask (shift-towards-end (start-mask (* length ,bitsize))
349 (* dst-byte-offset ,bitsize)))
350 (orig (%vector-raw-bits dst dst-word-offset))
351 (value (if (> src-byte-offset dst-byte-offset)
352 ;; The source starts further
353 ;; into the word than does the
354 ;; destination, so the source
355 ;; could extend into the next
356 ;; word. If it does, we have
357 ;; to merge the two words, and
358 ;; it not, we can just shift
359 ;; the first word.
360 (let ((src-byte-shift (- src-byte-offset
361 dst-byte-offset)))
362 (if (> (+ src-byte-offset length) ,bytes-per-word)
363 (word-logical-or
364 (shift-towards-start
365 (%vector-raw-bits src src-word-offset)
366 (* src-byte-shift ,bitsize))
367 (shift-towards-end
368 (%vector-raw-bits src (1+ src-word-offset))
369 (* (- src-byte-shift) ,bitsize)))
370 (shift-towards-start (%vector-raw-bits src src-word-offset)
371 (* src-byte-shift ,bitsize))))
372 ;; The destination starts further
373 ;; into the word than does the
374 ;; source, so we know the source
375 ;; cannot extend into a second
376 ;; word (or else the destination
377 ;; would too, and we wouldn't be
378 ;; in this branch).
379 (shift-towards-end
380 (%vector-raw-bits src src-word-offset)
381 (* (- dst-byte-offset src-byte-offset) ,bitsize)))))
382 (declare (type word mask orig value))
383 (%set-vector-raw-bits dst dst-word-offset
384 (word-logical-or (word-logical-and value mask)
385 (word-logical-andc2 orig mask)))))))))
386 ((= src-byte-offset dst-byte-offset)
387 ;; The source and destination are aligned, so shifting
388 ;; is unnecessary. But we have to pick the direction
389 ;; of the copy in case the source and destination are
390 ;; really the same object.
391 (multiple-value-bind (words final-bytes)
392 (floor (+ dst-byte-offset length) ,bytes-per-word)
393 (declare (type ,word-offset words)
394 (type ,byte-offset final-bytes))
395 (let ((interior (floor (- length final-bytes) ,bytes-per-word)))
396 (declare (type ,word-offset interior))
397 (cond
398 ((<= dst-offset src-offset)
399 ;; We need to loop from left to right.
400 ,@(unless (= bytes-per-word 1)
401 `((unless (zerop dst-byte-offset)
402 ;; We are only writing part of the first word, so mask
403 ;; off the bytes we want to preserve.
404 (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
405 (orig (%vector-raw-bits dst dst-word-offset))
406 (value (%vector-raw-bits src src-word-offset)))
407 (declare (type word mask orig value))
408 (%set-vector-raw-bits dst dst-word-offset
409 (word-logical-or (word-logical-and value mask)
410 (word-logical-andc2 orig mask))))
411 (incf src-word-offset)
412 (incf dst-word-offset))))
413 ;; Copy the interior words.
414 (let ((end ,(if (= bytes-per-word 1)
415 `(truly-the ,word-offset
416 (+ dst-word-offset interior))
417 `(+ dst-word-offset interior))))
418 (declare (type ,word-offset end))
419 (do ()
420 ((>= dst-word-offset end))
421 (%set-vector-raw-bits dst dst-word-offset
422 (%vector-raw-bits src src-word-offset))
423 ,(if (= bytes-per-word 1)
424 `(setf src-word-offset (truly-the ,word-offset (+ src-word-offset 1)))
425 `(incf src-word-offset))
426 (incf dst-word-offset)))
427 ,@(unless (= bytes-per-word 1)
428 `((unless (zerop final-bytes)
429 ;; We are only writing part of the last word.
430 (let ((mask (start-mask (* final-bytes ,bitsize)))
431 (orig (%vector-raw-bits dst dst-word-offset))
432 (value (%vector-raw-bits src src-word-offset)))
433 (declare (type word mask orig value))
434 (%set-vector-raw-bits dst dst-word-offset
435 (word-logical-or (word-logical-and value mask)
436 (word-logical-andc2 orig mask))))))))
438 ;; We need to loop from right to left.
439 ,(if (= bytes-per-word 1)
440 `(setf dst-word-offset (truly-the ,word-offset
441 (+ dst-word-offset words)))
442 `(incf dst-word-offset words))
443 ,(if (= bytes-per-word 1)
444 `(setf src-word-offset (truly-the ,word-offset
445 (+ src-word-offset words)))
446 `(incf src-word-offset words))
447 ,@(unless (= bytes-per-word 1)
448 `((unless (zerop final-bytes)
449 (let ((mask (start-mask (* final-bytes ,bitsize)))
450 (orig (%vector-raw-bits dst dst-word-offset))
451 (value (%vector-raw-bits src src-word-offset)))
452 (declare (type word mask orig value))
453 (%set-vector-raw-bits dst dst-word-offset
454 (word-logical-or (word-logical-and value mask)
455 (word-logical-andc2 orig mask)))))))
456 (let ((end (- dst-word-offset interior)))
457 (do ()
458 ((<= dst-word-offset end))
459 (decf src-word-offset)
460 (decf dst-word-offset)
461 (%set-vector-raw-bits dst dst-word-offset
462 (%vector-raw-bits src src-word-offset))))
463 ,@(unless (= bytes-per-word 1)
464 `((unless (zerop dst-byte-offset)
465 ;; We are only writing part of the last word.
466 (decf src-word-offset)
467 (decf dst-word-offset)
468 (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
469 (orig (%vector-raw-bits dst dst-word-offset))
470 (value (%vector-raw-bits src src-word-offset)))
471 (declare (type word mask orig value))
472 (%set-vector-raw-bits dst dst-word-offset
473 (word-logical-or (word-logical-and value mask)
474 (word-logical-andc2 orig mask))))))))))))
476 ;; Source and destination are not aligned.
477 (multiple-value-bind (words final-bytes)
478 (floor (+ dst-byte-offset length) ,bytes-per-word)
479 (declare (type ,word-offset words)
480 (type ,byte-offset final-bytes))
481 (let ((src-shift (mod (- src-byte-offset dst-byte-offset)
482 ,bytes-per-word))
483 (interior (floor (- length final-bytes) ,bytes-per-word)))
484 (declare (type ,word-offset interior)
485 (type ,byte-offset src-shift))
486 (cond
487 ((<= dst-offset src-offset)
488 ;; We need to loop from left to right.
489 (let ((prev 0)
490 (next (%vector-raw-bits src src-word-offset)))
491 (declare (type word prev next))
492 (flet ((get-next-src ()
493 (setf prev next)
494 (setf next (%vector-raw-bits src
495 (incf src-word-offset)))))
496 (declare (inline get-next-src))
497 ,@(unless (= bytes-per-word 1)
498 `((unless (zerop dst-byte-offset)
499 (when (> src-byte-offset dst-byte-offset)
500 (get-next-src))
501 (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
502 (orig (%vector-raw-bits dst dst-word-offset))
503 (value (word-logical-or (shift-towards-start prev (* src-shift ,bitsize))
504 (shift-towards-end next (* (- src-shift) ,bitsize)))))
505 (declare (type word mask orig value))
506 (%set-vector-raw-bits dst dst-word-offset
507 (word-logical-or (word-logical-and value mask)
508 (word-logical-andc2 orig mask))))
509 (incf dst-word-offset))))
510 (let ((end (+ dst-word-offset interior)))
511 (declare (type ,word-offset end))
512 (do ()
513 ((>= dst-word-offset end))
514 (get-next-src)
515 (let ((value (word-logical-or
516 (shift-towards-end next (* (- src-shift) ,bitsize))
517 (shift-towards-start prev (* src-shift ,bitsize)))))
518 (declare (type word value))
519 (%set-vector-raw-bits dst dst-word-offset value)
520 (incf dst-word-offset))))
521 ,@(unless (= bytes-per-word 1)
522 `((unless (zerop final-bytes)
523 (let ((value
524 (if (> (+ final-bytes src-shift) ,bytes-per-word)
525 (progn
526 (get-next-src)
527 (word-logical-or
528 (shift-towards-end next (* (- src-shift) ,bitsize))
529 (shift-towards-start prev (* src-shift ,bitsize))))
530 (shift-towards-start next (* src-shift ,bitsize))))
531 (mask (start-mask (* final-bytes ,bitsize)))
532 (orig (%vector-raw-bits dst dst-word-offset)))
533 (declare (type word mask orig value))
534 (%set-vector-raw-bits dst dst-word-offset
535 (word-logical-or (word-logical-and value mask)
536 (word-logical-andc2 orig mask))))))))))
538 ;; We need to loop from right to left.
539 (incf dst-word-offset words)
540 (incf src-word-offset (1- (ceiling (+ src-byte-offset length) ,bytes-per-word)))
541 (let ((next 0)
542 (prev (%vector-raw-bits src src-word-offset)))
543 (declare (type word prev next))
544 (flet ((get-next-src ()
545 (setf next prev)
546 (setf prev (%vector-raw-bits src (decf src-word-offset)))))
547 (declare (inline get-next-src))
548 ,@(unless (= bytes-per-word 1)
549 `((unless (zerop final-bytes)
550 (when (> final-bytes (- ,bytes-per-word src-shift))
551 (get-next-src))
552 (let ((value (word-logical-or
553 (shift-towards-end next (* (- src-shift) ,bitsize))
554 (shift-towards-start prev (* src-shift ,bitsize))))
555 (mask (start-mask (* final-bytes ,bitsize)))
556 (orig (%vector-raw-bits dst dst-word-offset)))
557 (declare (type word mask orig value))
558 (%set-vector-raw-bits dst dst-word-offset
559 (word-logical-or (word-logical-and value mask)
560 (word-logical-andc2 orig mask)))))))
561 (decf dst-word-offset)
562 (let ((end (- dst-word-offset interior)))
563 (do ()
564 ((<= dst-word-offset end))
565 (get-next-src)
566 (let ((value (word-logical-or
567 (shift-towards-end next (* (- src-shift) ,bitsize))
568 (shift-towards-start prev (* src-shift ,bitsize)))))
569 (declare (type word value))
570 (%set-vector-raw-bits dst dst-word-offset value)
571 (decf dst-word-offset))))
572 ,@(unless (= bytes-per-word 1)
573 `((unless (zerop dst-byte-offset)
574 (if (> src-byte-offset dst-byte-offset)
575 (get-next-src)
576 (setf next prev prev 0))
577 (let ((mask (end-mask (* (- dst-byte-offset) ,bitsize)))
578 (orig (%vector-raw-bits dst dst-word-offset))
579 (value (word-logical-or
580 (shift-towards-start prev (* src-shift ,bitsize))
581 (shift-towards-end next (* (- src-shift) ,bitsize)))))
582 (declare (type word mask orig value))
583 (%set-vector-raw-bits dst dst-word-offset
584 (word-logical-or (word-logical-and value mask)
585 (word-logical-andc2 orig mask)))))))))))))))))
586 (values))
588 ;; common uses for unary-byte-bashing
589 (defun ,array-copy-name (src src-offset dst dst-offset length)
590 (declare (type index src-offset dst-offset length))
591 (locally (declare (optimize (speed 3) (safety 1)))
592 (,unary-bash-name src src-offset dst dst-offset length))))))))
594 ;;; We would normally do this with a MACROLET, but then we run into
595 ;;; problems with the lexical environment being too hairy for the
596 ;;; cross-compiler and it cannot inline the basic basher functions.
597 #.(loop for i = 1 then (* i 2)
598 collect `(!define-byte-bashers ,i) into bashers
599 until (= i n-word-bits)
600 finally (return `(progn ,@bashers)))
602 (defmacro !define-constant-byte-bashers (bitsize type value-transformer &optional (name type))
603 (let ((constant-bash-name (intern (format nil "CONSTANT-UB~D-BASH" bitsize) (find-package "SB-KERNEL")))
604 (array-fill-name (intern (format nil "UB~D-BASH-FILL-WITH-~A" bitsize name) (find-package "SB-KERNEL"))))
605 `(progn
606 (defknown ,array-fill-name (,type simple-unboxed-array index index)
607 simple-unboxed-array
609 :result-arg 1
610 :derive-type (sb-c::result-type-nth-arg 1))
611 (defun ,array-fill-name (value dst dst-offset length)
612 (declare (type ,type value) (type index dst-offset length))
613 (declare (optimize (speed 3) (safety 1)))
614 (,constant-bash-name dst dst-offset length (,value-transformer value))
615 dst))))
617 (macrolet ((def ()
618 `(progn
619 ,@(loop for n-bits = 1 then (* n-bits 2)
620 until (= n-bits n-word-bits)
621 collect
622 `(!define-constant-byte-bashers ,n-bits
623 (unsigned-byte ,n-bits)
624 (lambda (value)
625 ,@(loop for i = n-bits then (* 2 i)
626 until (= i sb-vm:n-word-bits)
627 collect
628 `(setf value (dpb value (byte ,i ,i) value))))
629 ,(format nil "UB~A" n-bits))
630 collect
631 `(!define-constant-byte-bashers ,n-bits
632 (signed-byte ,n-bits)
633 (lambda (value)
634 (let ((value (ldb (byte ,n-bits 0) value)))
635 ,@(loop for i = n-bits then (* 2 i)
636 until (= i sb-vm:n-word-bits)
637 collect
638 `(setf value (dpb value (byte ,i ,i) value)))))
639 ,(format nil "SB~A" n-bits)))
640 (!define-constant-byte-bashers ,n-word-bits
641 (signed-byte ,n-word-bits)
642 (lambda (value)
643 (ldb (byte ,n-word-bits 0) value))
644 ,(format nil "SB~A" n-word-bits)))))
645 (def))
647 (!define-constant-byte-bashers #.n-word-bits
648 fixnum
649 (lambda (value)
650 (ldb (byte #.n-word-bits 0) (ash value n-fixnum-tag-bits))))
652 (!define-constant-byte-bashers 32
653 single-float
654 (lambda (value)
655 (let ((bits (ldb (byte 32 0) (single-float-bits value))))
656 #+64-bit
657 (dpb bits (byte 32 32) bits)
658 #-64-bit
659 bits)))
661 #+64-bit
662 (!define-constant-byte-bashers 64
663 double-float
664 (lambda (value)
665 (ldb (byte 64 0) (double-float-bits value))))
667 #+64-bit
668 (!define-constant-byte-bashers 64
669 (complex single-float)
670 (lambda (item)
671 #+big-endian
672 (logior (ash (ldb (byte 32 0)
673 (single-float-bits (realpart item))) 32)
674 (ldb (byte 32 0)
675 (single-float-bits (imagpart item))))
676 #+little-endian
677 (logior (ash (ldb (byte 32 0)
678 (single-float-bits (imagpart item))) 32)
679 (ldb (byte 32 0)
680 (single-float-bits (realpart item)))))
681 complex-single-float)
684 ;;;; Bashing-Style search for bits
685 ;;;;
686 ;;;; Similar search would work well for base-strings as well.
687 ;;;; (Technically for all unboxed sequences of sub-word size elements,
688 ;;;; but somehow I doubt eg. octet vectors get POSITION or FIND used
689 ;;;; as much on them.)
690 (defconstant +bit-position-base-mask+ (1- n-word-bits))
691 (defconstant +bit-position-base-shift+ (integer-length +bit-position-base-mask+))
692 (macrolet ((compute-start-mask (index)
693 `(let ((first-bits (logand ,index +bit-position-base-mask+)))
694 #+little-endian (ash -1 first-bits)
695 #+big-endian (lognot (ash -1 (- n-word-bits first-bits)))))
696 (compute-end-mask (index)
697 `(let ((last-bits (logand ,index +bit-position-base-mask+)))
698 #+little-endian (lognot (ash -1 last-bits))
699 #+big-endian (logand (ash -1 (- n-word-bits last-bits))
700 most-positive-word)))
701 (calc-index (bit-index)
702 `(logior (the index ,bit-index)
703 (truly-the fixnum
704 (ash word-index +bit-position-base-shift+))))
705 (def (name from-end frob)
706 `(defun ,name (vector start end)
707 (declare (simple-bit-vector vector)
708 (index start end)
709 (optimize (speed 3) (safety 0)))
710 ;; The END parameter is an exclusive limit as is customary.
711 ;; It's somewhat subjective whether the algorithm below
712 ;; would become simpler by subtracting 1 from END initially.
713 (let* ((first-word (ash start (- +bit-position-base-shift+)))
714 (last-word (ash end (- +bit-position-base-shift+)))
715 ;; These mask out everything but the interesting parts.
716 (start-mask (compute-start-mask start))
717 (end-mask (compute-end-mask end)))
718 (declare (index last-word first-word))
719 (flet ((#+little-endian start-bit #+big-endian end-bit (x)
720 (declare (word x))
721 #+(or x86-64 x86)
722 (truly-the (mod #.n-word-bits)
723 (%primitive unsigned-word-find-first-bit x))
724 #-(or x86-64 x86)
725 (- #+big-endian n-word-bits
726 (integer-length (logand x (- x)))
727 #+little-endian 1))
728 (#+little-endian end-bit #+big-endian start-bit (x)
729 (declare (word x))
730 (- #+big-endian n-word-bits
731 (integer-length x)
732 #+little-endian 1))
733 (get-word (offset)
734 (,@frob (%vector-raw-bits vector offset))))
735 (declare (inline start-bit end-bit get-word))
737 (unless (< first-word last-word)
738 ;; Both masks pertain to a single word. This also catches
739 ;; START = END. In that case the masks have no bits in common.
740 (return-from ,name
741 (let ((mask (logand start-mask end-mask)))
742 (unless (zerop mask)
743 (let ((word (logand mask (get-word first-word))))
744 (unless (zerop word)
745 (let ((word-index first-word)) ; for the macro to see
746 ,(if from-end
747 `(calc-index (end-bit word))
748 `(calc-index (start-bit word))))))))))
750 ;; Since the start and end words differ, there is no word
751 ;; to which both masks pertain.
752 ;; We use a fairly traditional algorithm:
753 ;; (1) scan some number (0 <= N <= n-word-bits) of bits initially,
754 ;; (2) then a whole number of intervening words,
755 ;; (3) then some number (0 < N < n-word-bits) of trailing bits
756 ;; Steps (1) and (3) use the START and END masks respectively.
757 ;; The START mask has between 1 and N-WORD-BITS (inclusive) consecutive
758 ;; 1s, starting from the appropriate end.
759 ;; END-MASK instead of getting all 1s in the limiting case,
760 ;; gets all 0s, and a LAST-WORD value that is 1 too high
761 ;; which is semantically correct - it is an "inclusive" limit
762 ;; of a word in which no bits should be examined.
763 ;; When that occurs, we avoid reading the final word
764 ;; to avoid a buffer overrun bug.
765 ,(if from-end
767 ;; Reverse scan:
768 `(let ((word-index last-word)) ; trailing chunk
769 (declare (index word-index))
770 (unless (zerop end-mask)
771 ;; If no bits are set, then this is off the end of the subsequence.
772 ;; Do not read the word at all.
773 (let ((word (logand end-mask (get-word word-index))))
774 (unless (zerop word)
775 (return-from ,name (calc-index (end-bit word))))))
776 (decf word-index)
777 ;; middle chunks
778 (loop while (> word-index first-word) ; might execute 0 times
779 do (let ((word (get-word word-index)))
780 (unless (zerop word)
781 (return-from ,name (calc-index (end-bit word)))))
782 (decf word-index))
783 ;; leading chunk - always executed
784 (let ((word (logand start-mask (get-word first-word))))
785 (unless (zerop word)
786 (calc-index (end-bit word)))))
788 ;; Forward scan:
789 `(let* ((word-index first-word)
790 (word (logand start-mask (get-word word-index))))
791 (declare (index word-index))
792 (unless (zerop word)
793 (return-from ,name (calc-index (start-bit word))))
794 (incf word-index)
795 ;; Scan full words up to but excluding LAST-WORD
796 (loop while (< word-index last-word) ; might execute 0 times
797 do (let ((word (get-word word-index)))
798 (unless (zerop word)
799 (return-from ,name (calc-index (start-bit word)))))
800 (incf word-index))
801 ;; Scan last word unless no bits in mask
802 (unless (zerop end-mask)
803 (let ((word (logand end-mask (get-word word-index))))
804 (unless (zerop word)
805 (calc-index (start-bit word))))))))))))
807 (defun run-bit-position-assertions ()
808 ;; Check the claim in the comment at "(unless (< first-word last-word)"
809 (loop for i from 0 to (* 2 n-word-bits)
810 do (let ((start-mask (compute-start-mask i))
811 (end-mask (compute-end-mask i)))
812 (assert (= (logand start-mask end-mask) 0)))))
814 (def %bit-pos-fwd/1 nil (identity))
815 (def %bit-pos-rev/1 t (identity))
816 (def %bit-pos-fwd/0 nil (logandc2 most-positive-word))
817 (def %bit-pos-rev/0 t (logandc2 most-positive-word)))
819 ;; Known direction, unknown item to find
820 (defun %bit-pos-fwd (bit vector start end)
821 (case bit
822 (0 (%bit-pos-fwd/0 vector start end))
823 (1 (%bit-pos-fwd/1 vector start end))
824 (otherwise nil)))
825 (defun %bit-pos-rev (bit vector start end)
826 (case bit
827 (0 (%bit-pos-rev/0 vector start end))
828 (1 (%bit-pos-rev/1 vector start end))
829 (otherwise nil)))
831 ;; Known item to find, unknown direction
832 (declaim (maybe-inline %bit-position/0 %bit-position/1))
833 (defun %bit-position/0 (vector from-end start end)
834 (if from-end
835 (%bit-pos-rev/0 vector start end)
836 (%bit-pos-fwd/0 vector start end)))
837 (defun %bit-position/1 (vector from-end start end)
838 (if from-end
839 (%bit-pos-rev/1 vector start end)
840 (%bit-pos-fwd/1 vector start end)))
842 (defun %bit-position (bit vector from-end start end)
843 (declare (inline %bit-position/0 %bit-position/1))
844 (case bit
845 (0 (%bit-position/0 vector from-end start end))
846 (1 (%bit-position/1 vector from-end start end))
847 (otherwise nil)))
848 (clear-info :function :inlinep '%bit-position/0)
849 (clear-info :function :inlinep '%bit-position/1)
851 ;;; These are needed ASAP (in target-unicode)
852 (defun shift-towards-start (number count) (shift-towards-start number count))
853 (defun shift-towards-end (number count) (shift-towards-end number count))
855 (run-bit-position-assertions)