1 ;;;; allocation VOPs for the x86-64
3 ;;;; This software is part of the SBCL system. See the README file for
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
14 ;;;; allocation helpers
16 ;;; Most allocation is done by inline code with sometimes help
17 ;;; from the C alloc() function by way of the alloc-tramp
20 (defun tagify (result base lowtag
)
22 (inst mov result base
)
23 (inst lea result
(ea lowtag base
))))
25 (defun stack-allocation (size lowtag alloc-tn
&optional known-alignedp
)
26 (aver (not (location= alloc-tn rsp-tn
)))
27 (inst sub rsp-tn size
)
28 ;; see comment in x86/macros.lisp implementation of this
29 ;; However that comment seems inapplicable here because:
30 ;; - PAD-DATA-BLOCK quite clearly enforces double-word alignment,
31 ;; contradicting "... unfortunately not enforced by ..."
32 ;; - It's not the job of FIXED-ALLOC to realign anything.
33 ;; - The real issue is that it's not obvious that the stack is
34 ;; 16-byte-aligned at *all* times. Maybe it is, maybe it isn't.
35 (unless known-alignedp
; can skip this AND if we're all good
36 (inst and rsp-tn
#.
(lognot lowtag-mask
)))
37 (tagify alloc-tn rsp-tn lowtag
)
40 (defun alloc-unboxed-p (type)
45 #.double-float-widetag
46 #.complex-single-float-widetag
47 #.complex-double-float-widetag
)
50 ;;; Insert allocation profiler instrumentation
51 (eval-when (:compile-toplevel
)
52 (aver (= thread-tot-bytes-alloc-unboxed-slot
53 (1+ thread-tot-bytes-alloc-boxed-slot
))))
55 ;;; Emit counter increments for SB-APROF. SCRATCH-REGISTERS is either a TN
56 ;;; or list of TNs that can be used to store into the profiling data.
57 ;;; We pick one of the available TNs to use for addressing the data buffer.
58 ;;; The TN that we pick can't be R12 because encoding it into an instruction
59 ;;; always requires a SIB byte, which doesn't fit in the reserved bytes
60 ;;; of the instruction stream where hot patching occurs.
62 ;;; F048FF4018 LOCK INC QWORD PTR [RAX+24]
63 ;;; F049FF442418 LOCK INC QWORD PTR [R12+24]
64 (defun instrument-alloc (type size node scratch-registers
67 (if (listp scratch-registers
)
68 (dolist (reg scratch-registers
69 (first scratch-registers
))
70 (unless (location= reg r12-tn
) (return reg
)))
72 (declare (ignorable type thread-temp
))
73 ;; Each allocation sequence has to call INSTRUMENT-ALLOC,
74 ;; so we may as well take advantage of this fact to load the temp reg
75 ;; here, if provided, rather than spewing more #+gs-seg tests around.
76 #+gs-seg
(when thread-temp
(inst rdgsbase thread-temp
))
77 (when (member :allocation-size-histogram sb-xc
:*features
*)
78 (let ((use-size-temp (not (typep size
'(or (signed-byte 32) tn
)))))
79 ;; Sum up the sizes of boxed vs unboxed allocations.
80 (cond ((tn-p type
) ; from ALLOCATE-VECTOR-ON-HEAP
81 ;; Constant huge size + unknown type can't occur.
82 (aver (not use-size-temp
))
83 (inst cmp
:byte type simple-vector-widetag
)
85 (inst and
:dword temp
1)
87 (ea thread-segment-reg
88 (ash thread-tot-bytes-alloc-boxed-slot word-shift
)
93 (thread-slot-ea (if (alloc-unboxed-p type
)
94 thread-tot-bytes-alloc-unboxed-slot
95 thread-tot-bytes-alloc-boxed-slot
))
96 (cond (use-size-temp (inst mov temp size
) temp
)
100 ;; optimistically assume it's a small object, so just divide
101 ;; the size by the size of a cons to get a (1-based) index.
102 (inst mov
:dword temp size
)
103 (inst shr
:dword temp
(1+ word-shift
))
104 ;; now see if the computed index is in range
105 (inst cmp size
(* n-histogram-bins-small
16))
107 ;; oversized. Compute the log2 of the size
108 (inst bsr
:dword temp size
)
109 ;; array of counts ... | array of sizes ...
110 (inst add
:qword
(ea (ash (+ thread-allocator-histogram-slot
112 (- first-large-histogram-bin-log2size
)
113 n-histogram-bins-small
114 n-histogram-bins-large
)
118 ;; not sure why this is "2" and not "1" in the fudge factor!!
119 ;; (but the assertions come out right)
120 (inst add
:dword temp
121 (+ (- first-large-histogram-bin-log2size
) n-histogram-bins-small
2))
123 (inst inc
:qword
(ea thread-segment-reg
124 (ash (1- thread-allocator-histogram-slot
) word-shift
)
126 ((<= size
(* sb-vm
:cons-size sb-vm
:n-word-bytes n-histogram-bins-small
))
127 (let ((index (1- (/ size
(* sb-vm
:cons-size sb-vm
:n-word-bytes
)))))
128 (inst inc
:qword
(thread-slot-ea (+ thread-allocator-histogram-slot index
)))))
130 (let ((index (- (integer-length size
) first-large-histogram-bin-log2size
)))
131 (inst add
:qword
(thread-slot-ea (+ thread-allocator-histogram-slot
132 n-histogram-bins-small
133 n-histogram-bins-large index
))
135 (inst inc
:qword
(thread-slot-ea (+ thread-allocator-histogram-slot
136 n-histogram-bins-small index
))))))))
137 (when (policy node
(> sb-c
::instrument-consing
1))
139 (aver (not (location= size temp
))))
140 ;; CAUTION: the logic for RAX-SAVE is entirely untested
141 ;; as it never gets exercised, and can not be, until R12 ceases
142 ;; to have its wired use as the GC card table base register.
143 (binding* (((data rax-save
) (if (location= temp r12-tn
)
146 (patch-loc (gen-label))
147 (skip-instrumentation (gen-label)))
148 ;; Don't count allocations to the arena
149 (inst cmp
:qword
(thread-slot-ea thread-arena-slot
) 0)
150 (inst jmp
:nz skip-instrumentation
)
151 (when rax-save
(inst push rax-tn
))
152 (inst mov data
(thread-slot-ea thread-profile-data-slot thread-temp
))
153 (inst test data data
)
154 ;; This instruction is modified to "JMP :z" when profiling is
155 ;; partially enabled. After the buffer is assigned, it becomes
156 ;; fully enabled. The unconditional jmp gives minimal performance
157 ;; loss if the profiler is statically disabled. (one memory
158 ;; read and a test whose result is never used, which the CPU
159 ;; is good at ignoring as far as instruction prefetch goes)
160 (emit-label patch-loc
)
161 (push patch-loc
(sb-assem::asmstream-alloc-points sb-assem
:*asmstream
*))
162 (inst jmp skip-instrumentation
)
163 (emit-alignment 3 :long-nop
)
165 (if (integerp size
) 'enable-alloc-counter
'enable-sized-alloc-counter
)))
166 ;; This jump is always encoded as 5 bytes
167 (inst call
(if (or (not node
) ; assembly routine
168 (sb-c::code-immobile-p node
))
169 (make-fixup helper
:assembly-routine
)
170 (uniquify-fixup helper
))))
172 ;; Emit "TEST AL, imm" where the immediate value
173 ;; encodes the the data buffer base reg and size reg numbers.
174 (inst byte
#xA8
) ; "TEST AL,imm"
175 (cond ((integerp size
)
176 (inst byte
(tn-offset data
)))
178 (inst byte
(logior (tn-offset data
) (ash (tn-offset size
) 4)))
179 (inst .skip
8 :long-nop
)))
180 (when rax-save
(inst pop rax-tn
))
181 (emit-label skip-instrumentation
))))
183 ;;; An arbitrary marker for the cons primitive-type, not to be confused
184 ;;; with the CONS-TYPE in our type-algebraic sense. Mostly just informs
185 ;;; the allocator to use cons_tlab.
186 (defconstant +cons-primtype
+ list-pointer-lowtag
)
188 (define-vop (sb-c::end-pseudo-atomic
)
189 (:generator
1 (emit-end-pseudo-atomic)))
191 ;;; Emit code to allocate an object with a size in bytes given by
192 ;;; SIZE into ALLOC-TN. The size may be an integer of a TN.
193 ;;; NODE may be used to make policy-based decisions.
194 ;;; This function should only be used inside a pseudo-atomic section,
195 ;;; which to the degree needed should also cover subsequent initialization.
197 ;;; A mnemonic device for the argument pattern here:
198 ;;; 1. what to allocate: type, size, lowtag describe the object
199 ;;; 2. where to put the result
200 ;;; 3. node (for determining immobile-space-p) and a scratch register or two
201 (defun allocation (type size lowtag alloc-tn node temp thread-temp
203 &aux
(systemp (system-tlab-p type node
)))
204 (declare (ignorable thread-temp
))
205 (flet ((fallback (size)
206 ;; Call an allocator trampoline and get the result in the proper register.
207 ;; There are 2 choices of trampoline to invoke alloc() or alloc_list()
208 ;; in C. This is chosen by the name of the asm routine.
209 (cond ((typep size
'(and integer
(not (signed-byte 32))))
210 ;; MOV accepts large immediate operands, PUSH does not
211 (inst mov alloc-tn size
)
212 (inst push alloc-tn
))
218 (if (eql type
+cons-primtype
+) 'sys-list-alloc-tramp
'sys-alloc-tramp
)
219 (if (eql type
+cons-primtype
+) 'list-alloc-tramp
'alloc-tramp
))
221 (inst pop alloc-tn
)))
222 (let* ((NOT-INLINE (gen-label))
224 (free-pointer #+sb-thread
225 (let ((slot (if systemp
226 (if (eql type
+cons-primtype
+)
227 thread-sys-cons-tlab-slot
228 thread-sys-mixed-tlab-slot
)
229 (if (eql type
+cons-primtype
+)
230 thread-cons-tlab-slot
231 thread-mixed-tlab-slot
))))
232 (thread-slot-ea slot
#+gs-seg thread-temp
))
234 (ea (+ static-space-start
235 (if (eql type
+cons-primtype
+)
237 mixed-region-offset
))))
238 (end-addr (ea (sb-x86-64-asm::ea-segment free-pointer
)
239 (+ n-word-bytes
(ea-disp free-pointer
))
240 (ea-base free-pointer
))))
241 (cond ((typep size
`(integer ,large-object-size
))
242 ;; large objects will never be made in a per-thread region
243 (cond (overflow (funcall overflow
))
245 (when (/= lowtag
0) (inst or
:byte alloc-tn lowtag
)))))
246 ((and (tn-p size
) (location= size alloc-tn
))
247 (aver (and temp
(not (location= temp size
))))
248 (inst mov temp free-pointer
)
249 ;; alloc-tn <- old free ptr and temp <- new free ptr
250 (inst xadd temp alloc-tn
)
251 (inst cmp temp end-addr
)
252 (inst jmp
:a NOT-INLINE
)
253 (inst mov free-pointer temp
)
255 (when (/= lowtag
0) (inst or
:byte alloc-tn lowtag
))
256 (assemble (:elsewhere
)
257 (emit-label NOT-INLINE
)
258 (inst sub temp alloc-tn
) ; new-free-ptr - old-free-ptr = size
259 (cond (overflow (funcall overflow
))
263 ;; fixed-size allocation whose size fits in an imm32 can be done
264 ;; with only one register, the ALLOC-TN. If it doesn't fit in imm32,
265 ;; it would get the first branch of the COND, for large objects.
266 (inst mov alloc-tn free-pointer
)
268 (when (tn-p size
) (aver (not (location= size temp
))))
269 (inst lea temp
(ea size alloc-tn
))
270 (inst cmp temp end-addr
)
271 (inst jmp
:a NOT-INLINE
)
272 (inst mov free-pointer temp
)
274 (when (/= lowtag
0) (inst or
:byte alloc-tn lowtag
)))
276 (inst add alloc-tn size
)
277 (inst cmp alloc-tn end-addr
)
278 (inst jmp
:a NOT-INLINE
)
279 (inst mov free-pointer alloc-tn
)
281 (inst sub alloc-tn size
)
283 (when (/= lowtag
0) (inst or
:byte alloc-tn lowtag
)))
285 ;; SUB can compute the result and tagify it.
286 ;; The fallback also has to tagify.
287 (let ((bias (+ (- size
) lowtag
)))
288 (if (= bias -
1) (inst dec alloc-tn
) (inst add alloc-tn bias
)))
289 (emit-label DONE
)))))
290 (assemble (:elsewhere
)
291 (emit-label NOT-INLINE
)
292 (cond (overflow (funcall overflow
))
295 (when (and (/= lowtag
0) (not temp
) (not (tn-p size
)))
296 (inst or
:byte alloc-tn lowtag
))
297 (inst jmp DONE
))))))))
300 ;;; Allocate an other-pointer object of fixed NWORDS with a single-word
301 ;;; header having the specified WIDETAG value. The result is placed in
302 ;;; RESULT-TN. NWORDS counts the header word.
303 (defun alloc-other (widetag nwords result-tn node alloc-temps thread-temp
305 &aux
(bytes (pad-data-block nwords
)))
306 (declare (ignorable thread-temp
))
307 (declare (dynamic-extent init
))
309 (when (= widetag bignum-widetag
) (setq bytes
(* bytes
2))) ; use 2x the space
310 (instrument-alloc widetag bytes node
(cons result-tn
(ensure-list alloc-temps
)) thread-temp
)
311 (let ((header (compute-object-header nwords widetag
))
312 (alloc-temp (if (listp alloc-temps
) (car alloc-temps
) alloc-temps
)))
315 (allocation widetag bytes
0 result-tn node alloc-temp thread-temp
)
316 (storew* header result-tn
0 0 t
)
317 (inst or
:byte result-tn other-pointer-lowtag
))
319 (allocation widetag bytes other-pointer-lowtag result-tn node nil thread-temp
)
320 (storew* header result-tn
0 other-pointer-lowtag t
)))
324 (defun list-ctor-push-elt (x scratch
)
325 (inst push
(if (sc-is x immediate
)
326 (let ((bits (encode-value-if-immediate x
)))
327 (or (plausible-signed-imm32-operand-p bits
)
328 (progn (inst mov scratch bits
) scratch
)))
331 ;;;; CONS, ACONS, LIST and LIST*
333 (defun init-list (prev-constant tn list slot lowtag temp zeroed
)
334 ;; TODO: gencgc does not need EMIT-GC-STORE-BARRIER here,
335 ;; but other other GC strategies might.
336 (let* ((immediate-value)
340 ;; a CONSTANT sc does not imply that we have a compile-time constant-
341 ;; it could be load-time in which case it does not satisfy constant-tn-p.
342 (unless (and (constant-tn-p tn
) (eql prev-constant
(tn-value tn
)))
343 (setf prev-constant
(if (constant-tn-p tn
) (tn-value tn
) temp
))
347 (if (eql prev-constant
(setf immediate-value
(tn-value tn
)))
350 ;; 1. "-if-immediate" is slighty misleading since it _is_ immediate.
351 ;; 2. This unfortunately treats STOREW* as a leaky abstraction
352 ;; because we have to know exactly what it rejects up front
353 ;; rather than asking it whether it can emit a single instruction
354 ;; that will do the trick.
355 (let ((bits (encode-value-if-immediate tn
)))
356 (when (and zeroed
(typep bits
'(unsigned-byte 31)))
357 (storew* bits list slot lowtag zeroed
)
358 (return-from init-list prev-constant
)) ; Return our "in/out" arg
361 (setf prev-constant temp
) ;; a non-eq initial value
366 ;; STOREW returns TEMP if and only if it stored using it.
367 ;; (Perhaps not the clearest idiom.)
368 (when (eq (storew reg list slot lowtag temp
) temp
)
369 (setf prev-constant immediate-value
)))
370 prev-constant
) ; Return our "in/out" arg
372 (macrolet ((pop-arg (ref)
373 `(prog1 (tn-ref-tn ,ref
) (setf ,ref
(tn-ref-across ,ref
))))
374 (store-slot (arg list slot
&optional
(lowtag list-pointer-lowtag
))
375 ;; PREV-CONSTANT is akin to a pass-by-reference arg to the function which
376 ;; used to be all inside this macro.
378 (init-list prev-constant
,arg
,list
,slot
,lowtag temp zeroed
))))
381 (:args
(car :scs
(any-reg descriptor-reg constant immediate control-stack
))
382 (cdr :scs
(any-reg descriptor-reg constant immediate control-stack
)))
383 (:temporary
(:sc unsigned-reg
:to
(:result
0) :target result
) alloc
)
384 (:temporary
(:sc unsigned-reg
:to
(:result
0)
385 :unused-if
(node-stack-allocate-p (sb-c::vop-node vop
)))
387 (:results
(result :scs
(descriptor-reg)))
388 #+gs-seg
(:temporary
(:sc unsigned-reg
:offset
15) thread-tn
)
393 ((node-stack-allocate-p node
)
394 (inst and rsp-tn
(lognot lowtag-mask
))
395 (cond ((and (sc-is car immediate
) (sc-is cdr immediate
)
396 (typep (encode-value-if-immediate car
) '(signed-byte 8))
397 (typep (encode-value-if-immediate cdr
) '(signed-byte 8)))
398 ;; (CONS 0 0) takes just 4 bytes to encode the PUSHes (for example)
399 (inst push
(encode-value-if-immediate cdr
))
400 (inst push
(encode-value-if-immediate car
)))
401 ((and (sc-is car immediate
) (sc-is cdr immediate
)
402 (eql (encode-value-if-immediate car
) (encode-value-if-immediate cdr
)))
403 (inst mov alloc
(encode-value-if-immediate cdr
))
407 (list-ctor-push-elt cdr alloc
)
408 (list-ctor-push-elt car alloc
)))
409 (inst lea result
(ea list-pointer-lowtag rsp-tn
)))
411 (let ((nbytes (* cons-size n-word-bytes
))
412 (zeroed #+mark-region-gc t
)
413 (prev-constant temp
)) ;; a non-eq initial value
414 (instrument-alloc +cons-primtype
+ nbytes node
(list temp alloc
) thread-tn
)
415 (pseudo-atomic (:thread-tn thread-tn
)
416 (allocation +cons-primtype
+ nbytes
0 alloc node temp thread-tn
)
417 (store-slot car alloc cons-car-slot
0)
418 (store-slot cdr alloc cons-cdr-slot
0)
419 (if (location= alloc result
)
420 (inst or
:byte alloc list-pointer-lowtag
)
421 (inst lea result
(ea list-pointer-lowtag alloc
)))))))))
424 (:args
(key :scs
(any-reg descriptor-reg constant immediate control-stack
))
425 (val :scs
(any-reg descriptor-reg constant immediate control-stack
))
426 (tail :scs
(any-reg descriptor-reg constant immediate control-stack
)))
427 (:temporary
(:sc unsigned-reg
:to
(:result
0)) alloc
)
428 (:temporary
(:sc unsigned-reg
:to
(:result
0) :target result
) temp
)
429 (:results
(result :scs
(descriptor-reg)))
430 #+gs-seg
(:temporary
(:sc unsigned-reg
:offset
15) thread-tn
)
435 (let ((nbytes (* cons-size
2 n-word-bytes
))
436 (zeroed #+mark-region-gc t
)
437 (prev-constant temp
))
438 (instrument-alloc +cons-primtype
+ nbytes node
(list temp alloc
) thread-tn
)
439 (pseudo-atomic (:thread-tn thread-tn
)
440 (allocation +cons-primtype
+ nbytes
0 alloc node temp thread-tn
)
441 (store-slot tail alloc cons-cdr-slot
0)
442 (inst lea temp
(ea (+ 16 list-pointer-lowtag
) alloc
))
443 (store-slot temp alloc cons-car-slot
0)
444 (setf prev-constant temp
)
445 (let ((pair temp
) (temp alloc
)) ; give STORE-SLOT the ALLOC as its TEMP
446 (store-slot key pair cons-car-slot
)
447 (store-slot val pair cons-cdr-slot
))
448 ;; ALLOC could have been clobbered by using it as a temp for
449 ;; loading a constant.
450 (if (location= temp result
)
451 (inst sub result
16) ; TEMP is ALLOC+16+lowtag, so just subtract 16
452 (inst lea result
(ea (- 16) temp
)))))))
454 ;;; CONS-2 is similar to ACONS, except that instead of producing
455 ;;; ((X . Y) . Z) it produces (X Y . Z)
457 (:args
(car :scs
(any-reg descriptor-reg constant immediate control-stack
))
458 (cadr :scs
(any-reg descriptor-reg constant immediate control-stack
))
459 (cddr :scs
(any-reg descriptor-reg constant immediate control-stack
)))
460 (:temporary
(:sc unsigned-reg
:to
(:result
0) :target result
) alloc
)
461 (:temporary
(:sc unsigned-reg
:to
(:result
0)
462 :unused-if
(node-stack-allocate-p (sb-c::vop-node vop
)))
464 (:results
(result :scs
(descriptor-reg)))
465 #+gs-seg
(:temporary
(:sc unsigned-reg
:offset
15) thread-tn
)
470 ((node-stack-allocate-p node
)
471 (inst and rsp-tn
(lognot lowtag-mask
))
472 (list-ctor-push-elt cddr alloc
)
473 (list-ctor-push-elt cadr alloc
)
474 (inst lea alloc
(ea list-pointer-lowtag rsp-tn
))
475 (inst push alloc
) ; cdr of the first cons
476 (list-ctor-push-elt car alloc
)
477 (inst lea result
(ea list-pointer-lowtag rsp-tn
)))
479 (let ((nbytes (* cons-size
2 n-word-bytes
))
480 (zeroed #+mark-region-gc t
)
481 (prev-constant temp
))
482 (instrument-alloc +cons-primtype
+ nbytes node
(list temp alloc
) thread-tn
)
483 (pseudo-atomic (:thread-tn thread-tn
)
484 (allocation +cons-primtype
+ nbytes
0 alloc node temp thread-tn
)
485 (store-slot car alloc cons-car-slot
0)
486 (store-slot cadr alloc
(+ 2 cons-car-slot
) 0)
487 (store-slot cddr alloc
(+ 2 cons-cdr-slot
) 0)
488 (inst lea temp
(ea (+ 16 list-pointer-lowtag
) alloc
))
489 (store-slot temp alloc cons-cdr-slot
0)
490 (if (location= alloc result
)
491 (inst or
:byte alloc list-pointer-lowtag
)
492 (inst lea result
(ea list-pointer-lowtag alloc
)))))))))
495 (:args
(things :more t
:scs
(descriptor-reg any-reg constant immediate
)))
496 (:temporary
(:sc unsigned-reg
) ptr temp
)
497 (:temporary
(:sc unsigned-reg
:to
(:result
0) :target result
) res
)
498 #+gs-seg
(:temporary
(:sc unsigned-reg
:offset
15) thread-tn
)
499 (:info star cons-cells
)
500 (:results
(result :scs
(descriptor-reg)))
503 (aver (>= cons-cells
3)) ; prevent regressions in ir2tran's vop selection
504 (let* ((stack-allocate-p (node-stack-allocate-p node
))
505 (size (* (pad-data-block cons-size
) cons-cells
))
506 (zeroed #+mark-region-gc
(not stack-allocate-p
))
507 (prev-constant temp
))
508 (unless stack-allocate-p
509 (instrument-alloc +cons-primtype
+ size node
(list ptr temp
) thread-tn
))
510 (pseudo-atomic (:elide-if stack-allocate-p
:thread-tn thread-tn
)
512 (stack-allocation size list-pointer-lowtag res
)
513 (allocation +cons-primtype
+ size list-pointer-lowtag res node temp thread-tn
))
515 (dotimes (i (1- cons-cells
))
516 (store-slot (pop-arg things
) ptr cons-car-slot
)
517 (inst add ptr
(pad-data-block cons-size
))
518 (storew ptr ptr
(- cons-cdr-slot cons-size
) list-pointer-lowtag
))
519 (store-slot (pop-arg things
) ptr cons-car-slot list-pointer-lowtag
)
521 (store-slot (pop-arg things
) ptr cons-cdr-slot list-pointer-lowtag
)
522 (storew* nil-value ptr cons-cdr-slot list-pointer-lowtag zeroed
))))
528 (:translate unaligned-dx-cons
)
530 (:results
(result :scs
(descriptor-reg)))
534 (inst push nil-value
)
535 (inst lea result
(ea (- list-pointer-lowtag n-word-bytes
) rsp-tn
))))
537 ;;;; special-purpose inline allocators
539 ;;; Special variant of 'storew' which might have a shorter encoding
540 ;;; when storing to the heap (which starts out zero-filled).
541 ;;; This will always write 8 bytes if WORD is a negative number.
542 (defun storew* (word object slot lowtag zeroed
&optional temp
)
544 ;; FIXME: I this misses some cases that could use a dword store
545 ;; when the heap is prezeroed. Why can't it take (UNSIGNED-BYTE 32) ?
546 ;; For example #x8000FFFF would work as a :DWORD and we leave the upper 4
548 ((or (not zeroed
) (not (typep word
'(unsigned-byte 31))))
549 ;; Will use temp reg if WORD can't be encoded as an imm32
550 (storew word object slot lowtag temp
))
553 (cond ((typep word
'(unsigned-byte 8))
555 ((and (not (logtest word
#xff
))
556 (typep (ash word -
8) '(unsigned-byte 8)))
557 ;; Array lengths 128 to 16384 which are multiples of 128
558 (setq word
(ash word -
8))
559 (decf lowtag
1) ; increment address by 1
561 ((and (not (logtest word
#xffff
))
562 (typep (ash word -
16) '(unsigned-byte 8)))
564 (setq word
(ash word -
16))
565 (decf lowtag
2) ; increment address by 2
567 ((typep word
'(unsigned-byte 16))
569 (t ; must be an (unsigned-byte 31)
571 (inst mov size
(ea (- (* slot n-word-bytes
) lowtag
) object
) word
)))))
574 (defun store-string-trailing-null (vector type length words
)
575 ;; BASE-STRING needs to have a null terminator. The byte is inaccessible
576 ;; to lisp, so clear it now.
577 (cond ((and (sc-is type immediate
)
578 (/= (tn-value type
) sb-vm
:simple-base-string-widetag
))) ; do nothing
579 ((and (sc-is type immediate
)
580 (= (tn-value type
) sb-vm
:simple-base-string-widetag
)
581 (sc-is length immediate
))
582 (inst mov
:byte
(ea (- (+ (ash vector-data-offset word-shift
)
584 other-pointer-lowtag
)
587 ;; Zeroizing the entire final word is easier than using LENGTH now.
588 ((sc-is words immediate
)
589 ;; I am not convinced that this case is reachable -
590 ;; we won't DXify a vector of unknown type.
592 ;; Given N data words, write to word N-1
593 (ea (- (ash (+ (tn-value words
) vector-data-offset -
1)
595 other-pointer-lowtag
)
599 ;; This final case is ok with 0 data words - it might clobber the LENGTH
600 ;; slot, but subsequently we rewrite that slot.
601 ;; But strings always have at least 1 word, so no worries either way.
603 (ea (- (ash (1- vector-data-offset
) word-shift
)
604 other-pointer-lowtag
)
606 words
(ash 1 (- word-shift n-fixnum-tag-bits
)))
609 (macrolet ((calc-size-in-bytes (n-words size-tn
)
610 `(cond ((sc-is ,n-words immediate
)
611 (pad-data-block (+ (tn-value ,n-words
) vector-data-offset
)))
614 (ea (+ lowtag-mask
(* vector-data-offset n-word-bytes
))
615 nil
,n-words
(ash 1 (- word-shift n-fixnum-tag-bits
))))
616 (inst and
,size-tn
(lognot lowtag-mask
))
618 (put-header (vector-tn lowtag type len zeroed temp
)
619 `(let ((len (if (sc-is ,len immediate
) (fixnumize (tn-value ,len
)) ,len
))
620 (type (if (sc-is ,type immediate
) (tn-value ,type
) ,type
)))
621 (storew* type
,vector-tn
0 ,lowtag
,zeroed
,temp
)
622 #+ubsan
(inst mov
:dword
(vector-len-ea ,vector-tn
,lowtag
) len
)
623 #-ubsan
(storew* len
,vector-tn vector-length-slot
624 ,lowtag
,zeroed
,temp
)))
627 (if (sc-is type immediate
)
628 (/= (tn-value type
) simple-vector-widetag
)
630 (calc-shadow-bits-size (reg)
631 `(cond ((sc-is length immediate
)
632 ;; Calculate number of dualwords (as 128 bits per dualword)
633 ;; and multiply by 16 to get number of bytes. Also add the 2 header words.
634 (* 16 (+ 2 (ceiling (tn-value length
) 128))))
636 ;; Compute (CEILING length 128) by adding 127, then truncating divide
637 ;; by 128, and untag as part of the divide step.
638 ;; Account for the two fixed words by adding in 128 more bits initially.
639 (inst lea
:dword
,reg
(ea (fixnumize (+ 128 127)) length
))
640 (inst shr
:dword
,reg
8) ; divide by 128 and untag as one operation
641 (inst shl
:dword
,reg
4) ; multiply by 16 bytes per dualword
643 (store-originating-pc (vector)
644 ;; Put the current program-counter into the length slot of the shadow bits
645 ;; so that we can ascribe blame to the array's creator.
646 `(let ((here (gen-label)))
648 (inst lea temp
(rip-relative-ea here
))
650 (inst mov
(ea (- 8 other-pointer-lowtag
) ,vector
) temp
))))
652 (define-vop (allocate-vector-on-heap)
653 #+ubsan
(:info poisoned
)
654 (:args
(type :scs
(unsigned-reg immediate
))
655 (length :scs
(any-reg immediate
))
656 (words :scs
(any-reg immediate
)))
657 ;; Result is live from the beginning, like a temp, because we use it as such
658 ;; in 'calc-size-in-bytes'
659 (:results
(result :scs
(descriptor-reg) :from
:load
))
660 (:arg-types
#+ubsan
(:constant t
)
661 positive-fixnum positive-fixnum positive-fixnum
)
662 (:temporary
(:sc unsigned-reg
) temp
)
663 #+gs-seg
(:temporary
(:sc unsigned-reg
:offset
15) thread-tn
)
668 (when (want-shadow-bits)
669 ;; allocate a vector of "written" bits unless the vector is simple-vector-T,
670 ;; which can use unbound-marker as a poison value on reads.
671 (when (sc-is type unsigned-reg
)
672 (inst cmp
:byte type simple-vector-widetag
)
674 (inst jmp
:e NO-SHADOW-BITS
))
675 ;; It would be possible to do this and the array proper
676 ;; in a single pseudo-atomic section, but I don't care to do that.
677 (let ((nbytes (calc-shadow-bits-size result
)))
679 ;; Allocate the bits into RESULT
680 (allocation simple-bit-vector-widetag nbytes
0 result node temp nil
)
681 (inst mov
:byte
(ea result
) simple-bit-vector-widetag
)
682 (inst mov
:dword
(vector-len-ea result
0)
683 (if (sc-is length immediate
) (fixnumize (tn-value length
)) length
))
684 (inst or
:byte result other-pointer-lowtag
)))
685 (store-originating-pc result
)
686 (inst push result
)) ; save the pointer to the shadow bits
688 ;; The LET generates instructions that needn't be pseudoatomic
689 ;; so don't move it inside.
690 ;; There are 3 possibilities for correctness of INSTRUMENT-ALLOC:
691 ;; * If WORDS is not immediate, and ALLOC-TEMP is R12, then compute size
692 ;; into ALLOC-TEMP, use RESULT as the instrumentation temp.
693 ;; ALLOCATION receives: input = ALLOC-TEMP, output = RESULT, and no other temp
694 ;; * If WORDS is not immediate and ALLOC-TEMP is not R12, then compute size
695 ;; into RESULT, use ALLOC-TEMP as the instrumentation temp.
696 ;; ALLOCATION receives: input = RESULT, output = RESULT, temp = ALLOC-TEMP.
697 (multiple-value-bind (size-tn instrumentation-temp alloc-temp
)
698 (cond ((sc-is words immediate
)
699 ;; If WORDS is immediate, then let INSTRUMENT-ALLOC choose its temp
700 (values (calc-size-in-bytes words nil
) (list result temp
) temp
))
701 ((location= temp r12-tn
)
702 ;; Compute the size into TEMP, use RESULT for instrumentation.
703 ;; Don't give another temp to ALLOCATION, because its SIZE and temp
704 ;; can not be in the same register (which it AVERs).
705 (values (calc-size-in-bytes words temp
) result nil
))
707 ;; Compute the size into RESULT, use TEMP for instrumentation.
708 ;; ALLOCATION needs the temp register in this case,
709 ;; because input and output are in the same register.
710 (values (calc-size-in-bytes words result
) temp temp
)))
711 (instrument-alloc (if (sc-is type immediate
)
712 (case (tn-value type
)
713 (#.simple-vector-widetag
'simple-vector
)
716 size-tn node instrumentation-temp thread-tn
)
717 (pseudo-atomic (:thread-tn thread-tn
)
718 (allocation type size-tn
0 result node alloc-temp thread-tn
)
719 (put-header result
0 type length t alloc-temp
)
720 (inst or
:byte result other-pointer-lowtag
)))
722 (cond ((want-shadow-bits)
723 (inst pop temp-reg-tn
) ; restore shadow bits
724 (inst mov
(object-slot-ea result
1 other-pointer-lowtag
) temp-reg-tn
))
725 (poisoned ; uninitialized SIMPLE-VECTOR
726 (store-originating-pc result
)))))
728 (define-vop (allocate-vector-on-stack)
729 #+ubsan
(:info poisoned
)
730 (:args
(type :scs
(unsigned-reg immediate
))
731 (length :scs
(any-reg (immediate
732 (typep (fixnumize (tn-value tn
))
733 '(signed-byte 32)))))
734 (words :scs
(any-reg (immediate
735 (typep (pad-data-block (+ (tn-value tn
) vector-data-offset
))
737 (:results
(result :scs
(descriptor-reg) :from
:load
))
740 (:arg-types
#+ubsan
(:constant t
)
741 positive-fixnum positive-fixnum positive-fixnum
)
742 #+ubsan
(:temporary
(:sc any-reg
:offset rax-offset
) rax
)
743 #+ubsan
(:temporary
(:sc any-reg
:offset rcx-offset
) rcx
)
744 #+ubsan
(:temporary
(:sc any-reg
:offset rdi-offset
) rdi
)
748 (when (want-shadow-bits)
749 ;; allocate a vector of "written" bits unless the vector is simple-vector-T,
750 ;; which can use unbound-marker as a poison value on reads.
751 (when (sc-is type unsigned-reg
) (bug "vector-on-stack: unknown type"))
753 (let ((nbytes (calc-shadow-bits-size rcx
)))
754 (stack-allocation nbytes
0 rdi
)
755 (when (sc-is length immediate
) (inst mov rcx nbytes
)))
757 (inst stos
:byte
) ; RAX was zeroed
758 (inst lea rax
(ea other-pointer-lowtag rsp-tn
))
759 (inst mov
:dword
(ea (- other-pointer-lowtag
) rax
) simple-bit-vector-widetag
)
760 (inst mov
:dword
(vector-len-ea rax
)
761 (if (sc-is length immediate
) (fixnumize (tn-value length
)) length
))
762 (store-originating-pc rax
))
763 (let ((size (calc-size-in-bytes words result
)))
764 (when (sb-c::make-vector-check-overflow-p node
)
765 (let ((overflow (generate-error-code vop
766 'stack-allocated-object-overflows-stack-error
769 (make-sc+offset immediate-sc-number size
)))))
770 (inst sub rsp-tn size
)
771 (inst cmp
:qword rsp-tn
(thread-slot-ea thread-control-stack-start-slot
))
772 ;; avoid clearing condition codes
773 (inst lea rsp-tn
(if (integerp size
)
776 (inst jmp
:be overflow
)))
777 ;; Compute tagged pointer sooner than later since access off RSP
778 ;; requires an extra byte in the encoding anyway.
779 (stack-allocation size other-pointer-lowtag result
780 ;; If already aligned RSP, don't need to do it again.
781 #+ubsan
(want-shadow-bits))
782 ;; NB: store the trailing null BEFORE storing the header,
783 ;; in case the length in words is 0, which stores into the LENGTH slot
784 ;; as if it were element -1 of data (which probably can't happen).
785 (store-string-trailing-null result type length words
)
786 (put-header result other-pointer-lowtag type length nil nil
)
789 (cond ((want-shadow-bits)
790 (inst mov
(ea (- (ash vector-length-slot word-shift
) other-pointer-lowtag
)
793 (poisoned ; uninitialized SIMPLE-VECTOR
794 (store-originating-pc result
)))))
796 #+linux
; unimplemented for others
797 (define-vop (allocate-vector-on-stack+msan-unpoison
)
798 #+ubsan
(:info poisoned
)
799 #+ubsan
(:ignore poisoned
)
800 (:args
(type :scs
(unsigned-reg immediate
))
801 (length :scs
(any-reg immediate
))
802 (words :scs
(any-reg immediate
)))
803 (:results
(result :scs
(descriptor-reg) :from
:load
))
804 (:arg-types
#+ubsan
(:constant t
)
805 positive-fixnum positive-fixnum positive-fixnum
)
806 ;; This is a separate vop because it needs more temps.
807 (:temporary
(:sc any-reg
:offset rcx-offset
) rcx
)
808 (:temporary
(:sc any-reg
:offset rax-offset
) rax
)
809 (:temporary
(:sc any-reg
:offset rdi-offset
) rdi
)
812 (let ((size (calc-size-in-bytes words result
)))
813 ;; Compute tagged pointer sooner than later since access off RSP
814 ;; requires an extra byte in the encoding anyway.
815 (stack-allocation size other-pointer-lowtag result
)
816 (store-string-trailing-null result type length words
)
817 ;; FIXME: It would be good to check for stack overflow here.
818 (put-header result other-pointer-lowtag type length nil nil
)
819 (cond ((sc-is words immediate
)
820 (inst mov rcx
(+ (tn-value words
) vector-data-offset
)))
822 (inst lea rcx
(ea (ash vector-data-offset n-fixnum-tag-bits
) words
))
823 (inst shr rcx n-fixnum-tag-bits
)))
824 (inst mov rdi msan-mem-to-shadow-xor-const
)
825 (inst xor rdi rsp-tn
) ; compute shadow address
828 (inst stos
:qword
)))))
831 (macrolet ((calc-size-in-bytes (length answer
)
832 `(cond ((sc-is ,length immediate
)
833 (aver (/= (tn-value ,length
) 0))
834 (* (tn-value ,length
) n-word-bytes
2))
836 (inst mov result nil-value
)
837 (inst test
,length
,length
)
841 (ash 1 (1+ (- word-shift n-fixnum-tag-bits
)))))
844 `(let ((size (cond ((typep size
'(or (signed-byte 32) tn
))
847 (inst mov limit size
)
850 (ea (if (fixnump size
) size
0) result
851 (if (fixnump size
) nil size
))))))
853 (define-vop (allocate-list-on-stack)
854 (:args
(length :scs
(any-reg immediate
))
855 (element :scs
(any-reg descriptor-reg
)))
856 (:results
(result :scs
(descriptor-reg) :from
:load
))
857 (:arg-types positive-fixnum
*)
861 (:temporary
(:sc descriptor-reg
) tail next limit
)
863 (let ((size (calc-size-in-bytes length next
))
865 (when (sb-c::make-list-check-overflow-p node
)
866 (let ((overflow (generate-error-code vop
'stack-allocated-object-overflows-stack-error size
)))
867 (inst sub rsp-tn size
)
868 (inst cmp
:qword rsp-tn
(thread-slot-ea thread-control-stack-start-slot
))
869 ;; avoid clearing condition codes
870 (inst lea rsp-tn
(ea rsp-tn size
))
871 (inst jmp
:be overflow
)))
872 (stack-allocation size list-pointer-lowtag result
)
874 (inst mov next result
)
877 (inst add next
(* 2 n-word-bytes
))
878 (storew element tail cons-car-slot list-pointer-lowtag
)
879 ;; Store the CDR even if it will be smashed to nil.
880 (storew next tail cons-cdr-slot list-pointer-lowtag
)
881 (inst cmp next limit
)
883 (storew nil-value tail cons-cdr-slot list-pointer-lowtag
))
886 (define-vop (allocate-list-on-heap)
887 (:args
(length :scs
(any-reg immediate
))
888 ;; Too bad we don't have an SC that implies actually a CPU immediate
889 ;; i.e. fits in an imm32 operand
890 (element :scs
(any-reg descriptor-reg
)))
891 (:results
(result :scs
(descriptor-reg) :from
:load
))
892 (:arg-types positive-fixnum
*)
895 (:temporary
(:sc descriptor-reg
) tail next limit
)
896 #+gs-seg
(:temporary
(:sc unsigned-reg
:offset
15) thread-tn
)
898 (let ((size (calc-size-in-bytes length tail
))
901 (leave-pa (gen-label)))
902 (instrument-alloc +cons-primtype
+ size node
(list next limit
) thread-tn
)
903 (pseudo-atomic (:thread-tn thread-tn
)
904 (allocation +cons-primtype
+ size list-pointer-lowtag result node limit thread-tn
907 ;; Push C call args right-to-left
908 (inst push
(if (integerp size
) (constantize size
) size
))
909 (inst push
(if (sc-is element immediate
) (tn-value element
) element
))
911 'call
(if (system-tlab-p 0 node
) 'sys-make-list
'make-list
) node
)
913 (inst jmp leave-pa
)))
915 (inst mov next result
)
918 (storew next tail cons-cdr-slot list-pointer-lowtag
)
921 (inst add next
(* 2 n-word-bytes
))
922 (storew element tail cons-car-slot list-pointer-lowtag
)
923 (inst cmp next limit
)
925 ;; still pseudo-atomic
926 (storew nil-value tail cons-cdr-slot list-pointer-lowtag
)
927 (emit-label leave-pa
)))
928 done
))) ; label needed by calc-size-in-bytes
931 (define-vop (make-fdefn)
933 (:translate make-fdefn
)
934 (:args
(name :scs
(descriptor-reg) :to
:eval
))
935 (:results
(result :scs
(descriptor-reg) :from
:argument
))
936 #+gs-seg
(:temporary
(:sc unsigned-reg
:offset
15) thread-tn
)
939 (alloc-other fdefn-widetag fdefn-size result node nil thread-tn
941 (storew name result fdefn-name-slot other-pointer-lowtag
)
942 (storew nil-value result fdefn-fun-slot other-pointer-lowtag
)
943 (storew (make-fixup 'undefined-tramp
:assembly-routine
)
944 result fdefn-raw-addr-slot other-pointer-lowtag
)))))
946 (define-vop (make-closure)
947 (:info label length stack-allocate-p
)
948 (:temporary
(:sc any-reg
) temp
)
949 #+gs-seg
(:temporary
(:sc unsigned-reg
:offset
15) thread-tn
)
950 (:results
(result :scs
(descriptor-reg)))
954 (let* ((words (+ length closure-info-offset
)) ; including header
955 (bytes (pad-data-block words
))
956 (header (logior (ash (1- words
) n-widetag-bits
) closure-widetag
))
957 (remain-pseudo-atomic
958 (eq (car (last (vop-codegen-info vop
))) :pseudo-atomic
)))
959 (unless stack-allocate-p
960 (instrument-alloc closure-widetag bytes node
(list result temp
) thread-tn
))
961 (pseudo-atomic (:default-exit
(not remain-pseudo-atomic
)
962 :elide-if stack-allocate-p
:thread-tn thread-tn
)
964 (stack-allocation bytes fun-pointer-lowtag result
)
965 (allocation closure-widetag bytes fun-pointer-lowtag result node temp thread-tn
))
966 (storew* #-compact-instance-header header
; write the widetag and size
967 #+compact-instance-header
; ... plus the layout pointer
968 (let ((layout #-sb-thread
(static-symbol-value-ea 'function-layout
)
969 #+sb-thread
(thread-slot-ea thread-function-layout-slot
)))
970 (cond ((typep header
'(unsigned-byte 16))
971 (inst mov temp layout
)
972 ;; emit a 2-byte constant, the low 4 of TEMP were zeroed
973 (inst mov
:word temp header
))
975 (inst mov temp header
)
976 (inst or temp layout
)))
978 result
0 fun-pointer-lowtag
(not stack-allocate-p
))
979 (inst lea
(pc-size vop
)
980 temp
(rip-relative-ea label
(ash simple-fun-insts-offset word-shift
)))
981 (storew temp result closure-fun-slot fun-pointer-lowtag
)))))
983 (define-vop (reference-closure)
985 (:results
(result :scs
(descriptor-reg)))
988 (inst lea
(pc-size vop
)
989 result
(rip-relative-ea label fun-pointer-lowtag
))))
991 ;;; The compiler likes to be able to directly make value cells.
992 (define-vop (make-value-cell)
993 (:args
(value :scs
(descriptor-reg any-reg immediate constant
) :to
:result
))
994 (:results
(result :scs
(descriptor-reg) :from
:eval
))
995 #+gs-seg
(:temporary
(:sc unsigned-reg
:offset
15) thread-tn
)
996 (:info stack-allocate-p
)
999 (let ((data (if (sc-is value immediate
)
1000 (let ((bits (encode-value-if-immediate value
)))
1003 bits
)) ; could be a fixup
1005 (cond (stack-allocate-p
1006 ;; No regression test got here. Therefore I think there's no such thing as a
1007 ;; dynamic-extent value cell. It makes sense that there isn't: DX closures
1008 ;; would just reference their frame, wouldn't they?
1009 (inst and rsp-tn
(lognot lowtag-mask
)) ; align
1011 (inst push
(compute-object-header value-cell-size value-cell-widetag
))
1012 (inst lea result
(ea other-pointer-lowtag rsp-tn
)))
1014 (alloc-other value-cell-widetag value-cell-size result node nil thread-tn
1018 (unless (integerp data
) (inst push data
) t
))
1020 (inst push value
) t
)
1022 (inst pop
(object-slot-ea result value-cell-value-slot other-pointer-lowtag
))
1023 (storew data result value-cell-value-slot other-pointer-lowtag
)))))))))
1025 ;;;; automatic allocators for primitive objects
1028 ((alloc (vop name words type lowtag stack-allocate-p result
1029 &optional alloc-temp node
1030 &aux
(bytes (pad-data-block words
))
1031 (remain-pseudo-atomic
1032 (eq (car (last (vop-codegen-info vop
))) :pseudo-atomic
)))
1034 (when (eq type bignum-widetag
) (setq bytes
(* bytes
2))) ; use 2x the space
1035 (progn name
) ; possibly not used
1036 (unless stack-allocate-p
1037 (instrument-alloc type bytes node
(list result alloc-temp
) thread-tn
))
1038 (pseudo-atomic (:default-exit
(not remain-pseudo-atomic
)
1039 :elide-if stack-allocate-p
:thread-tn thread-tn
)
1040 ;; If storing a header word, defer ORing in the lowtag until after
1041 ;; the header is written so that displacement can be 0.
1042 (cond (stack-allocate-p
1043 (stack-allocation bytes
(if type
0 lowtag
) result
))
1044 ((eql type funcallable-instance-widetag
)
1046 (invoke-asm-routine 'call
'alloc-funinstance vop
)
1049 (allocation type bytes
(if type
0 lowtag
) result node alloc-temp thread-tn
)))
1050 (let ((header (compute-object-header words type
)))
1051 (cond #+compact-instance-header
1052 ((and (eq name
'%make-structure-instance
) stack-allocate-p
)
1053 ;; Write a :DWORD, not a :QWORD, because the high half will be
1054 ;; filled in when the layout is stored. Can't use STOREW* though,
1055 ;; because it tries to store as few bytes as possible,
1056 ;; where this instruction must write exactly 4 bytes.
1057 (inst mov
:dword
(ea 0 result
) header
))
1059 (storew* header result
0 0 (not stack-allocate-p
)))))
1060 ;; GC can make the best choice about placement if it has a layout.
1061 ;; Of course with conservative GC the object will be pinned anyway,
1062 ;; but still, always having a layout is a good thing.
1063 (when (typep type
'layout
) ; store its layout, while still in pseudo-atomic
1064 (inst mov
:dword
(ea 4 result
) (make-fixup type
:layout
)))
1065 (inst or
:byte result lowtag
))))
1066 ;; DX is strictly redundant in these 2 vops, but they're written this way
1067 ;; so that backends can choose to use a single vop for both.
1068 (define-vop (fixed-alloc)
1069 (:info name words type lowtag dx
)
1070 (:results
(result :scs
(descriptor-reg)))
1071 (:temporary
(:sc unsigned-reg
) alloc-temp
)
1072 #+gs-seg
(:temporary
(:sc unsigned-reg
:offset
15) thread-tn
)
1075 (:generator
50 (alloc vop name words type lowtag dx result alloc-temp node
)))
1076 (define-vop (sb-c::fixed-alloc-to-stack
)
1077 (:info name words type lowtag dx
)
1078 (:results
(result :scs
(descriptor-reg)))
1080 (:generator
50 (alloc vop name words type lowtag dx result
))))
1082 ;;; Allocate a non-vector variable-length object.
1083 ;;; Exactly 4 allocators are rendered via this vop:
1084 ;;; BIGNUM (%ALLOCATE-BIGNUM)
1085 ;;; FUNCALLABLE-INSTANCE (%MAKE-FUNCALLABLE-INSTANCE)
1086 ;;; CLOSURE (%ALLOC-CLOSURE)
1087 ;;; INSTANCE (%MAKE-INSTANCE,%MAKE-INSTANCE/MIXED)
1088 ;;; WORDS accounts for the mandatory slots *including* the header.
1089 ;;; EXTRA is the variable payload, also measured in words.
1090 (define-vop (var-alloc)
1091 (:args
(extra :scs
(any-reg)))
1092 (:arg-types positive-fixnum
)
1093 (:info name words type lowtag stack-allocate-p
)
1094 (:results
(result :scs
(descriptor-reg) :from
(:eval
1)))
1095 (:temporary
(:sc unsigned-reg
:from
:eval
:to
(:eval
1)) bytes
)
1096 (:temporary
(:sc unsigned-reg
:from
:eval
:to
:result
) header
)
1097 ;; KLUDGE: wire to RAX so that it doesn't get R12
1098 (:temporary
(:sc unsigned-reg
:offset
0) alloc-temp
)
1099 #+gs-seg
(:temporary
(:sc unsigned-reg
:offset
15) thread-tn
)
1103 (when (eq name
'%make-funcallable-instance
)
1104 ;; %MAKE-FUNCALLABLE-INSTANCE needs to allocate to pages of code,
1105 ;; which it failed to do if the var-alloc translation was invoked.
1106 ;; But it seems we never need this! (so is it FIXME or isn't it?)
1107 (error "can't %MAKE-FUNCALLABLE-INSTANCE of unknown length"))
1108 (let ((remain-pseudo-atomic (eq (car (last (vop-codegen-info vop
))) :pseudo-atomic
)))
1109 ;; With the exception of bignums, these objects have effectively
1110 ;; 32-bit headers because the high 4 byes contain a layout pointer.
1111 (let ((operand-size (if (= type bignum-widetag
) :qword
:dword
)))
1112 (inst lea operand-size bytes
1113 (ea (* (1+ words
) n-word-bytes
) nil
1114 extra
(ash 1 (- word-shift n-fixnum-tag-bits
))))
1115 (inst mov operand-size header bytes
)
1116 (inst shl operand-size header
(- (length-field-shift type
) word-shift
)) ; w+1 to length field
1117 (inst lea operand-size header
; (w-1 << 8) | type
1118 (ea (+ (ash -
2 (length-field-shift type
)) type
) header
))
1119 (inst and operand-size bytes
(lognot lowtag-mask
)))
1121 (when (= type bignum-widetag
) (inst shl
:dword bytes
1)) ; use 2x the space
1122 (cond (stack-allocate-p
1123 (stack-allocation bytes lowtag result
)
1124 (storew header result
0 lowtag
))
1126 ;; can't pass RESULT as a possible choice of scratch register
1127 ;; because it might be in the same physical reg as BYTES.
1128 ;; Yup, the lifetime specs in this vop are pretty confusing.
1129 (instrument-alloc type bytes node alloc-temp thread-tn
)
1130 (pseudo-atomic (:default-exit
(not remain-pseudo-atomic
)
1131 :thread-tn thread-tn
)
1132 (allocation type bytes lowtag result node alloc-temp thread-tn
)
1133 (storew header result
0 lowtag
)))))))
1136 (define-vop (alloc-code)
1137 (:args
(total-words :scs
(unsigned-reg) :target c-arg-1
)
1138 (boxed-words :scs
(unsigned-reg) :target c-arg-2
))
1139 (:temporary
(:sc unsigned-reg
1140 :offset
#.
(first *c-call-register-arg-offsets
*)
1141 :from
(:argument
0) :to
:result
) c-arg-1
)
1142 (:temporary
(:sc unsigned-reg
:offset rsi-offset
1143 :offset
#.
(second *c-call-register-arg-offsets
*)
1144 :from
(:argument
1) :to
:result
) c-arg-2
)
1145 (:temporary
(:sc unsigned-reg
:offset r15-offset
) dummy
)
1146 (:results
(res :scs
(descriptor-reg)))
1149 (move c-arg-1 total-words
)
1150 (move c-arg-2 boxed-words
)
1151 (with-registers-preserved (c :except
#-win32 rdi
#+win32 rcx
:frame-reg r15
)
1152 (pseudo-atomic () (call-c "alloc_code_object"))
1153 (move c-arg-1 rax-tn
))
1154 (move res c-arg-1
)))
1157 (macrolet ((c-fun (name)
1158 `(let ((c-fun (make-fixup ,name
:foreign
)))
1159 (cond ((sb-c::code-immobile-p node
) c-fun
)
1160 (t (progn (inst mov rax c-fun
) rax
))))))
1161 (define-vop (!alloc-immobile-fixedobj
)
1162 (:args
(size-class :scs
(any-reg) :target c-arg1
)
1163 (nwords :scs
(any-reg) :target c-arg2
)
1164 (header :scs
(any-reg) :target c-arg3
))
1165 (:temporary
(:sc unsigned-reg
:from
(:argument
0) :to
:eval
1166 :offset
#.
(first *c-call-register-arg-offsets
*)) c-arg1
)
1167 (:temporary
(:sc unsigned-reg
:from
(:argument
1) :to
:eval
1168 :offset
#.
(second *c-call-register-arg-offsets
*)) c-arg2
)
1169 (:temporary
(:sc unsigned-reg
:from
(:argument
2) :to
:eval
1170 :offset
#.
(third *c-call-register-arg-offsets
*)) c-arg3
)
1171 (:temporary
(:sc unsigned-reg
:from
:eval
:to
(:result
0) :offset rax-offset
) rax
)
1172 (:results
(result :scs
(descriptor-reg)))
1175 (inst mov c-arg1 size-class
)
1176 (inst mov c-arg2 nwords
)
1177 (inst mov c-arg3 header
)
1178 ;; RSP needn't be restored because the allocators all return immediately
1179 ;; which has that effect
1180 (inst and rsp-tn -
16)
1182 (call-c (c-fun "alloc_immobile_fixedobj"))
1183 (move result rax
))))
1186 ;;; Dynamic-space allocation:
1187 ;;; * (defun f (n) (dotimes (i n) (make-symbol "b")))
1188 ;;; * (time (f 500000))
1189 ;;; Evaluation took: 0.004 seconds of real time
1191 ;;; * (defun f (n) (dotimes (i n) (sb-vm::%alloc-immobile-symbol "b")))
1192 ;;; Evaluation took: 0.043 seconds of real time
1193 ;;; With vop: 0.028 seconds of real time
1195 (eval-when (:compile-toplevel
)
1196 (aver (evenp symbol-size
))) ; assumptions in the code below
1198 (define-vop (!fast-alloc-immobile-symbol
)
1199 (:results
(result :scs
(descriptor-reg)))
1200 (:temporary
(:sc unsigned-reg
:offset rax-offset
) rax
)
1201 (:temporary
(:sc unsigned-reg
:offset rbx-offset
) rbx
)
1202 (:temporary
(:sc unsigned-reg
:offset rcx-offset
) rcx
)
1203 (:temporary
(:sc unsigned-reg
) header
)
1205 ;; fixedobj_pages linkage entry: 1 PTE per page, 12-byte struct
1206 (inst mov rbx
(rip-relative-ea (make-fixup "fixedobj_pages" :foreign-dataref
)))
1207 ;; fixedobj_page_hint: 1 hint per sizeclass. C type = uint32_t
1208 (inst mov rax
(rip-relative-ea (make-fixup "fixedobj_page_hint" :foreign-dataref
)))
1209 (inst mov rbx
(ea rbx
)) ; get the base of the fixedobj_pages array
1210 ;; This has to be pseudoatomic as soon as the page hint is loaded.
1211 ;; Consider the situation where there is exactly one symbol on a page that
1212 ;; is almost all free, and the symbol page hint points to that page.
1213 ;; If GC occurs, it might dispose that symbol, resetting the page attributes
1214 ;; and page hint. It would be an error to allocate to that page
1215 ;; because it is no longer a page of symbols but rather a free page.
1216 ;; There is no way to inform GC that we are currently looking at a page
1217 ;; in anticipation of allocating to it.
1219 (inst mov
:dword rax
(ea 4 rax
)) ; rax := fixedobj_page_hint[1] (sizeclass=SYMBOL)
1220 (inst test
:dword rax rax
)
1221 (inst jmp
:z FAIL
) ; fail if hint page is 0
1222 (inst lea rbx
(ea rbx rax
8)) ; rbx := &fixedobj_pages[hint].free_index
1223 ;; compute fixedobj_page_address(hint) into RAX
1224 (inst mov rcx
(rip-relative-ea (make-fixup "FIXEDOBJ_SPACE_START" :foreign-dataref
)))
1225 (inst shl rax
(integer-length (1- immobile-card-bytes
)))
1226 (inst add rax
(ea rcx
))
1227 ;; load the page's free pointer
1228 (inst mov
:dword rcx
(ea rbx
)) ; rcx := fixedobj_pages[hint].free_index
1229 ;; fail if allocation would overrun the page
1230 (inst cmp
:dword rcx
(- immobile-card-bytes
(* symbol-size n-word-bytes
)))
1232 ;; compute address of the allegedly free memory block into RESULT
1233 (inst lea result
(ea rcx rax
)) ; free_index + page_base
1234 ;; read the potential symbol header
1235 (inst mov rax
(ea result
))
1236 (inst test
:dword rax
1)
1237 (inst jmp
:nz FAIL
) ; not a fixnum implies already taken
1238 ;; try to claim this word of memory
1239 (inst mov header
(logior (ash (1- symbol-size
) n-widetag-bits
) symbol-widetag
))
1240 (inst cmpxchg
:lock
(ea result
) header
)
1241 (inst jmp
:ne FAIL
) ; already taken
1242 ;; compute new free_index = spacing + old header + free_index
1243 (inst lea
:dword rax
(ea (* symbol-size n-word-bytes
) rax rcx
))
1244 (inst mov
:dword
(ea rbx
) rax
) ; store new free_index
1245 ;; set the low bit of the 'gens' field
1246 (inst or
:lock
:byte
(ea 7 rbx
) 1) ; 7+rbx = &fixedobj_pages[i].attr.parts.gens_
1247 (inst or
:byte result other-pointer-lowtag
) ; make_lispobj()
1250 (inst mov result nil-value
)