x86-64/allocate-vector-on-stack: don't accept large immediates.
[sbcl.git] / src / compiler / x86-64 / alloc.lisp
blobd1e915cd24207a4e471b6b72cd9032097bfd6471
1 ;;;; allocation VOPs for the x86-64
3 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; more information.
5 ;;;;
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB-VM")
14 ;;;; allocation helpers
16 ;;; Most allocation is done by inline code with sometimes help
17 ;;; from the C alloc() function by way of the alloc-tramp
18 ;;; assembly routine.
20 (defun tagify (result base lowtag)
21 (if (eql lowtag 0)
22 (inst mov result base)
23 (inst lea result (ea lowtag base))))
25 (defun stack-allocation (size lowtag alloc-tn &optional known-alignedp)
26 (aver (not (location= alloc-tn rsp-tn)))
27 (inst sub rsp-tn size)
28 ;; see comment in x86/macros.lisp implementation of this
29 ;; However that comment seems inapplicable here because:
30 ;; - PAD-DATA-BLOCK quite clearly enforces double-word alignment,
31 ;; contradicting "... unfortunately not enforced by ..."
32 ;; - It's not the job of FIXED-ALLOC to realign anything.
33 ;; - The real issue is that it's not obvious that the stack is
34 ;; 16-byte-aligned at *all* times. Maybe it is, maybe it isn't.
35 (unless known-alignedp ; can skip this AND if we're all good
36 (inst and rsp-tn #.(lognot lowtag-mask)))
37 (tagify alloc-tn rsp-tn lowtag)
38 (values))
40 (defun alloc-unboxed-p (type)
41 (case type
42 ((unboxed-array
43 #.bignum-widetag
44 #.sap-widetag
45 #.double-float-widetag
46 #.complex-single-float-widetag
47 #.complex-double-float-widetag)
48 t)))
50 ;;; Insert allocation profiler instrumentation
51 (eval-when (:compile-toplevel)
52 (aver (= thread-tot-bytes-alloc-unboxed-slot
53 (1+ thread-tot-bytes-alloc-boxed-slot))))
55 ;;; Emit counter increments for SB-APROF. SCRATCH-REGISTERS is either a TN
56 ;;; or list of TNs that can be used to store into the profiling data.
57 ;;; We pick one of the available TNs to use for addressing the data buffer.
58 ;;; The TN that we pick can't be R12 because encoding it into an instruction
59 ;;; always requires a SIB byte, which doesn't fit in the reserved bytes
60 ;;; of the instruction stream where hot patching occurs.
61 ;;; Compare:
62 ;;; F048FF4018 LOCK INC QWORD PTR [RAX+24]
63 ;;; F049FF442418 LOCK INC QWORD PTR [R12+24]
64 (defun instrument-alloc (type size node scratch-registers
65 &optional thread-temp
66 &aux (temp
67 (if (listp scratch-registers)
68 (dolist (reg scratch-registers
69 (first scratch-registers))
70 (unless (location= reg r12-tn) (return reg)))
71 scratch-registers)))
72 (declare (ignorable type thread-temp))
73 ;; Each allocation sequence has to call INSTRUMENT-ALLOC,
74 ;; so we may as well take advantage of this fact to load the temp reg
75 ;; here, if provided, rather than spewing more #+gs-seg tests around.
76 #+gs-seg (when thread-temp (inst rdgsbase thread-temp))
77 (when (member :allocation-size-histogram sb-xc:*features*)
78 (let ((use-size-temp (not (typep size '(or (signed-byte 32) tn)))))
79 ;; Sum up the sizes of boxed vs unboxed allocations.
80 (cond ((tn-p type) ; from ALLOCATE-VECTOR-ON-HEAP
81 ;; Constant huge size + unknown type can't occur.
82 (aver (not use-size-temp))
83 (inst cmp :byte type simple-vector-widetag)
84 (inst set :ne temp)
85 (inst and :dword temp 1)
86 (inst add :qword
87 (ea thread-segment-reg
88 (ash thread-tot-bytes-alloc-boxed-slot word-shift)
89 thread-tn temp 8)
90 size))
92 (inst add :qword
93 (thread-slot-ea (if (alloc-unboxed-p type)
94 thread-tot-bytes-alloc-unboxed-slot
95 thread-tot-bytes-alloc-boxed-slot))
96 (cond (use-size-temp (inst mov temp size) temp)
97 (t size)))))
98 (cond ((tn-p size)
99 (assemble ()
100 ;; optimistically assume it's a small object, so just divide
101 ;; the size by the size of a cons to get a (1-based) index.
102 (inst mov :dword temp size)
103 (inst shr :dword temp (1+ word-shift))
104 ;; now see if the computed index is in range
105 (inst cmp size (* n-histogram-bins-small 16))
106 (inst jmp :le OK)
107 ;; oversized. Compute the log2 of the size
108 (inst bsr :dword temp size)
109 ;; array of counts ... | array of sizes ...
110 (inst add :qword (ea (ash (+ thread-allocator-histogram-slot
112 (- first-large-histogram-bin-log2size)
113 n-histogram-bins-small
114 n-histogram-bins-large)
115 word-shift)
116 thread-tn temp 8)
117 size)
118 ;; not sure why this is "2" and not "1" in the fudge factor!!
119 ;; (but the assertions come out right)
120 (inst add :dword temp
121 (+ (- first-large-histogram-bin-log2size) n-histogram-bins-small 2))
123 (inst inc :qword (ea thread-segment-reg
124 (ash (1- thread-allocator-histogram-slot) word-shift)
125 thread-tn temp 8))))
126 ((<= size (* sb-vm:cons-size sb-vm:n-word-bytes n-histogram-bins-small))
127 (let ((index (1- (/ size (* sb-vm:cons-size sb-vm:n-word-bytes)))))
128 (inst inc :qword (thread-slot-ea (+ thread-allocator-histogram-slot index)))))
130 (let ((index (- (integer-length size) first-large-histogram-bin-log2size)))
131 (inst add :qword (thread-slot-ea (+ thread-allocator-histogram-slot
132 n-histogram-bins-small
133 n-histogram-bins-large index))
134 size)
135 (inst inc :qword (thread-slot-ea (+ thread-allocator-histogram-slot
136 n-histogram-bins-small index))))))))
137 (when (policy node (> sb-c::instrument-consing 1))
138 (when (tn-p size)
139 (aver (not (location= size temp))))
140 ;; CAUTION: the logic for RAX-SAVE is entirely untested
141 ;; as it never gets exercised, and can not be, until R12 ceases
142 ;; to have its wired use as the GC card table base register.
143 (binding* (((data rax-save) (if (location= temp r12-tn)
144 (values rax-tn t)
145 (values temp nil)))
146 (patch-loc (gen-label))
147 (skip-instrumentation (gen-label)))
148 ;; Don't count allocations to the arena
149 (inst cmp :qword (thread-slot-ea thread-arena-slot) 0)
150 (inst jmp :nz skip-instrumentation)
151 (when rax-save (inst push rax-tn))
152 (inst mov data (thread-slot-ea thread-profile-data-slot thread-temp))
153 (inst test data data)
154 ;; This instruction is modified to "JMP :z" when profiling is
155 ;; partially enabled. After the buffer is assigned, it becomes
156 ;; fully enabled. The unconditional jmp gives minimal performance
157 ;; loss if the profiler is statically disabled. (one memory
158 ;; read and a test whose result is never used, which the CPU
159 ;; is good at ignoring as far as instruction prefetch goes)
160 (emit-label patch-loc)
161 (push patch-loc (sb-assem::asmstream-alloc-points sb-assem:*asmstream*))
162 (inst jmp skip-instrumentation)
163 (emit-alignment 3 :long-nop)
164 (let ((helper
165 (if (integerp size) 'enable-alloc-counter 'enable-sized-alloc-counter)))
166 ;; This jump is always encoded as 5 bytes
167 (inst call (if (or (not node) ; assembly routine
168 (sb-c::code-immobile-p node))
169 (make-fixup helper :assembly-routine)
170 (uniquify-fixup helper))))
171 (inst nop)
172 ;; Emit "TEST AL, imm" where the immediate value
173 ;; encodes the the data buffer base reg and size reg numbers.
174 (inst byte #xA8) ; "TEST AL,imm"
175 (cond ((integerp size)
176 (inst byte (tn-offset data)))
178 (inst byte (logior (tn-offset data) (ash (tn-offset size) 4)))
179 (inst .skip 8 :long-nop)))
180 (when rax-save (inst pop rax-tn))
181 (emit-label skip-instrumentation))))
183 ;;; An arbitrary marker for the cons primitive-type, not to be confused
184 ;;; with the CONS-TYPE in our type-algebraic sense. Mostly just informs
185 ;;; the allocator to use cons_tlab.
186 (defconstant +cons-primtype+ list-pointer-lowtag)
188 (define-vop (sb-c::end-pseudo-atomic)
189 (:generator 1 (emit-end-pseudo-atomic)))
191 ;;; Emit code to allocate an object with a size in bytes given by
192 ;;; SIZE into ALLOC-TN. The size may be an integer of a TN.
193 ;;; NODE may be used to make policy-based decisions.
194 ;;; This function should only be used inside a pseudo-atomic section,
195 ;;; which to the degree needed should also cover subsequent initialization.
197 ;;; A mnemonic device for the argument pattern here:
198 ;;; 1. what to allocate: type, size, lowtag describe the object
199 ;;; 2. where to put the result
200 ;;; 3. node (for determining immobile-space-p) and a scratch register or two
201 (defun allocation (type size lowtag alloc-tn node temp thread-temp
202 &key overflow
203 &aux (systemp (system-tlab-p type node)))
204 (declare (ignorable thread-temp))
205 (flet ((fallback (size)
206 ;; Call an allocator trampoline and get the result in the proper register.
207 ;; There are 2 choices of trampoline to invoke alloc() or alloc_list()
208 ;; in C. This is chosen by the name of the asm routine.
209 (cond ((typep size '(and integer (not (signed-byte 32))))
210 ;; MOV accepts large immediate operands, PUSH does not
211 (inst mov alloc-tn size)
212 (inst push alloc-tn))
214 (inst push size)))
215 (invoke-asm-routine
216 'call
217 (if systemp
218 (if (eql type +cons-primtype+) 'sys-list-alloc-tramp 'sys-alloc-tramp)
219 (if (eql type +cons-primtype+) 'list-alloc-tramp 'alloc-tramp))
220 node)
221 (inst pop alloc-tn)))
222 (let* ((NOT-INLINE (gen-label))
223 (DONE (gen-label))
224 (free-pointer #+sb-thread
225 (let ((slot (if systemp
226 (if (eql type +cons-primtype+)
227 thread-sys-cons-tlab-slot
228 thread-sys-mixed-tlab-slot)
229 (if (eql type +cons-primtype+)
230 thread-cons-tlab-slot
231 thread-mixed-tlab-slot))))
232 (thread-slot-ea slot #+gs-seg thread-temp))
233 #-sb-thread
234 (ea (+ static-space-start
235 (if (eql type +cons-primtype+)
236 cons-region-offset
237 mixed-region-offset))))
238 (end-addr (ea (sb-x86-64-asm::ea-segment free-pointer)
239 (+ n-word-bytes (ea-disp free-pointer))
240 (ea-base free-pointer))))
241 (cond ((typep size `(integer ,large-object-size))
242 ;; large objects will never be made in a per-thread region
243 (cond (overflow (funcall overflow))
244 (t (fallback size)
245 (when (/= lowtag 0) (inst or :byte alloc-tn lowtag)))))
246 ((and (tn-p size) (location= size alloc-tn))
247 (aver (and temp (not (location= temp size))))
248 (inst mov temp free-pointer)
249 ;; alloc-tn <- old free ptr and temp <- new free ptr
250 (inst xadd temp alloc-tn)
251 (inst cmp temp end-addr)
252 (inst jmp :a NOT-INLINE)
253 (inst mov free-pointer temp)
254 (emit-label DONE)
255 (when (/= lowtag 0) (inst or :byte alloc-tn lowtag))
256 (assemble (:elsewhere)
257 (emit-label NOT-INLINE)
258 (inst sub temp alloc-tn) ; new-free-ptr - old-free-ptr = size
259 (cond (overflow (funcall overflow))
260 (t (fallback temp)
261 (inst jmp DONE)))))
263 ;; fixed-size allocation whose size fits in an imm32 can be done
264 ;; with only one register, the ALLOC-TN. If it doesn't fit in imm32,
265 ;; it would get the first branch of the COND, for large objects.
266 (inst mov alloc-tn free-pointer)
267 (cond (temp
268 (when (tn-p size) (aver (not (location= size temp))))
269 (inst lea temp (ea size alloc-tn))
270 (inst cmp temp end-addr)
271 (inst jmp :a NOT-INLINE)
272 (inst mov free-pointer temp)
273 (emit-label DONE)
274 (when (/= lowtag 0) (inst or :byte alloc-tn lowtag)))
276 (inst add alloc-tn size)
277 (inst cmp alloc-tn end-addr)
278 (inst jmp :a NOT-INLINE)
279 (inst mov free-pointer alloc-tn)
280 (cond ((tn-p size)
281 (inst sub alloc-tn size)
282 (emit-label DONE)
283 (when (/= lowtag 0) (inst or :byte alloc-tn lowtag)))
285 ;; SUB can compute the result and tagify it.
286 ;; The fallback also has to tagify.
287 (let ((bias (+ (- size) lowtag)))
288 (if (= bias -1) (inst dec alloc-tn) (inst add alloc-tn bias)))
289 (emit-label DONE)))))
290 (assemble (:elsewhere)
291 (emit-label NOT-INLINE)
292 (cond (overflow (funcall overflow))
294 (fallback size)
295 (when (and (/= lowtag 0) (not temp) (not (tn-p size)))
296 (inst or :byte alloc-tn lowtag))
297 (inst jmp DONE))))))))
300 ;;; Allocate an other-pointer object of fixed NWORDS with a single-word
301 ;;; header having the specified WIDETAG value. The result is placed in
302 ;;; RESULT-TN. NWORDS counts the header word.
303 (defun alloc-other (widetag nwords result-tn node alloc-temps thread-temp
304 &optional init
305 &aux (bytes (pad-data-block nwords)))
306 (declare (ignorable thread-temp))
307 (declare (dynamic-extent init))
308 #+bignum-assertions
309 (when (= widetag bignum-widetag) (setq bytes (* bytes 2))) ; use 2x the space
310 (instrument-alloc widetag bytes node (cons result-tn (ensure-list alloc-temps)) thread-temp)
311 (let ((header (compute-object-header nwords widetag))
312 (alloc-temp (if (listp alloc-temps) (car alloc-temps) alloc-temps)))
313 (pseudo-atomic ()
314 (cond (alloc-temp
315 (allocation widetag bytes 0 result-tn node alloc-temp thread-temp)
316 (storew* header result-tn 0 0 t)
317 (inst or :byte result-tn other-pointer-lowtag))
319 (allocation widetag bytes other-pointer-lowtag result-tn node nil thread-temp)
320 (storew* header result-tn 0 other-pointer-lowtag t)))
321 (when init
322 (funcall init)))))
324 (defun list-ctor-push-elt (x scratch)
325 (inst push (if (sc-is x immediate)
326 (let ((bits (encode-value-if-immediate x)))
327 (or (plausible-signed-imm32-operand-p bits)
328 (progn (inst mov scratch bits) scratch)))
329 x)))
331 ;;;; CONS, ACONS, LIST and LIST*
333 (defun init-list (prev-constant tn list slot lowtag temp zeroed)
334 ;; TODO: gencgc does not need EMIT-GC-STORE-BARRIER here,
335 ;; but other other GC strategies might.
336 (let* ((immediate-value)
337 (reg
338 (sc-case tn
339 (constant
340 ;; a CONSTANT sc does not imply that we have a compile-time constant-
341 ;; it could be load-time in which case it does not satisfy constant-tn-p.
342 (unless (and (constant-tn-p tn) (eql prev-constant (tn-value tn)))
343 (setf prev-constant (if (constant-tn-p tn) (tn-value tn) temp))
344 (move temp tn))
345 temp)
346 (immediate
347 (if (eql prev-constant (setf immediate-value (tn-value tn)))
348 temp
349 ;; Note1:
350 ;; 1. "-if-immediate" is slighty misleading since it _is_ immediate.
351 ;; 2. This unfortunately treats STOREW* as a leaky abstraction
352 ;; because we have to know exactly what it rejects up front
353 ;; rather than asking it whether it can emit a single instruction
354 ;; that will do the trick.
355 (let ((bits (encode-value-if-immediate tn)))
356 (when (and zeroed (typep bits '(unsigned-byte 31)))
357 (storew* bits list slot lowtag zeroed)
358 (return-from init-list prev-constant)) ; Return our "in/out" arg
359 bits)))
360 (control-stack
361 (setf prev-constant temp) ;; a non-eq initial value
362 (move temp tn)
363 temp)
365 tn))))
366 ;; STOREW returns TEMP if and only if it stored using it.
367 ;; (Perhaps not the clearest idiom.)
368 (when (eq (storew reg list slot lowtag temp) temp)
369 (setf prev-constant immediate-value)))
370 prev-constant) ; Return our "in/out" arg
372 (macrolet ((pop-arg (ref)
373 `(prog1 (tn-ref-tn ,ref) (setf ,ref (tn-ref-across ,ref))))
374 (store-slot (arg list slot &optional (lowtag list-pointer-lowtag))
375 ;; PREV-CONSTANT is akin to a pass-by-reference arg to the function which
376 ;; used to be all inside this macro.
377 `(setq prev-constant
378 (init-list prev-constant ,arg ,list ,slot ,lowtag temp zeroed))))
380 (define-vop (cons)
381 (:args (car :scs (any-reg descriptor-reg constant immediate control-stack))
382 (cdr :scs (any-reg descriptor-reg constant immediate control-stack)))
383 (:temporary (:sc unsigned-reg :to (:result 0) :target result) alloc)
384 (:temporary (:sc unsigned-reg :to (:result 0)
385 :unused-if (node-stack-allocate-p (sb-c::vop-node vop)))
386 temp)
387 (:results (result :scs (descriptor-reg)))
388 #+gs-seg (:temporary (:sc unsigned-reg :offset 15) thread-tn)
389 (:vop-var vop)
390 (:node-var node)
391 (:generator 10
392 (cond
393 ((node-stack-allocate-p node)
394 (inst and rsp-tn (lognot lowtag-mask))
395 (cond ((and (sc-is car immediate) (sc-is cdr immediate)
396 (typep (encode-value-if-immediate car) '(signed-byte 8))
397 (typep (encode-value-if-immediate cdr) '(signed-byte 8)))
398 ;; (CONS 0 0) takes just 4 bytes to encode the PUSHes (for example)
399 (inst push (encode-value-if-immediate cdr))
400 (inst push (encode-value-if-immediate car)))
401 ((and (sc-is car immediate) (sc-is cdr immediate)
402 (eql (encode-value-if-immediate car) (encode-value-if-immediate cdr)))
403 (inst mov alloc (encode-value-if-immediate cdr))
404 (inst push alloc)
405 (inst push alloc))
407 (list-ctor-push-elt cdr alloc)
408 (list-ctor-push-elt car alloc)))
409 (inst lea result (ea list-pointer-lowtag rsp-tn)))
411 (let ((nbytes (* cons-size n-word-bytes))
412 (zeroed #+mark-region-gc t)
413 (prev-constant temp)) ;; a non-eq initial value
414 (instrument-alloc +cons-primtype+ nbytes node (list temp alloc) thread-tn)
415 (pseudo-atomic (:thread-tn thread-tn)
416 (allocation +cons-primtype+ nbytes 0 alloc node temp thread-tn)
417 (store-slot car alloc cons-car-slot 0)
418 (store-slot cdr alloc cons-cdr-slot 0)
419 (if (location= alloc result)
420 (inst or :byte alloc list-pointer-lowtag)
421 (inst lea result (ea list-pointer-lowtag alloc)))))))))
423 (define-vop (acons)
424 (:args (key :scs (any-reg descriptor-reg constant immediate control-stack))
425 (val :scs (any-reg descriptor-reg constant immediate control-stack))
426 (tail :scs (any-reg descriptor-reg constant immediate control-stack)))
427 (:temporary (:sc unsigned-reg :to (:result 0)) alloc)
428 (:temporary (:sc unsigned-reg :to (:result 0) :target result) temp)
429 (:results (result :scs (descriptor-reg)))
430 #+gs-seg (:temporary (:sc unsigned-reg :offset 15) thread-tn)
431 (:node-var node)
432 (:translate acons)
433 (:policy :fast-safe)
434 (:generator 10
435 (let ((nbytes (* cons-size 2 n-word-bytes))
436 (zeroed #+mark-region-gc t)
437 (prev-constant temp))
438 (instrument-alloc +cons-primtype+ nbytes node (list temp alloc) thread-tn)
439 (pseudo-atomic (:thread-tn thread-tn)
440 (allocation +cons-primtype+ nbytes 0 alloc node temp thread-tn)
441 (store-slot tail alloc cons-cdr-slot 0)
442 (inst lea temp (ea (+ 16 list-pointer-lowtag) alloc))
443 (store-slot temp alloc cons-car-slot 0)
444 (setf prev-constant temp)
445 (let ((pair temp) (temp alloc)) ; give STORE-SLOT the ALLOC as its TEMP
446 (store-slot key pair cons-car-slot)
447 (store-slot val pair cons-cdr-slot))
448 ;; ALLOC could have been clobbered by using it as a temp for
449 ;; loading a constant.
450 (if (location= temp result)
451 (inst sub result 16) ; TEMP is ALLOC+16+lowtag, so just subtract 16
452 (inst lea result (ea (- 16) temp)))))))
454 ;;; CONS-2 is similar to ACONS, except that instead of producing
455 ;;; ((X . Y) . Z) it produces (X Y . Z)
456 (define-vop (cons-2)
457 (:args (car :scs (any-reg descriptor-reg constant immediate control-stack))
458 (cadr :scs (any-reg descriptor-reg constant immediate control-stack))
459 (cddr :scs (any-reg descriptor-reg constant immediate control-stack)))
460 (:temporary (:sc unsigned-reg :to (:result 0) :target result) alloc)
461 (:temporary (:sc unsigned-reg :to (:result 0)
462 :unused-if (node-stack-allocate-p (sb-c::vop-node vop)))
463 temp)
464 (:results (result :scs (descriptor-reg)))
465 #+gs-seg (:temporary (:sc unsigned-reg :offset 15) thread-tn)
466 (:vop-var vop)
467 (:node-var node)
468 (:generator 10
469 (cond
470 ((node-stack-allocate-p node)
471 (inst and rsp-tn (lognot lowtag-mask))
472 (list-ctor-push-elt cddr alloc)
473 (list-ctor-push-elt cadr alloc)
474 (inst lea alloc (ea list-pointer-lowtag rsp-tn))
475 (inst push alloc) ; cdr of the first cons
476 (list-ctor-push-elt car alloc)
477 (inst lea result (ea list-pointer-lowtag rsp-tn)))
479 (let ((nbytes (* cons-size 2 n-word-bytes))
480 (zeroed #+mark-region-gc t)
481 (prev-constant temp))
482 (instrument-alloc +cons-primtype+ nbytes node (list temp alloc) thread-tn)
483 (pseudo-atomic (:thread-tn thread-tn)
484 (allocation +cons-primtype+ nbytes 0 alloc node temp thread-tn)
485 (store-slot car alloc cons-car-slot 0)
486 (store-slot cadr alloc (+ 2 cons-car-slot) 0)
487 (store-slot cddr alloc (+ 2 cons-cdr-slot) 0)
488 (inst lea temp (ea (+ 16 list-pointer-lowtag) alloc))
489 (store-slot temp alloc cons-cdr-slot 0)
490 (if (location= alloc result)
491 (inst or :byte alloc list-pointer-lowtag)
492 (inst lea result (ea list-pointer-lowtag alloc)))))))))
494 (define-vop (list)
495 (:args (things :more t :scs (descriptor-reg any-reg constant immediate)))
496 (:temporary (:sc unsigned-reg) ptr temp)
497 (:temporary (:sc unsigned-reg :to (:result 0) :target result) res)
498 #+gs-seg (:temporary (:sc unsigned-reg :offset 15) thread-tn)
499 (:info star cons-cells)
500 (:results (result :scs (descriptor-reg)))
501 (:node-var node)
502 (:generator 0
503 (aver (>= cons-cells 3)) ; prevent regressions in ir2tran's vop selection
504 (let* ((stack-allocate-p (node-stack-allocate-p node))
505 (size (* (pad-data-block cons-size) cons-cells))
506 (zeroed #+mark-region-gc (not stack-allocate-p))
507 (prev-constant temp))
508 (unless stack-allocate-p
509 (instrument-alloc +cons-primtype+ size node (list ptr temp) thread-tn))
510 (pseudo-atomic (:elide-if stack-allocate-p :thread-tn thread-tn)
511 (if stack-allocate-p
512 (stack-allocation size list-pointer-lowtag res)
513 (allocation +cons-primtype+ size list-pointer-lowtag res node temp thread-tn))
514 (move ptr res)
515 (dotimes (i (1- cons-cells))
516 (store-slot (pop-arg things) ptr cons-car-slot)
517 (inst add ptr (pad-data-block cons-size))
518 (storew ptr ptr (- cons-cdr-slot cons-size) list-pointer-lowtag))
519 (store-slot (pop-arg things) ptr cons-car-slot list-pointer-lowtag)
520 (if star
521 (store-slot (pop-arg things) ptr cons-cdr-slot list-pointer-lowtag)
522 (storew* nil-value ptr cons-cdr-slot list-pointer-lowtag zeroed))))
523 (aver (null things))
524 (move result res)))
527 (define-vop ()
528 (:translate unaligned-dx-cons)
529 (:args (car))
530 (:results (result :scs (descriptor-reg)))
531 (:ignore car)
532 (:policy :fast-safe)
533 (:generator 0
534 (inst push nil-value)
535 (inst lea result (ea (- list-pointer-lowtag n-word-bytes) rsp-tn))))
537 ;;;; special-purpose inline allocators
539 ;;; Special variant of 'storew' which might have a shorter encoding
540 ;;; when storing to the heap (which starts out zero-filled).
541 ;;; This will always write 8 bytes if WORD is a negative number.
542 (defun storew* (word object slot lowtag zeroed &optional temp)
543 (cond
544 ;; FIXME: I this misses some cases that could use a dword store
545 ;; when the heap is prezeroed. Why can't it take (UNSIGNED-BYTE 32) ?
546 ;; For example #x8000FFFF would work as a :DWORD and we leave the upper 4
547 ;; bytes alone.
548 ((or (not zeroed) (not (typep word '(unsigned-byte 31))))
549 ;; Will use temp reg if WORD can't be encoded as an imm32
550 (storew word object slot lowtag temp))
551 ((/= word 0)
552 (let ((size
553 (cond ((typep word '(unsigned-byte 8))
554 :byte)
555 ((and (not (logtest word #xff))
556 (typep (ash word -8) '(unsigned-byte 8)))
557 ;; Array lengths 128 to 16384 which are multiples of 128
558 (setq word (ash word -8))
559 (decf lowtag 1) ; increment address by 1
560 :byte)
561 ((and (not (logtest word #xffff))
562 (typep (ash word -16) '(unsigned-byte 8)))
563 ;; etc
564 (setq word (ash word -16))
565 (decf lowtag 2) ; increment address by 2
566 :byte)
567 ((typep word '(unsigned-byte 16))
568 :word)
569 (t ; must be an (unsigned-byte 31)
570 :dword))))
571 (inst mov size (ea (- (* slot n-word-bytes) lowtag) object) word)))))
573 ;;; ALLOCATE-VECTOR
574 (defun store-string-trailing-null (vector type length words)
575 ;; BASE-STRING needs to have a null terminator. The byte is inaccessible
576 ;; to lisp, so clear it now.
577 (cond ((and (sc-is type immediate)
578 (/= (tn-value type) sb-vm:simple-base-string-widetag))) ; do nothing
579 ((and (sc-is type immediate)
580 (= (tn-value type) sb-vm:simple-base-string-widetag)
581 (sc-is length immediate))
582 (inst mov :byte (ea (- (+ (ash vector-data-offset word-shift)
583 (tn-value length))
584 other-pointer-lowtag)
585 vector)
587 ;; Zeroizing the entire final word is easier than using LENGTH now.
588 ((sc-is words immediate)
589 ;; I am not convinced that this case is reachable -
590 ;; we won't DXify a vector of unknown type.
591 (inst mov :qword
592 ;; Given N data words, write to word N-1
593 (ea (- (ash (+ (tn-value words) vector-data-offset -1)
594 word-shift)
595 other-pointer-lowtag)
596 vector)
599 ;; This final case is ok with 0 data words - it might clobber the LENGTH
600 ;; slot, but subsequently we rewrite that slot.
601 ;; But strings always have at least 1 word, so no worries either way.
602 (inst mov :qword
603 (ea (- (ash (1- vector-data-offset) word-shift)
604 other-pointer-lowtag)
605 vector
606 words (ash 1 (- word-shift n-fixnum-tag-bits)))
607 0))))
609 (macrolet ((calc-size-in-bytes (n-words size-tn)
610 `(cond ((sc-is ,n-words immediate)
611 (pad-data-block (+ (tn-value ,n-words) vector-data-offset)))
613 (inst lea ,size-tn
614 (ea (+ lowtag-mask (* vector-data-offset n-word-bytes))
615 nil ,n-words (ash 1 (- word-shift n-fixnum-tag-bits))))
616 (inst and ,size-tn (lognot lowtag-mask))
617 ,size-tn)))
618 (put-header (vector-tn lowtag type len zeroed temp)
619 `(let ((len (if (sc-is ,len immediate) (fixnumize (tn-value ,len)) ,len))
620 (type (if (sc-is ,type immediate) (tn-value ,type) ,type)))
621 (storew* type ,vector-tn 0 ,lowtag ,zeroed ,temp)
622 #+ubsan (inst mov :dword (vector-len-ea ,vector-tn ,lowtag) len)
623 #-ubsan (storew* len ,vector-tn vector-length-slot
624 ,lowtag ,zeroed ,temp)))
625 (want-shadow-bits ()
626 `(and poisoned
627 (if (sc-is type immediate)
628 (/= (tn-value type) simple-vector-widetag)
629 :maybe)))
630 (calc-shadow-bits-size (reg)
631 `(cond ((sc-is length immediate)
632 ;; Calculate number of dualwords (as 128 bits per dualword)
633 ;; and multiply by 16 to get number of bytes. Also add the 2 header words.
634 (* 16 (+ 2 (ceiling (tn-value length) 128))))
636 ;; Compute (CEILING length 128) by adding 127, then truncating divide
637 ;; by 128, and untag as part of the divide step.
638 ;; Account for the two fixed words by adding in 128 more bits initially.
639 (inst lea :dword ,reg (ea (fixnumize (+ 128 127)) length))
640 (inst shr :dword ,reg 8) ; divide by 128 and untag as one operation
641 (inst shl :dword ,reg 4) ; multiply by 16 bytes per dualword
642 ,reg)))
643 (store-originating-pc (vector)
644 ;; Put the current program-counter into the length slot of the shadow bits
645 ;; so that we can ascribe blame to the array's creator.
646 `(let ((here (gen-label)))
647 (emit-label here)
648 (inst lea temp (rip-relative-ea here))
649 (inst shl temp 4)
650 (inst mov (ea (- 8 other-pointer-lowtag) ,vector) temp))))
652 (define-vop (allocate-vector-on-heap)
653 #+ubsan (:info poisoned)
654 (:args (type :scs (unsigned-reg immediate))
655 (length :scs (any-reg immediate))
656 (words :scs (any-reg immediate)))
657 ;; Result is live from the beginning, like a temp, because we use it as such
658 ;; in 'calc-size-in-bytes'
659 (:results (result :scs (descriptor-reg) :from :load))
660 (:arg-types #+ubsan (:constant t)
661 positive-fixnum positive-fixnum positive-fixnum)
662 (:temporary (:sc unsigned-reg) temp)
663 #+gs-seg (:temporary (:sc unsigned-reg :offset 15) thread-tn)
664 (:policy :fast-safe)
665 (:node-var node)
666 (:generator 100
667 #+ubsan
668 (when (want-shadow-bits)
669 ;; allocate a vector of "written" bits unless the vector is simple-vector-T,
670 ;; which can use unbound-marker as a poison value on reads.
671 (when (sc-is type unsigned-reg)
672 (inst cmp :byte type simple-vector-widetag)
673 (inst push 0)
674 (inst jmp :e NO-SHADOW-BITS))
675 ;; It would be possible to do this and the array proper
676 ;; in a single pseudo-atomic section, but I don't care to do that.
677 (let ((nbytes (calc-shadow-bits-size result)))
678 (pseudo-atomic ()
679 ;; Allocate the bits into RESULT
680 (allocation simple-bit-vector-widetag nbytes 0 result node temp nil)
681 (inst mov :byte (ea result) simple-bit-vector-widetag)
682 (inst mov :dword (vector-len-ea result 0)
683 (if (sc-is length immediate) (fixnumize (tn-value length)) length))
684 (inst or :byte result other-pointer-lowtag)))
685 (store-originating-pc result)
686 (inst push result)) ; save the pointer to the shadow bits
687 NO-SHADOW-BITS
688 ;; The LET generates instructions that needn't be pseudoatomic
689 ;; so don't move it inside.
690 ;; There are 3 possibilities for correctness of INSTRUMENT-ALLOC:
691 ;; * If WORDS is not immediate, and ALLOC-TEMP is R12, then compute size
692 ;; into ALLOC-TEMP, use RESULT as the instrumentation temp.
693 ;; ALLOCATION receives: input = ALLOC-TEMP, output = RESULT, and no other temp
694 ;; * If WORDS is not immediate and ALLOC-TEMP is not R12, then compute size
695 ;; into RESULT, use ALLOC-TEMP as the instrumentation temp.
696 ;; ALLOCATION receives: input = RESULT, output = RESULT, temp = ALLOC-TEMP.
697 (multiple-value-bind (size-tn instrumentation-temp alloc-temp)
698 (cond ((sc-is words immediate)
699 ;; If WORDS is immediate, then let INSTRUMENT-ALLOC choose its temp
700 (values (calc-size-in-bytes words nil) (list result temp) temp))
701 ((location= temp r12-tn)
702 ;; Compute the size into TEMP, use RESULT for instrumentation.
703 ;; Don't give another temp to ALLOCATION, because its SIZE and temp
704 ;; can not be in the same register (which it AVERs).
705 (values (calc-size-in-bytes words temp) result nil))
707 ;; Compute the size into RESULT, use TEMP for instrumentation.
708 ;; ALLOCATION needs the temp register in this case,
709 ;; because input and output are in the same register.
710 (values (calc-size-in-bytes words result) temp temp)))
711 (instrument-alloc (if (sc-is type immediate)
712 (case (tn-value type)
713 (#.simple-vector-widetag 'simple-vector)
714 (t 'unboxed-array))
715 type)
716 size-tn node instrumentation-temp thread-tn)
717 (pseudo-atomic (:thread-tn thread-tn)
718 (allocation type size-tn 0 result node alloc-temp thread-tn)
719 (put-header result 0 type length t alloc-temp)
720 (inst or :byte result other-pointer-lowtag)))
721 #+ubsan
722 (cond ((want-shadow-bits)
723 (inst pop temp-reg-tn) ; restore shadow bits
724 (inst mov (object-slot-ea result 1 other-pointer-lowtag) temp-reg-tn))
725 (poisoned ; uninitialized SIMPLE-VECTOR
726 (store-originating-pc result)))))
728 (define-vop (allocate-vector-on-stack)
729 #+ubsan (:info poisoned)
730 (:args (type :scs (unsigned-reg immediate))
731 (length :scs (any-reg (immediate
732 (typep (fixnumize (tn-value tn))
733 '(signed-byte 32)))))
734 (words :scs (any-reg (immediate
735 (typep (pad-data-block (+ (tn-value tn) vector-data-offset))
736 'sc-offset)))))
737 (:results (result :scs (descriptor-reg) :from :load))
738 (:node-var node)
739 (:vop-var vop)
740 (:arg-types #+ubsan (:constant t)
741 positive-fixnum positive-fixnum positive-fixnum)
742 #+ubsan (:temporary (:sc any-reg :offset rax-offset) rax)
743 #+ubsan (:temporary (:sc any-reg :offset rcx-offset) rcx)
744 #+ubsan (:temporary (:sc any-reg :offset rdi-offset) rdi)
745 (:policy :fast-safe)
746 (:generator 10
747 #+ubsan
748 (when (want-shadow-bits)
749 ;; allocate a vector of "written" bits unless the vector is simple-vector-T,
750 ;; which can use unbound-marker as a poison value on reads.
751 (when (sc-is type unsigned-reg) (bug "vector-on-stack: unknown type"))
752 (zeroize rax)
753 (let ((nbytes (calc-shadow-bits-size rcx)))
754 (stack-allocation nbytes 0 rdi)
755 (when (sc-is length immediate) (inst mov rcx nbytes)))
756 (inst rep)
757 (inst stos :byte) ; RAX was zeroed
758 (inst lea rax (ea other-pointer-lowtag rsp-tn))
759 (inst mov :dword (ea (- other-pointer-lowtag) rax) simple-bit-vector-widetag)
760 (inst mov :dword (vector-len-ea rax)
761 (if (sc-is length immediate) (fixnumize (tn-value length)) length))
762 (store-originating-pc rax))
763 (let ((size (calc-size-in-bytes words result)))
764 (when (sb-c::make-vector-check-overflow-p node)
765 (let ((overflow (generate-error-code vop
766 'stack-allocated-object-overflows-stack-error
767 (if (tn-p size)
768 size
769 (make-sc+offset immediate-sc-number size)))))
770 (inst sub rsp-tn size)
771 (inst cmp :qword rsp-tn (thread-slot-ea thread-control-stack-start-slot))
772 ;; avoid clearing condition codes
773 (inst lea rsp-tn (if (integerp size)
774 (ea size rsp-tn)
775 (ea rsp-tn size)))
776 (inst jmp :be overflow)))
777 ;; Compute tagged pointer sooner than later since access off RSP
778 ;; requires an extra byte in the encoding anyway.
779 (stack-allocation size other-pointer-lowtag result
780 ;; If already aligned RSP, don't need to do it again.
781 #+ubsan (want-shadow-bits))
782 ;; NB: store the trailing null BEFORE storing the header,
783 ;; in case the length in words is 0, which stores into the LENGTH slot
784 ;; as if it were element -1 of data (which probably can't happen).
785 (store-string-trailing-null result type length words)
786 (put-header result other-pointer-lowtag type length nil nil)
788 #+ubsan
789 (cond ((want-shadow-bits)
790 (inst mov (ea (- (ash vector-length-slot word-shift) other-pointer-lowtag)
791 result)
792 rax))
793 (poisoned ; uninitialized SIMPLE-VECTOR
794 (store-originating-pc result)))))
796 #+linux ; unimplemented for others
797 (define-vop (allocate-vector-on-stack+msan-unpoison)
798 #+ubsan (:info poisoned)
799 #+ubsan (:ignore poisoned)
800 (:args (type :scs (unsigned-reg immediate))
801 (length :scs (any-reg immediate))
802 (words :scs (any-reg immediate)))
803 (:results (result :scs (descriptor-reg) :from :load))
804 (:arg-types #+ubsan (:constant t)
805 positive-fixnum positive-fixnum positive-fixnum)
806 ;; This is a separate vop because it needs more temps.
807 (:temporary (:sc any-reg :offset rcx-offset) rcx)
808 (:temporary (:sc any-reg :offset rax-offset) rax)
809 (:temporary (:sc any-reg :offset rdi-offset) rdi)
810 (:policy :fast-safe)
811 (:generator 10
812 (let ((size (calc-size-in-bytes words result)))
813 ;; Compute tagged pointer sooner than later since access off RSP
814 ;; requires an extra byte in the encoding anyway.
815 (stack-allocation size other-pointer-lowtag result)
816 (store-string-trailing-null result type length words)
817 ;; FIXME: It would be good to check for stack overflow here.
818 (put-header result other-pointer-lowtag type length nil nil)
819 (cond ((sc-is words immediate)
820 (inst mov rcx (+ (tn-value words) vector-data-offset)))
822 (inst lea rcx (ea (ash vector-data-offset n-fixnum-tag-bits) words))
823 (inst shr rcx n-fixnum-tag-bits)))
824 (inst mov rdi msan-mem-to-shadow-xor-const)
825 (inst xor rdi rsp-tn) ; compute shadow address
826 (zeroize rax)
827 (inst rep)
828 (inst stos :qword)))))
830 ;;; ALLOCATE-LIST
831 (macrolet ((calc-size-in-bytes (length answer)
832 `(cond ((sc-is ,length immediate)
833 (aver (/= (tn-value ,length) 0))
834 (* (tn-value ,length) n-word-bytes 2))
836 (inst mov result nil-value)
837 (inst test ,length ,length)
838 (inst jmp :z done)
839 (inst lea ,answer
840 (ea nil ,length
841 (ash 1 (1+ (- word-shift n-fixnum-tag-bits)))))
842 ,answer)))
843 (compute-end ()
844 `(let ((size (cond ((typep size '(or (signed-byte 32) tn))
845 size)
847 (inst mov limit size)
848 limit))))
849 (inst lea limit
850 (ea (if (fixnump size) size 0) result
851 (if (fixnump size) nil size))))))
853 (define-vop (allocate-list-on-stack)
854 (:args (length :scs (any-reg immediate))
855 (element :scs (any-reg descriptor-reg)))
856 (:results (result :scs (descriptor-reg) :from :load))
857 (:arg-types positive-fixnum *)
858 (:policy :fast-safe)
859 (:node-var node)
860 (:vop-var vop)
861 (:temporary (:sc descriptor-reg) tail next limit)
862 (:generator 20
863 (let ((size (calc-size-in-bytes length next))
864 (loop (gen-label)))
865 (when (sb-c::make-list-check-overflow-p node)
866 (let ((overflow (generate-error-code vop 'stack-allocated-object-overflows-stack-error size)))
867 (inst sub rsp-tn size)
868 (inst cmp :qword rsp-tn (thread-slot-ea thread-control-stack-start-slot))
869 ;; avoid clearing condition codes
870 (inst lea rsp-tn (ea rsp-tn size))
871 (inst jmp :be overflow)))
872 (stack-allocation size list-pointer-lowtag result)
873 (compute-end)
874 (inst mov next result)
875 (emit-label LOOP)
876 (inst mov tail next)
877 (inst add next (* 2 n-word-bytes))
878 (storew element tail cons-car-slot list-pointer-lowtag)
879 ;; Store the CDR even if it will be smashed to nil.
880 (storew next tail cons-cdr-slot list-pointer-lowtag)
881 (inst cmp next limit)
882 (inst jmp :ne loop)
883 (storew nil-value tail cons-cdr-slot list-pointer-lowtag))
884 done))
886 (define-vop (allocate-list-on-heap)
887 (:args (length :scs (any-reg immediate))
888 ;; Too bad we don't have an SC that implies actually a CPU immediate
889 ;; i.e. fits in an imm32 operand
890 (element :scs (any-reg descriptor-reg)))
891 (:results (result :scs (descriptor-reg) :from :load))
892 (:arg-types positive-fixnum *)
893 (:policy :fast-safe)
894 (:node-var node)
895 (:temporary (:sc descriptor-reg) tail next limit)
896 #+gs-seg (:temporary (:sc unsigned-reg :offset 15) thread-tn)
897 (:generator 20
898 (let ((size (calc-size-in-bytes length tail))
899 (entry (gen-label))
900 (loop (gen-label))
901 (leave-pa (gen-label)))
902 (instrument-alloc +cons-primtype+ size node (list next limit) thread-tn)
903 (pseudo-atomic (:thread-tn thread-tn)
904 (allocation +cons-primtype+ size list-pointer-lowtag result node limit thread-tn
905 :overflow
906 (lambda ()
907 ;; Push C call args right-to-left
908 (inst push (if (integerp size) (constantize size) size))
909 (inst push (if (sc-is element immediate) (tn-value element) element))
910 (invoke-asm-routine
911 'call (if (system-tlab-p 0 node) 'sys-make-list 'make-list) node)
912 (inst pop result)
913 (inst jmp leave-pa)))
914 (compute-end)
915 (inst mov next result)
916 (inst jmp entry)
917 (emit-label LOOP)
918 (storew next tail cons-cdr-slot list-pointer-lowtag)
919 (emit-label ENTRY)
920 (inst mov tail next)
921 (inst add next (* 2 n-word-bytes))
922 (storew element tail cons-car-slot list-pointer-lowtag)
923 (inst cmp next limit)
924 (inst jmp :ne loop)
925 ;; still pseudo-atomic
926 (storew nil-value tail cons-cdr-slot list-pointer-lowtag)
927 (emit-label leave-pa)))
928 done))) ; label needed by calc-size-in-bytes
930 #-immobile-space
931 (define-vop (make-fdefn)
932 (:policy :fast-safe)
933 (:translate make-fdefn)
934 (:args (name :scs (descriptor-reg) :to :eval))
935 (:results (result :scs (descriptor-reg) :from :argument))
936 #+gs-seg (:temporary (:sc unsigned-reg :offset 15) thread-tn)
937 (:node-var node)
938 (:generator 37
939 (alloc-other fdefn-widetag fdefn-size result node nil thread-tn
940 (lambda ()
941 (storew name result fdefn-name-slot other-pointer-lowtag)
942 (storew nil-value result fdefn-fun-slot other-pointer-lowtag)
943 (storew (make-fixup 'undefined-tramp :assembly-routine)
944 result fdefn-raw-addr-slot other-pointer-lowtag)))))
946 (define-vop (make-closure)
947 (:info label length stack-allocate-p)
948 (:temporary (:sc any-reg) temp)
949 #+gs-seg (:temporary (:sc unsigned-reg :offset 15) thread-tn)
950 (:results (result :scs (descriptor-reg)))
951 (:node-var node)
952 (:vop-var vop)
953 (:generator 10
954 (let* ((words (+ length closure-info-offset)) ; including header
955 (bytes (pad-data-block words))
956 (header (logior (ash (1- words) n-widetag-bits) closure-widetag))
957 (remain-pseudo-atomic
958 (eq (car (last (vop-codegen-info vop))) :pseudo-atomic)))
959 (unless stack-allocate-p
960 (instrument-alloc closure-widetag bytes node (list result temp) thread-tn))
961 (pseudo-atomic (:default-exit (not remain-pseudo-atomic)
962 :elide-if stack-allocate-p :thread-tn thread-tn)
963 (if stack-allocate-p
964 (stack-allocation bytes fun-pointer-lowtag result)
965 (allocation closure-widetag bytes fun-pointer-lowtag result node temp thread-tn))
966 (storew* #-compact-instance-header header ; write the widetag and size
967 #+compact-instance-header ; ... plus the layout pointer
968 (let ((layout #-sb-thread (static-symbol-value-ea 'function-layout)
969 #+sb-thread (thread-slot-ea thread-function-layout-slot)))
970 (cond ((typep header '(unsigned-byte 16))
971 (inst mov temp layout)
972 ;; emit a 2-byte constant, the low 4 of TEMP were zeroed
973 (inst mov :word temp header))
975 (inst mov temp header)
976 (inst or temp layout)))
977 temp)
978 result 0 fun-pointer-lowtag (not stack-allocate-p))
979 (inst lea (pc-size vop)
980 temp (rip-relative-ea label (ash simple-fun-insts-offset word-shift)))
981 (storew temp result closure-fun-slot fun-pointer-lowtag)))))
983 (define-vop (reference-closure)
984 (:info label)
985 (:results (result :scs (descriptor-reg)))
986 (:vop-var vop)
987 (:generator 1
988 (inst lea (pc-size vop)
989 result (rip-relative-ea label fun-pointer-lowtag))))
991 ;;; The compiler likes to be able to directly make value cells.
992 (define-vop (make-value-cell)
993 (:args (value :scs (descriptor-reg any-reg immediate constant) :to :result))
994 (:results (result :scs (descriptor-reg) :from :eval))
995 #+gs-seg (:temporary (:sc unsigned-reg :offset 15) thread-tn)
996 (:info stack-allocate-p)
997 (:node-var node)
998 (:generator 10
999 (let ((data (if (sc-is value immediate)
1000 (let ((bits (encode-value-if-immediate value)))
1001 (if (integerp bits)
1002 (constantize bits)
1003 bits)) ; could be a fixup
1004 value)))
1005 (cond (stack-allocate-p
1006 ;; No regression test got here. Therefore I think there's no such thing as a
1007 ;; dynamic-extent value cell. It makes sense that there isn't: DX closures
1008 ;; would just reference their frame, wouldn't they?
1009 (inst and rsp-tn (lognot lowtag-mask)) ; align
1010 (inst push data)
1011 (inst push (compute-object-header value-cell-size value-cell-widetag))
1012 (inst lea result (ea other-pointer-lowtag rsp-tn)))
1014 (alloc-other value-cell-widetag value-cell-size result node nil thread-tn
1015 (lambda ()
1016 (if (sc-case value
1017 (immediate
1018 (unless (integerp data) (inst push data) t))
1019 (constant
1020 (inst push value) t)
1021 (t nil))
1022 (inst pop (object-slot-ea result value-cell-value-slot other-pointer-lowtag))
1023 (storew data result value-cell-value-slot other-pointer-lowtag)))))))))
1025 ;;;; automatic allocators for primitive objects
1027 (flet
1028 ((alloc (vop name words type lowtag stack-allocate-p result
1029 &optional alloc-temp node
1030 &aux (bytes (pad-data-block words))
1031 (remain-pseudo-atomic
1032 (eq (car (last (vop-codegen-info vop))) :pseudo-atomic)))
1033 #+bignum-assertions
1034 (when (eq type bignum-widetag) (setq bytes (* bytes 2))) ; use 2x the space
1035 (progn name) ; possibly not used
1036 (unless stack-allocate-p
1037 (instrument-alloc type bytes node (list result alloc-temp) thread-tn))
1038 (pseudo-atomic (:default-exit (not remain-pseudo-atomic)
1039 :elide-if stack-allocate-p :thread-tn thread-tn)
1040 ;; If storing a header word, defer ORing in the lowtag until after
1041 ;; the header is written so that displacement can be 0.
1042 (cond (stack-allocate-p
1043 (stack-allocation bytes (if type 0 lowtag) result))
1044 ((eql type funcallable-instance-widetag)
1045 (inst push bytes)
1046 (invoke-asm-routine 'call 'alloc-funinstance vop)
1047 (inst pop result))
1049 (allocation type bytes (if type 0 lowtag) result node alloc-temp thread-tn)))
1050 (let ((header (compute-object-header words type)))
1051 (cond #+compact-instance-header
1052 ((and (eq name '%make-structure-instance) stack-allocate-p)
1053 ;; Write a :DWORD, not a :QWORD, because the high half will be
1054 ;; filled in when the layout is stored. Can't use STOREW* though,
1055 ;; because it tries to store as few bytes as possible,
1056 ;; where this instruction must write exactly 4 bytes.
1057 (inst mov :dword (ea 0 result) header))
1059 (storew* header result 0 0 (not stack-allocate-p)))))
1060 ;; GC can make the best choice about placement if it has a layout.
1061 ;; Of course with conservative GC the object will be pinned anyway,
1062 ;; but still, always having a layout is a good thing.
1063 (when (typep type 'layout) ; store its layout, while still in pseudo-atomic
1064 (inst mov :dword (ea 4 result) (make-fixup type :layout)))
1065 (inst or :byte result lowtag))))
1066 ;; DX is strictly redundant in these 2 vops, but they're written this way
1067 ;; so that backends can choose to use a single vop for both.
1068 (define-vop (fixed-alloc)
1069 (:info name words type lowtag dx)
1070 (:results (result :scs (descriptor-reg)))
1071 (:temporary (:sc unsigned-reg) alloc-temp)
1072 #+gs-seg (:temporary (:sc unsigned-reg :offset 15) thread-tn)
1073 (:vop-var vop)
1074 (:node-var node)
1075 (:generator 50 (alloc vop name words type lowtag dx result alloc-temp node)))
1076 (define-vop (sb-c::fixed-alloc-to-stack)
1077 (:info name words type lowtag dx)
1078 (:results (result :scs (descriptor-reg)))
1079 (:vop-var vop)
1080 (:generator 50 (alloc vop name words type lowtag dx result))))
1082 ;;; Allocate a non-vector variable-length object.
1083 ;;; Exactly 4 allocators are rendered via this vop:
1084 ;;; BIGNUM (%ALLOCATE-BIGNUM)
1085 ;;; FUNCALLABLE-INSTANCE (%MAKE-FUNCALLABLE-INSTANCE)
1086 ;;; CLOSURE (%ALLOC-CLOSURE)
1087 ;;; INSTANCE (%MAKE-INSTANCE,%MAKE-INSTANCE/MIXED)
1088 ;;; WORDS accounts for the mandatory slots *including* the header.
1089 ;;; EXTRA is the variable payload, also measured in words.
1090 (define-vop (var-alloc)
1091 (:args (extra :scs (any-reg)))
1092 (:arg-types positive-fixnum)
1093 (:info name words type lowtag stack-allocate-p)
1094 (:results (result :scs (descriptor-reg) :from (:eval 1)))
1095 (:temporary (:sc unsigned-reg :from :eval :to (:eval 1)) bytes)
1096 (:temporary (:sc unsigned-reg :from :eval :to :result) header)
1097 ;; KLUDGE: wire to RAX so that it doesn't get R12
1098 (:temporary (:sc unsigned-reg :offset 0) alloc-temp)
1099 #+gs-seg (:temporary (:sc unsigned-reg :offset 15) thread-tn)
1100 (:node-var node)
1101 (:vop-var vop)
1102 (:generator 50
1103 (when (eq name '%make-funcallable-instance)
1104 ;; %MAKE-FUNCALLABLE-INSTANCE needs to allocate to pages of code,
1105 ;; which it failed to do if the var-alloc translation was invoked.
1106 ;; But it seems we never need this! (so is it FIXME or isn't it?)
1107 (error "can't %MAKE-FUNCALLABLE-INSTANCE of unknown length"))
1108 (let ((remain-pseudo-atomic (eq (car (last (vop-codegen-info vop))) :pseudo-atomic)))
1109 ;; With the exception of bignums, these objects have effectively
1110 ;; 32-bit headers because the high 4 byes contain a layout pointer.
1111 (let ((operand-size (if (= type bignum-widetag) :qword :dword)))
1112 (inst lea operand-size bytes
1113 (ea (* (1+ words) n-word-bytes) nil
1114 extra (ash 1 (- word-shift n-fixnum-tag-bits))))
1115 (inst mov operand-size header bytes)
1116 (inst shl operand-size header (- (length-field-shift type) word-shift)) ; w+1 to length field
1117 (inst lea operand-size header ; (w-1 << 8) | type
1118 (ea (+ (ash -2 (length-field-shift type)) type) header))
1119 (inst and operand-size bytes (lognot lowtag-mask)))
1120 #+bignum-assertions
1121 (when (= type bignum-widetag) (inst shl :dword bytes 1)) ; use 2x the space
1122 (cond (stack-allocate-p
1123 (stack-allocation bytes lowtag result)
1124 (storew header result 0 lowtag))
1126 ;; can't pass RESULT as a possible choice of scratch register
1127 ;; because it might be in the same physical reg as BYTES.
1128 ;; Yup, the lifetime specs in this vop are pretty confusing.
1129 (instrument-alloc type bytes node alloc-temp thread-tn)
1130 (pseudo-atomic (:default-exit (not remain-pseudo-atomic)
1131 :thread-tn thread-tn)
1132 (allocation type bytes lowtag result node alloc-temp thread-tn)
1133 (storew header result 0 lowtag)))))))
1135 #+sb-xc-host
1136 (define-vop (alloc-code)
1137 (:args (total-words :scs (unsigned-reg) :target c-arg-1)
1138 (boxed-words :scs (unsigned-reg) :target c-arg-2))
1139 (:temporary (:sc unsigned-reg
1140 :offset #.(first *c-call-register-arg-offsets*)
1141 :from (:argument 0) :to :result) c-arg-1)
1142 (:temporary (:sc unsigned-reg :offset rsi-offset
1143 :offset #.(second *c-call-register-arg-offsets*)
1144 :from (:argument 1) :to :result) c-arg-2)
1145 (:temporary (:sc unsigned-reg :offset r15-offset) dummy)
1146 (:results (res :scs (descriptor-reg)))
1147 (:ignore dummy)
1148 (:generator 1
1149 (move c-arg-1 total-words)
1150 (move c-arg-2 boxed-words)
1151 (with-registers-preserved (c :except #-win32 rdi #+win32 rcx :frame-reg r15)
1152 (pseudo-atomic () (call-c "alloc_code_object"))
1153 (move c-arg-1 rax-tn))
1154 (move res c-arg-1)))
1156 #+immobile-space
1157 (macrolet ((c-fun (name)
1158 `(let ((c-fun (make-fixup ,name :foreign)))
1159 (cond ((sb-c::code-immobile-p node) c-fun)
1160 (t (progn (inst mov rax c-fun) rax))))))
1161 (define-vop (!alloc-immobile-fixedobj)
1162 (:args (size-class :scs (any-reg) :target c-arg1)
1163 (nwords :scs (any-reg) :target c-arg2)
1164 (header :scs (any-reg) :target c-arg3))
1165 (:temporary (:sc unsigned-reg :from (:argument 0) :to :eval
1166 :offset #.(first *c-call-register-arg-offsets*)) c-arg1)
1167 (:temporary (:sc unsigned-reg :from (:argument 1) :to :eval
1168 :offset #.(second *c-call-register-arg-offsets*)) c-arg2)
1169 (:temporary (:sc unsigned-reg :from (:argument 2) :to :eval
1170 :offset #.(third *c-call-register-arg-offsets*)) c-arg3)
1171 (:temporary (:sc unsigned-reg :from :eval :to (:result 0) :offset rax-offset) rax)
1172 (:results (result :scs (descriptor-reg)))
1173 (:node-var node)
1174 (:generator 50
1175 (inst mov c-arg1 size-class)
1176 (inst mov c-arg2 nwords)
1177 (inst mov c-arg3 header)
1178 ;; RSP needn't be restored because the allocators all return immediately
1179 ;; which has that effect
1180 (inst and rsp-tn -16)
1181 (pseudo-atomic ()
1182 (call-c (c-fun "alloc_immobile_fixedobj"))
1183 (move result rax))))
1185 ;;; Timing test:
1186 ;;; Dynamic-space allocation:
1187 ;;; * (defun f (n) (dotimes (i n) (make-symbol "b")))
1188 ;;; * (time (f 500000))
1189 ;;; Evaluation took: 0.004 seconds of real time
1190 ;;; Immobile-space:
1191 ;;; * (defun f (n) (dotimes (i n) (sb-vm::%alloc-immobile-symbol "b")))
1192 ;;; Evaluation took: 0.043 seconds of real time
1193 ;;; With vop: 0.028 seconds of real time
1195 (eval-when (:compile-toplevel)
1196 (aver (evenp symbol-size))) ; assumptions in the code below
1198 (define-vop (!fast-alloc-immobile-symbol)
1199 (:results (result :scs (descriptor-reg)))
1200 (:temporary (:sc unsigned-reg :offset rax-offset) rax)
1201 (:temporary (:sc unsigned-reg :offset rbx-offset) rbx)
1202 (:temporary (:sc unsigned-reg :offset rcx-offset) rcx)
1203 (:temporary (:sc unsigned-reg) header)
1204 (:generator 1
1205 ;; fixedobj_pages linkage entry: 1 PTE per page, 12-byte struct
1206 (inst mov rbx (rip-relative-ea (make-fixup "fixedobj_pages" :foreign-dataref)))
1207 ;; fixedobj_page_hint: 1 hint per sizeclass. C type = uint32_t
1208 (inst mov rax (rip-relative-ea (make-fixup "fixedobj_page_hint" :foreign-dataref)))
1209 (inst mov rbx (ea rbx)) ; get the base of the fixedobj_pages array
1210 ;; This has to be pseudoatomic as soon as the page hint is loaded.
1211 ;; Consider the situation where there is exactly one symbol on a page that
1212 ;; is almost all free, and the symbol page hint points to that page.
1213 ;; If GC occurs, it might dispose that symbol, resetting the page attributes
1214 ;; and page hint. It would be an error to allocate to that page
1215 ;; because it is no longer a page of symbols but rather a free page.
1216 ;; There is no way to inform GC that we are currently looking at a page
1217 ;; in anticipation of allocating to it.
1218 (pseudo-atomic ()
1219 (inst mov :dword rax (ea 4 rax)) ; rax := fixedobj_page_hint[1] (sizeclass=SYMBOL)
1220 (inst test :dword rax rax)
1221 (inst jmp :z FAIL) ; fail if hint page is 0
1222 (inst lea rbx (ea rbx rax 8)) ; rbx := &fixedobj_pages[hint].free_index
1223 ;; compute fixedobj_page_address(hint) into RAX
1224 (inst mov rcx (rip-relative-ea (make-fixup "FIXEDOBJ_SPACE_START" :foreign-dataref)))
1225 (inst shl rax (integer-length (1- immobile-card-bytes)))
1226 (inst add rax (ea rcx))
1227 ;; load the page's free pointer
1228 (inst mov :dword rcx (ea rbx)) ; rcx := fixedobj_pages[hint].free_index
1229 ;; fail if allocation would overrun the page
1230 (inst cmp :dword rcx (- immobile-card-bytes (* symbol-size n-word-bytes)))
1231 (inst jmp :a FAIL)
1232 ;; compute address of the allegedly free memory block into RESULT
1233 (inst lea result (ea rcx rax)) ; free_index + page_base
1234 ;; read the potential symbol header
1235 (inst mov rax (ea result))
1236 (inst test :dword rax 1)
1237 (inst jmp :nz FAIL) ; not a fixnum implies already taken
1238 ;; try to claim this word of memory
1239 (inst mov header (logior (ash (1- symbol-size) n-widetag-bits) symbol-widetag))
1240 (inst cmpxchg :lock (ea result) header)
1241 (inst jmp :ne FAIL) ; already taken
1242 ;; compute new free_index = spacing + old header + free_index
1243 (inst lea :dword rax (ea (* symbol-size n-word-bytes) rax rcx))
1244 (inst mov :dword (ea rbx) rax) ; store new free_index
1245 ;; set the low bit of the 'gens' field
1246 (inst or :lock :byte (ea 7 rbx) 1) ; 7+rbx = &fixedobj_pages[i].attr.parts.gens_
1247 (inst or :byte result other-pointer-lowtag) ; make_lispobj()
1248 (inst jmp OUT)
1249 FAIL
1250 (inst mov result nil-value)
1251 OUT)))
1253 ) ; end MACROLET