Avoid freeing literal memory.
[sbcl.git] / src / compiler / x86-64 / call.lisp
blob67e1cb7e6c70696db35689ddfb11e6cf669eca49
1 ;;;; function call for the x86 VM
3 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; more information.
5 ;;;;
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB-VM")
;;; SC+OFFSET descriptions of the fixed passing locations.

;;; Argument count: ANY-REG at RCX-OFFSET.
(defconstant arg-count-sc
  (make-sc+offset any-reg-sc-number rcx-offset))

;;; Closure: ANY-REG at RAX-OFFSET.
(defconstant closure-sc
  (make-sc+offset any-reg-sc-number rax-offset))

;;; Return PC: SAP-STACK slot RETURN-PC-SAVE-OFFSET.
(defconstant return-pc-passing-offset
  (make-sc+offset sap-stack-sc-number return-pc-save-offset))

;;; Old frame pointer: CONTROL-STACK slot OCFP-SAVE-OFFSET.
(defconstant old-fp-passing-offset
  (make-sc+offset control-stack-sc-number ocfp-save-offset))
;;; Emit code that leaves the address of NAME's linkage cell in RES.
;;; NODE is consulted only to choose how the linkage table is addressed.
(defun compute-linkage-cell (node name res)
  (cond ((sb-c::code-immobile-p node)
         ;; The cell can be addressed relative to RIP.
         (inst lea res (rip-relative-ea (make-fixup name :linkage-cell))))
        (t
         ;; Otherwise fetch the linkage table base from the thread
         ;; structure and add the cell's fixup as a displacement.
         (inst mov res (thread-slot-ea sb-vm::thread-linkage-table-slot))
         (inst lea res (ea (make-fixup name :linkage-cell) res)))))
30 ;;; Make the TNs used to hold OLD-FP and RETURN-PC within the current
31 ;;; function. We treat these specially so that the debugger can find
32 ;;; them at a known location.
33 ;;;
34 ;;; Without using a save-tn - which does not make much sense if it is
35 ;;; wired to the stack?
(defun make-old-fp-save-location ()
  ;; A fixnum TN wired to control-stack slot OCFP-SAVE-OFFSET and
  ;; marked as an :ENVIRONMENT TN.
  (let ((location (make-wired-tn *fixnum-primitive-type*
                                 control-stack-sc-number
                                 ocfp-save-offset)))
    (setf (tn-kind location) :environment)
    location))
(defun make-return-pc-save-location ()
  ;; A SYSTEM-AREA-POINTER TN wired to SAP-stack slot
  ;; RETURN-PC-SAVE-OFFSET and marked as an :ENVIRONMENT TN.
  (let ((location (make-wired-tn (primitive-type-or-lose 'system-area-pointer)
                                 sap-stack-sc-number
                                 return-pc-save-offset)))
    (setf (tn-kind location) :environment)
    location))
48 ;;; Make a TN for the standard argument count passing location. We only
49 ;;; need to make the standard location, since a count is never passed when we
50 ;;; are using non-standard conventions.
(defun make-arg-count-location ()
  ;; A fixnum TN wired to the ANY-REG register at RCX-OFFSET.
  (make-wired-tn *fixnum-primitive-type*
                 any-reg-sc-number
                 rcx-offset))
54 ;;;; frame hackery
56 ;;; This is used for setting up the Old-FP in local call.
(define-vop (current-fp)
  (:results (val :scs (any-reg control-stack)))
  (:generator 1
    ;; Copy the current frame pointer (RBP) into the result.
    (move val rbp-tn)))
62 ;;; We don't have a separate NFP, so we don't need to do anything here.
(define-vop (compute-old-nfp)
  (:results (val))
  (:ignore val)
  (:generator 1
    ;; Deliberately emits no instructions.
    nil))
69 ;;; Accessing a slot from an earlier stack frame is definite hackery.
(define-vop (ancestor-frame-ref)
  (:args (frame-pointer :scs (descriptor-reg))
         ;; Never loaded: only its stack offset is used.
         (variable-home-tn :load-if nil))
  (:results (value :scs (descriptor-reg any-reg)))
  (:policy :fast-safe)
  (:generator 4
    (aver (sc-is variable-home-tn control-stack))
    ;; Read the slot of VARIABLE-HOME-TN relative to FRAME-POINTER
    ;; rather than relative to the current frame.
    (loadw value
           frame-pointer
           (frame-word-offset (tn-offset variable-home-tn)))))
;;; Store VALUE into the slot of VARIABLE-HOME-TN in the frame
;;; designated by FRAME-POINTER (the writing counterpart of
;;; ANCESTOR-FRAME-REF).
(define-vop (ancestor-frame-set)
  (:args (frame-pointer :scs (descriptor-reg))
         (value :scs (descriptor-reg any-reg)))
  ;; The "result" is never loaded: only its stack offset is used.
  (:results (variable-home-tn :load-if nil))
  (:policy :fast-safe)
  (:generator 4
    (aver (sc-is variable-home-tn control-stack))
    (storew value
            frame-pointer
            (frame-word-offset (tn-offset variable-home-tn)))))
;;; Typed variants of ANCESTOR-FRAME-REF and ANCESTOR-FRAME-SET, one
;;; pair per unboxed representation.
(macrolet ((define-frame-op
               (suffix sc stack-sc instruction
                &optional (ea `(ea (frame-byte-offset (tn-offset variable-home-tn))
                                   frame-pointer)))
             ;; SUFFIX      - appended (after a slash) to the base VOP names
             ;; SC          - register storage class of the value
             ;; STACK-SC    - storage class VARIABLE-HOME-TN must be in
             ;; INSTRUCTION - move instruction used for both directions
             ;; EA          - form yielding the slot's effective address
             (let ((ref-name (symbolicate 'ancestor-frame-ref '/ suffix))
                   (set-name (symbolicate 'ancestor-frame-set '/ suffix)))
               `(progn
                  (define-vop (,ref-name ancestor-frame-ref)
                    (:results (value :scs (,sc)))
                    (:generator 4
                      (aver (sc-is variable-home-tn ,stack-sc))
                      (inst ,instruction value ,ea)))
                  (define-vop (,set-name ancestor-frame-set)
                    (:args (frame-pointer :scs (descriptor-reg))
                           (value :scs (,sc)))
                    (:generator 4
                      (aver (sc-is variable-home-tn ,stack-sc))
                      (inst ,instruction ,ea value)))))))
  (define-frame-op double-float double-reg double-stack movsd)
  (define-frame-op single-float single-reg single-stack movss)
  ;; The complex representations supply their own address computation.
  (define-frame-op complex-double-float complex-double-reg complex-double-stack
    movupd (ea-for-cdf-data-stack variable-home-tn frame-pointer))
  (define-frame-op complex-single-float complex-single-reg complex-single-stack
    movq (ea-for-csf-data-stack variable-home-tn frame-pointer))
  (define-frame-op signed-byte-64 signed-reg signed-stack mov)
  (define-frame-op unsigned-byte-64 unsigned-reg unsigned-stack mov)
  (define-frame-op system-area-pointer sap-reg sap-stack mov))
;;; Look up PTYPE by name in a fixed table of unboxed primitive types.
;;; For a listed type, return a list of four elements built once at
;;; load time: the primitive type, its stack SC, a function emitting
;;; the matching ANCESTOR-FRAME-REF/... VOP and a function emitting the
;;; matching ANCESTOR-FRAME-SET/... VOP.  Return NIL for any other type.
(defun primitive-type-indirect-cell-type (ptype)
  (declare (type primitive-type ptype))
  (macrolet ((cell-type-case (&rest entries)
               (flet ((clause (entry)
                        (destructuring-bind (name stack-sc ref set) entry
                          `(,name
                            (load-time-value
                             (list (primitive-type-or-lose ',name)
                                   (sc-or-lose ',stack-sc)
                                   (lambda (node block fp value res)
                                     (sb-c::vop ,ref node block
                                                fp value res))
                                   (lambda (node block fp new-val value)
                                     (sb-c::vop ,set node block
                                                fp new-val value))))))))
                 `(case (primitive-type-name ptype)
                    ,@(mapcar #'clause entries)))))
    (cell-type-case
     (double-float double-stack
      ancestor-frame-ref/double-float
      ancestor-frame-set/double-float)
     (single-float single-stack
      ancestor-frame-ref/single-float
      ancestor-frame-set/single-float)
     (complex-double-float complex-double-stack
      ancestor-frame-ref/complex-double-float
      ancestor-frame-set/complex-double-float)
     (complex-single-float complex-single-stack
      ancestor-frame-ref/complex-single-float
      ancestor-frame-set/complex-single-float)
     (signed-byte-64 signed-stack
      ancestor-frame-ref/signed-byte-64
      ancestor-frame-set/signed-byte-64)
     (unsigned-byte-64 unsigned-stack
      ancestor-frame-ref/unsigned-byte-64
      ancestor-frame-set/unsigned-byte-64)
     ;; UNSIGNED-BYTE-63 shares the UNSIGNED-BYTE-64 accessors.
     (unsigned-byte-63 unsigned-stack
      ancestor-frame-ref/unsigned-byte-64
      ancestor-frame-set/unsigned-byte-64)
     (system-area-pointer sap-stack
      ancestor-frame-ref/system-area-pointer
      ancestor-frame-set/system-area-pointer))))
;;; Emit the start of an external entry point: alignment, the START-LAB
;;; label, the simple-fun header, and the instruction that saves the
;;; return PC into the frame.
(define-vop (xep-allocate-frame)
  (:info start-lab)
  (:generator 1
    ;; Read the asmstream's current inter-function padding kind and
    ;; reset it to :NOP in the same step; the previous kind selects
    ;; the fill byte (#x90 for :NOP, otherwise 0).
    (let ((previous-padding
            (shiftf (sb-assem::asmstream-inter-function-padding sb-assem:*asmstream*)
                    :nop)))
      (emit-alignment n-lowtag-bits
                      (if (eq previous-padding :nop) #x90 0)))
    (emit-label start-lab)
    ;; Skip space for the function header.
    (inst simple-fun-header-word)
    (inst .skip (* (1- simple-fun-insts-offset) n-word-bytes))
    ;; The start of the actual code.
    ;; Save the return-pc.
    (popw rbp-tn (frame-word-offset return-pc-save-offset))))
;;; Emit code setting TARGET to SOURCE plus the displacement DISP.
;;; A zero displacement is emitted as a plain MOV instead of an LEA.
(defun emit-lea (target source disp)
  (cond ((eql disp 0)
         (inst mov target source))
        (t
         (inst lea target (ea disp source)))))
;;; Point RSP at the bottom of this function's frame, computed from
;;; RBP and the allocated size of the STACK storage base.
(define-vop (xep-setup-sp)
  (:generator 1
    (emit-lea rsp-tn
              rbp-tn
              (- (* n-word-bytes
                    (- (sb-allocated-size 'stack)
                       sp->fp-offset))))))
185 ;;; This is emitted directly before either a known-call-local, call-local,
186 ;;; or a multiple-call-local. All it does is allocate stack space for the
187 ;;; callee (who has the same size stack as us).
(define-vop (allocate-frame)
  (:results (res :scs (any-reg))
            (nfp))
  (:info callee)
  (:ignore nfp callee)
  (:generator 2
    ;; RES becomes the callee's frame pointer, SP->FP-OFFSET words
    ;; below the current stack pointer ...
    (inst lea res (ea (- (* sp->fp-offset n-word-bytes)) rsp-tn))
    ;; ... and the stack pointer then drops by one full frame.
    (inst sub rsp-tn (* n-word-bytes (sb-allocated-size 'stack)))))
;;; Return the TN that will hold the new frame pointer for a full call
;;; passing NARGS arguments.  When NARGS is supplied and the frame
;;; allocated for it is exactly SP->FP-OFFSET words, the TN is wired to
;;; RSP; otherwise a normal (unwired) fixnum TN is returned.
(defun make-stack-pointer-tn (&optional nargs)
  ;; Avoid using a temporary register if the new frame pointer will be
  ;; at the same location as the new stack pointer
  (if (and nargs
           (= (* sp->fp-offset n-word-bytes)
              ;; Same size computation as ALLOCATE-FULL-CALL-FRAME:
              ;; stack arguments only count once they overflow the
              ;; argument registers.
              (* (max (if (> nargs register-arg-count)
                          nargs
                          0)
                      (sb-c::sb-size (sb-or-lose 'stack)))
                 n-word-bytes)))
      (make-wired-tn *fixnum-primitive-type* any-reg-sc-number rsp-offset)
      (make-normal-tn *fixnum-primitive-type*)))
210 ;;; Allocate a partial frame for passing stack arguments in a full
211 ;;; call. NARGS is the number of arguments passed. We allocate at
212 ;;; least 2 slots, because the XEP noise is going to want to use them
213 ;;; before it can extend the stack.
(define-vop (allocate-full-call-frame)
  (:info nargs)
  (:results (res :scs (any-reg)))
  (:generator 2
    (let ((fp-offset (* sp->fp-offset n-word-bytes))
          ;; Frame size in bytes: NARGS words when some arguments go
          ;; on the stack, but never less than the size of the STACK
          ;; storage base.
          (stack-size (* (max (if (> nargs register-arg-count)
                                  nargs
                                  0)
                              (sb-c::sb-size (sb-or-lose 'stack)))
                         n-word-bytes)))
      (cond ((= fp-offset stack-size)
             ;; The new frame pointer coincides with the new stack
             ;; pointer, so allocate first and copy RSP.
             (inst sub rsp-tn stack-size)
             (move res rsp-tn))
            (t
             ;; Compute the new frame pointer from the old RSP, then
             ;; allocate.
             (inst lea res (ea (- fp-offset) rsp-tn))
             (inst sub rsp-tn stack-size))))))
231 ;;; Emit code needed at the return-point from an unknown-values call
232 ;;; for a fixed number of values. Values is the head of the TN-REF
233 ;;; list for the locations that the values are to be received into.
234 ;;; Nvals is the number of values that are to be received (should
235 ;;; equal the length of Values).
237 ;;; If 0 or 1 values are expected, then we just emit an instruction to
238 ;;; reset the SP (which will only be executed when other than 1 value
239 ;;; is returned.)
241 ;;; In the general case we have to do three things:
242 ;;; -- Default unsupplied register values. This need only be done
243 ;;; when a single value is returned, since register values are
244 ;;; defaulted by the callee in the non-single case.
245 ;;; -- Default unsupplied stack values. This needs to be done whenever
246 ;;; there are stack values.
247 ;;; -- Reset SP. This must be done whenever other than 1 value is
248 ;;; returned, regardless of the number of values desired.
(defun default-unknown-values (vop values nvals node rbx move-temp)
  ;; VOP       - the calling VOP, used for location notes and error code
  ;; VALUES    - head of the TN-REF list of result locations (or NIL)
  ;; NVALS     - number of values the caller wants
  ;; NODE      - the combination node, consulted for policy and types
  ;; RBX       - TN holding the start of the returned values
  ;; MOVE-TEMP - scratch register
  (declare (type (or tn-ref null) values)
           (type unsigned-byte nvals))
  (multiple-value-bind (type name leaf) (sb-c::lvar-fun-type (sb-c::basic-combination-fun node))
    (let* (;; Only verify the returned value count under
           ;; (AND (>= SAFETY 1) (= DEBUG 3)) and for a function leaf
           ;; that is :DECLARED-VERIFY or :DEFINED-HERE.
           (verify (and leaf
                        (policy node (and (>= safety 1)
                                          (= debug 3)))
                        (memq (sb-c::leaf-where-from leaf) '(:declared-verify :defined-here))))
           (type (if verify
                     (if (fun-type-p type)
                         (fun-type-returns type)
                         *wild-type*)
                     (sb-c::node-derived-type node)))
           (min-values (values-type-min-value-count type))
           (max-values (values-type-max-value-count type))
           ;; TRUST means no run-time count check is emitted.
           (trust (or (and (= min-values 0)
                           (= max-values call-arguments-limit))
                      (not verify))))
      (flet ((check-nargs ()
               ;; Emit a check that the value count in RCX lies within
               ;; [MIN-VALUES, MAX-VALUES], branching to an
               ;; INVALID-ARG-COUNT-ERROR handler otherwise.
               (assemble ()
                 (let* ((*location-context* (list* name
                                                   (type-specifier type)
                                                   (make-restart-location SKIP)))
                        (err-lab (generate-error-code vop 'invalid-arg-count-error))
                        (min min-values)
                        (max (and (< max-values call-arguments-limit)
                                  max-values)))
                   (cond ((eql min max)
                          (if (zerop max)
                              (inst test :dword rcx-tn rcx-tn)
                              (inst cmp :dword rcx-tn (fixnumize max)))
                          (inst jmp :ne err-lab))
                         (max
                          ;; Range check with one unsigned comparison
                          ;; of (count - min) against (max - min).
                          (let ((nargs move-temp))
                            (if (zerop min)
                                (setf nargs rcx-tn)
                                (inst lea :dword move-temp (ea (fixnumize (- min)) rcx-tn)))
                            (inst cmp :dword nargs (fixnumize (- max min)))
                            (inst jmp :a err-lab)))
                         (t
                          ;; No upper bound: only the minimum matters.
                          (cond ((= min 1)
                                 (inst test :dword rcx-tn rcx-tn)
                                 (inst jmp :e err-lab))
                                ((plusp min)
                                 (inst cmp :dword rcx-tn (fixnumize min))
                                 (inst jmp :b err-lab))))))
                 SKIP)))
        (cond
          ;; 0 or 1 values wanted: at most reset the stack pointer.
          ((<= nvals 1)
           (note-this-location vop :single-value-return)
           (cond
             ((and trust
                   (<= (sb-kernel:values-type-max-value-count type)
                       register-arg-count)))
             ((and trust
                   (not (sb-kernel:values-type-may-be-single-value-p type)))
              (inst mov rsp-tn rbx))
             (t
              (inst cmov :c rsp-tn rbx)
              (unless trust
                ;; With carry clear, force the count in RCX to 1
                ;; before checking it.
                (inst mov move-temp (fixnumize 1))
                (inst cmov :nc rcx-tn move-temp)
                (check-nargs)))))
          ;; All wanted values fit in the argument registers.
          ((<= nvals register-arg-count)
           (note-this-location vop :unknown-return)
           (when (or (not trust)
                     (sb-kernel:values-type-may-be-single-value-p type))
             (assemble ()
               (inst jmp :c regs-defaulted)
               ;; Default the unsupplied registers.
               (let* ((2nd-tn-ref (tn-ref-across values))
                      (2nd-tn (tn-ref-tn 2nd-tn-ref))
                      (2nd-tn-live (neq (tn-kind 2nd-tn) :unused)))
                 (when 2nd-tn-live
                   (inst mov 2nd-tn nil-value))
                 (when (> nvals 2)
                   (loop
                     for tn-ref = (tn-ref-across 2nd-tn-ref)
                     then (tn-ref-across tn-ref)
                     for count from 2 below register-arg-count
                     unless (eq (tn-kind (tn-ref-tn tn-ref)) :unused)
                     do
                     ;; Copy NIL from the second register when it
                     ;; already holds it, else load the constant.
                     (inst mov :dword (tn-ref-tn tn-ref)
                           (if 2nd-tn-live 2nd-tn nil-value)))))
               (inst mov rbx rsp-tn)
               regs-defaulted))
           (when (or (not trust)
                     (< register-arg-count
                        (sb-kernel:values-type-max-value-count type)))
             (inst mov rsp-tn rbx))
           (unless trust
             (inst mov move-temp (fixnumize 1))
             (inst cmov :nc rcx-tn move-temp)
             (check-nargs)))
          ;; Some wanted values live on the stack.
          (t
           (collect ((defaults))
             (let ((default-stack-slots (gen-label))
                   ;; Live register result TNs after the first one.
                   ;; As a side effect VALUES is advanced to the first
                   ;; stack-passed value.
                   (used-registers
                     (loop for i from 1 below register-arg-count
                           for tn = (tn-ref-tn (setf values (tn-ref-across values)))
                           unless (eq (tn-kind tn) :unused)
                           collect tn
                           finally (setf values (tn-ref-across values))))
                   (used-stack-slots-p
                     (loop for ref = values then (tn-ref-across ref)
                           while ref
                           thereis (neq (tn-kind (tn-ref-tn ref)) :unused))))
               (assemble ()
                 (note-this-location vop :unknown-return)
                 (unless trust
                   (inst mov move-temp (fixnumize 1))
                   (inst cmov :nc rcx-tn move-temp))
                 ;; If it returned exactly one value the registers and the
                 ;; stack slots need to be filled with NIL.
                 (cond ((and trust
                             (> min-values 1)))
                       (used-stack-slots-p
                        (inst jmp :nc default-stack-slots))
                       (t
                        (inst jmp :c regs-defaulted)
                        (loop for null = nil-value then (car used-registers)
                              for reg in used-registers
                              do (inst mov :dword reg null))
                        (inst jmp done)))
                 REGS-DEFAULTED
                 (do ((i register-arg-count (1+ i))
                      (val values (tn-ref-across val)))
                     ((null val))
                   (let ((tn (tn-ref-tn val)))
                     (unless (eq (tn-kind tn) :unused)
                       (when (or (not trust)
                                 (>= i min-values))
                         (let ((default-lab (gen-label)))
                           (defaults (cons default-lab tn))
                           ;; Note that the max number of values received
                           ;; is assumed to fit in a :dword register.
                           (inst cmp :dword rcx-tn (fixnumize i))
                           (inst jmp :be default-lab)))
                       (sc-case tn
                         (control-stack
                          ;; Stack to stack goes through MOVE-TEMP.
                          (loadw move-temp rbx (frame-word-offset (+ sp->fp-offset i)))
                          (inst mov tn move-temp))
                         (t
                          (loadw tn rbx (frame-word-offset (+ sp->fp-offset i))))))))
                 DEFAULTING-DONE
                 (move rsp-tn rbx)
                 (unless trust
                   (check-nargs))
                 DONE
                 ;; The defaulting code is emitted out of line.
                 (let ((defaults (defaults)))
                   (when defaults
                     (assemble (:elsewhere)
                       (when (or (not trust)
                                 (<= min-values 1))
                         (emit-label default-stack-slots)
                         (loop for null = nil-value then (car used-registers)
                               for reg in used-registers
                               do (inst mov :dword reg null))
                         (move rbx rsp-tn))
                       (dolist (default defaults)
                         (emit-label (car default))
                         (inst mov (cdr default) nil-value))
                       (inst jmp defaulting-done)))))))))))))
414 ;;;; unknown values receiving
416 ;;; Emit code needed at the return point for an unknown-values call
417 ;;; for an arbitrary number of values.
419 ;;; We do the single and non-single cases with no shared code: there
420 ;;; doesn't seem to be any potential overlap, and receiving a single
421 ;;; value is more important efficiency-wise.
423 ;;; When there is a single value, we just push it on the stack,
424 ;;; returning the old SP and 1.
426 ;;; When there is a variable number of values, we move all of the
427 ;;; argument registers onto the stack, and return ARGS and NARGS.
429 ;;; ARGS and NARGS are TNs wired to the named locations. We must
430 ;;; explicitly allocate these TNs, since their lifetimes overlap with
431 ;;; the results start and count. (Also, it's nice to be able to target
432 ;;; them.)
(defun receive-unknown-values (args nargs start count node)
  (declare (type tn args nargs start count))
  (let ((values-type (sb-c::basic-combination-derived-type node))
        (multiple-lab (gen-label))
        (on-stack-lab (gen-label))
        (finish-lab (gen-label))
        (count-unused-p (eq (tn-kind count) :unused))
        (start-unused-p (eq (tn-kind start) :unused))
        (first-arg (first *register-arg-tns*)))
    ;; Single-value case: push the one value and report a count of 1.
    (when (sb-kernel:values-type-may-be-single-value-p values-type)
      (inst jmp :c multiple-lab)
      (cond (start-unused-p
             (inst push first-arg))
            ((location= start first-arg)
             ;; START shares a location with the value, so push first
             ;; and recompute the pre-push stack pointer afterwards.
             (inst push first-arg)
             (inst lea start (ea n-word-bytes rsp-tn)))
            (t
             (inst mov start rsp-tn)
             (inst push first-arg)))
      (unless count-unused-p
        (inst mov count (fixnumize 1)))
      (inst jmp finish-lab)
      (emit-label multiple-lab))
    ;; The stack frame is burnt and RETurned from if there are no
    ;; stack values. In this case quickly reallocate sufficient space.
    (when (<= (sb-kernel:values-type-min-value-count values-type)
              register-arg-count)
      (inst cmp nargs (fixnumize register-arg-count))
      (inst jmp :g on-stack-lab)
      #+#.(cl:if (cl:= sb-vm:word-shift sb-vm:n-fixnum-tag-bits) '(and) '(or))
      (inst sub rsp-tn nargs)
      #-#.(cl:if (cl:= sb-vm:word-shift sb-vm:n-fixnum-tag-bits) '(and) '(or))
      (let ((byte-count nargs))
        ;; When COUNT is wanted NARGS must survive, so the scaling is
        ;; done on a copy in RAX; otherwise NARGS is scaled in place.
        (unless count-unused-p
          (inst mov :dword (setf byte-count rax-tn) nargs))
        (inst shl :dword byte-count (- word-shift n-fixnum-tag-bits))
        (inst sub rsp-tn byte-count))
      (emit-label on-stack-lab))
    ;; dtc: this writes the registers onto the stack even if they are
    ;; not needed, only the number specified in rcx are used and have
    ;; stack allocated to them. No harm is done.
    (loop for reg in *register-arg-tns*
          for slot downfrom -1
          for nth below (sb-kernel:values-type-max-value-count values-type)
          do (storew reg args slot))
    (unless start-unused-p
      (move start args))
    (unless count-unused-p
      (move count nargs))
    (emit-label finish-lab))
  (values))
484 ;;; VOP that can be inherited by unknown values receivers. The main thing this
485 ;;; handles is allocation of the result temporaries.
(define-vop (unknown-values-receiver)
  ;; Start of the returned values, wired to RBX; live from evaluation
  ;; until result 0 (START) is written.
  (:temporary (:sc descriptor-reg :offset rbx-offset
               :from :eval :to (:result 0))
              values-start)
  ;; Number of returned values, wired to RCX; live from evaluation
  ;; until result 1 (COUNT) is written.
  (:temporary (:sc any-reg :offset rcx-offset
               :from :eval :to (:result 1))
              nvals)
  (:results (start :scs (any-reg control-stack))
            (count :scs (any-reg control-stack))))
496 ;;;; local call with unknown values convention return
;;; Signal an error unless OLD-FP and RETURN-PC are on the stack in
;;; their standard save locations.
(defun check-ocfp-and-return-pc (old-fp return-pc)
  #+nil
  (format t "*known-return: old-fp ~S, tn-kind ~S; ~S ~S~%"
          old-fp (tn-kind old-fp) (sb-c::tn-save-tn old-fp)
          (tn-kind (sb-c::tn-save-tn old-fp)))
  #+nil
  (format t "*known-return: return-pc ~S, tn-kind ~S; ~S ~S~%"
          return-pc (tn-kind return-pc)
          (sb-c::tn-save-tn return-pc)
          (tn-kind (sb-c::tn-save-tn return-pc)))
  (when (or (not (sc-is old-fp control-stack))
            (/= (tn-offset old-fp) ocfp-save-offset))
    (error "ocfp not on stack in standard save location?"))
  (when (or (not (sc-is return-pc sap-stack))
            (/= (tn-offset return-pc) return-pc-save-offset))
    (error "return-pc not on stack in standard save location?")))
515 ;;; The local call convention doesn't fit that well with x86-style
516 ;;; calls. Emit a header for local calls to pop the return address
517 ;;; in the right place.
(defun emit-block-header (start-label trampoline-label fall-thru-p alignp)
  ;; With a TRAMPOLINE-LABEL, emit an entry stub that pops the return
  ;; address into the frame's return-pc slot.  Code falling through
  ;; from the preceding block jumps over the stub to START-LABEL.
  (when trampoline-label
    (when fall-thru-p
      (inst jmp start-label))
    (emit-label trampoline-label)
    (popw rbp-tn (frame-word-offset return-pc-save-offset)))
  ;; ALIGNP is both the request for alignment and the second argument
  ;; handed to EMIT-ALIGNMENT.
  (when alignp
    (emit-alignment n-lowtag-bits alignp))
  (emit-label start-label))
528 ;;; Non-TR local call for a fixed number of values passed according to
529 ;;; the unknown values convention.
531 ;;; FP is the frame pointer to install before doing the call.
533 ;;; NFP would be the number-stack frame pointer if we had a separate
534 ;;; number stack.
536 ;;; Args are the argument passing locations, which are specified only
537 ;;; to terminate their lifetimes in the caller.
539 ;;; VALUES are the return value locations (wired to the standard
540 ;;; passing locations). NVALS is the number of values received.
542 ;;; Save is the save info, which we can ignore since saving has been
543 ;;; done.
545 ;;; TARGET is a continuation pointing to the start of the called
546 ;;; function.
(define-vop (call-local)
  (:args (fp)
         (nfp)
         (args :more t))
  (:results (values :more t))
  (:save-p t)
  (:move-args :local-call)
  (:info arg-locs callee target nvals)
  (:vop-var vop)
  (:ignore nfp arg-locs args callee)
  (:node-var node)
  (:temporary (:sc any-reg) move-temp)
  (:generator 5
    ;; Install the callee's frame pointer, then transfer control.
    (move rbp-tn fp)
    (note-this-location vop :call-site)
    (inst call target)
    ;; Receive the NVALS wanted values, defaulting missing ones.
    (default-unknown-values vop values nvals node rbx-tn move-temp)))
565 ;;; Non-TR local call for a variable number of return values passed according
566 ;;; to the unknown values convention. The results are the start of the values
567 ;;; glob and the number of values received.
(define-vop (multiple-call-local unknown-values-receiver)
  (:args (fp)
         (nfp)
         (args :more t))
  (:save-p t)
  (:move-args :local-call)
  (:info save callee target)
  (:ignore args save nfp callee)
  (:vop-var vop)
  (:node-var node)
  (:generator 20
    ;; Install the callee's frame pointer, then transfer control.
    (move rbp-tn fp)
    (note-this-location vop :call-site)
    (inst call target)
    (note-this-location vop :unknown-return)
    ;; VALUES-START, NVALS, START and COUNT are inherited from
    ;; UNKNOWN-VALUES-RECEIVER.
    (receive-unknown-values values-start nvals start count node)))
585 ;;;; local call with known values return
587 ;;; Non-TR local call with known return locations. Known-value return
588 ;;; works just like argument passing in local call.
590 ;;; Note: we can't use normal load-tn allocation for the fixed args,
591 ;;; since all registers may be tied up by the more operand. Instead,
592 ;;; we use MAYBE-LOAD-STACK-TN.
(define-vop (known-call-local)
  (:args (fp)
         (nfp)
         (args :more t))
  (:results (res :more t))
  (:move-args :local-call)
  (:save-p t)
  (:info save callee target)
  (:ignore args res save nfp callee)
  (:vop-var vop)
  (:generator 5
    ;; Install the callee's frame pointer, then transfer control.
    ;; No code is emitted after the call to receive the values.
    (move rbp-tn fp)
    (note-this-location vop :call-site)
    (inst call target)
    (note-this-location vop :known-return)))
609 ;;; From Douglas Crosher
610 ;;; Return from known values call. We receive the return locations as
611 ;;; arguments to terminate their lifetimes in the returning function. We
612 ;;; restore FP and CSP and jump to the Return-PC.
(define-vop (known-return)
  (:args (old-fp)
         (return-pc)
         (vals :more t))
  (:move-args :known-return)
  (:info val-locs)
  (:ignore val-locs vals)
  (:vop-var vop)
  (:generator 6
    ;; OLD-FP and RETURN-PC must be in their standard stack slots;
    ;; an error is signalled during code generation otherwise.
    (check-ocfp-and-return-pc old-fp return-pc)
    ;; Zot all of the stack except for the old-fp and return-pc.
    (inst leave)
    (inst ret)))
627 ;;;; full call
629 ;;; There is something of a cross-product effect with full calls.
630 ;;; Different versions are used depending on whether we know the
631 ;;; number of arguments or the name of the called function, and
632 ;;; whether we want fixed values, unknown values, or a tail call.
634 ;;; In full call, the arguments are passed creating a partial frame on
635 ;;; the stack top and storing stack arguments into that frame. On
636 ;;; entry to the callee, this partial frame is pointed to by FP.
638 ;;; This macro helps in the definition of full call VOPs by avoiding
639 ;;; code replication in defining the cross-product VOPs.
641 ;;; NAME is the name of the VOP to define.
643 ;;; NAMED is true if the first argument is an fdefinition object whose
644 ;;; definition is to be called.
646 ;;; RETURN is either :FIXED, :UNKNOWN or :TAIL:
647 ;;; -- If :FIXED, then the call is for a fixed number of values, returned in
648 ;;; the standard passing locations (passed as result operands).
649 ;;; -- If :UNKNOWN, then the result values are pushed on the stack, and the
650 ;;; result values are specified by the Start and Count as in the
651 ;;; unknown-values continuation representation.
652 ;;; -- If :TAIL, then do a tail-recursive call. No values are returned.
653 ;;; The Old-Fp and Return-PC are passed as the second and third arguments.
655 ;;; In non-tail calls, the pointer to the stack arguments is passed as
656 ;;; the last fixed argument. If Variable is false, then the passing
657 ;;; locations are passed as a more arg. Variable is true if there are
658 ;;; a variable number of arguments passed on the stack. Variable
659 ;;; cannot be specified with :TAIL return. TR variable argument call
660 ;;; is implemented separately.
662 ;;; In tail call with fixed arguments, the passing locations are
663 ;;; passed as a more arg, but there is no new-FP, since the arguments
664 ;;; have been set up in the current frame.
(defmacro define-full-call (vop-name named return variable &optional args)
  ;; VOP-NAME - name of the VOP to define
  ;; NAMED    - true when the callee is given by name (FUN is then an
  ;;            info argument rather than an operand)
  ;; RETURN   - :FIXED, :UNKNOWN, :TAIL or :UNBOXED
  ;; VARIABLE - true when a variable number of arguments is on the stack
  ;; ARGS     - :FIXED selects :FIXED argument moves and drops the SC
  ;;            restriction on the more-args
  ;; A variable-argument tail call is not handled by this macro.
  (aver (not (and variable (eq return :tail))))
  `(define-vop (,vop-name ,@(when (eq return :unknown) '(unknown-values-receiver)))
     (:args ,@(unless (eq return :tail)
                '((new-fp :scs (any-reg) :to (:argument 1))))

            ,@(unless named ; FUN is an info argument for named call
                '((fun :scs (descriptor-reg control-stack)
                       :target rax :to (:argument 0))))

            ,@(when (eq return :tail)
                '((old-fp)
                  (return-pc)))

            ,@(unless variable
                `((args :more t ,@(unless (eq args :fixed)
                                    '(:scs (descriptor-reg control-stack)))))))

     ,@(when (memq return '(:fixed :unboxed)) '((:results (values :more t))))

     (:save-p ,(if (eq return :tail) :compute-only t))

     ,@(unless (or (eq return :tail) variable)
         `((:move-args ,(if (eq args :fixed) :fixed :full-call))))

     (:vop-var vop)
     (:node-var node)
     (:info ,@(unless (or variable (eq return :tail)) '(arg-locs))
            ,@(unless variable '(nargs))
            ;; Intuitively you might want FUN to be the first codegen arg,
            ;; but that won't work, because EMIT-ARG-MOVES wants the
            ;; passing locs in (FIRST (vop-codegen-info vop)).
            ,@(when named '(fun))
            ,@(when (eq return :fixed) '(nvals))
            step-instrumenting
            ,@(unless named '(fun-type)))

     (:ignore ,@(unless (or variable (eq return :tail)) '(arg-locs))
              ,@(unless variable '(args))
              ,@(when (eq return :unboxed) '(values)))

     ;; For anonymous call, RAX is the function. For named call, RAX will be the linkage
     ;; table base if not stepping, or the linkage cell itself if stepping.
     ;; Calls from immobile-space without stepping avoid using RAX, and instead
     ;; access the linkage table relative to RIP.
     (:temporary (:sc descriptor-reg :offset rax-offset :from (:argument 0) :to :eval) rax)

     ;; We pass the number of arguments in RCX.
     (:temporary
      (:sc unsigned-reg :offset rcx-offset :to ,(if (eq return :fixed) :save :eval))
      rcx)

     ,@(when (eq return :fixed)
         ;; Save it for DEFAULT-UNKNOWN-VALUES to work
         `((:temporary (:sc unsigned-reg :offset rbx-offset :from :result) rbx)
           (:temporary (:sc any-reg) move-temp)))

     ;; With variable call, we have to load the
     ;; register-args out of the (new) stack frame before
     ;; doing the call. Therefore, we have to tell the
     ;; lifetime stuff that we need to use them.
     ,@(when variable
         (mapcar (lambda (name offset)
                   `(:temporary (:sc descriptor-reg
                                 :offset ,offset
                                 :from (:argument 0)
                                 :to :eval)
                                ,name))
                 *register-arg-names* *register-arg-offsets*))

     ,@(when (eq return :tail)
         '((:temporary (:sc unsigned-reg :from (:argument 1) :to (:argument 2))
            old-fp-tmp)))
     ;; NOTE(review): this repeats the unconditional (:NODE-VAR NODE)
     ;; option given above.
     ,@(unless (eq return :tail) '((:node-var node)))

     (:generator ,(+ (if named 5 0)
                     (if variable 19 1)
                     (if (eq return :tail) 0 10)
                     15
                     (if (eq return :unknown) 25 0))

       (progn node) ; always "use" it

       ;; This has to be done before the frame pointer is
       ;; changed! RAX stores the 'lexical environment' needed
       ;; for closures.
       ,@(unless named '((move rax fun)))

       ,@(if variable
             ;; For variable call, compute the number of
             ;; arguments and move some of the arguments to
             ;; registers.
             `((inst mov rcx new-fp)
               (inst sub rcx rsp-tn)
               (inst shr rcx ,(- word-shift n-fixnum-tag-bits))
               ;; Move the necessary args to registers,
               ;; this moves them all even if they are
               ;; not all needed.
               ,@(loop for name in *register-arg-names*
                       for index downfrom -1
                       collect `(loadw ,name new-fp ,index)))
             '((cond ((listp nargs)) ;; no-verify-arg-count
                     ((zerop nargs)
                      (zeroize rcx))
                     (t
                      (inst mov rcx (fixnumize nargs))))))
       ,@(cond ((eq return :tail)
                '(;; Python has figured out what frame we should
                  ;; return to so might as well use that clue.
                  ;; This seems really important to the
                  ;; implementation of things like
                  ;; (without-interrupts ...)
                  ;;
                  ;; dtc; Could be doing a tail call from a
                  ;; known-call-local etc in which the old-fp
                  ;; or ret-pc are in regs or in non-standard
                  ;; places. If the passing location were
                  ;; wired to the stack in standard locations
                  ;; then these moves will be un-necessary;
                  ;; this is probably best for the x86.
                  (sc-case old-fp
                    ((control-stack)
                     (unless (= ocfp-save-offset (tn-offset old-fp))
                       ;; FIXME: FORMAT T for stale
                       ;; diagnostic output (several of
                       ;; them around here), ick
                       (error "** tail-call old-fp not S0~%")
                       (move old-fp-tmp old-fp)
                       (storew old-fp-tmp rbp-tn (frame-word-offset ocfp-save-offset))))
                    ((any-reg descriptor-reg)
                     (error "** tail-call old-fp in reg not S0~%")
                     (storew old-fp rbp-tn (frame-word-offset ocfp-save-offset))))

                  ;; For tail call, we have to push the
                  ;; return-pc so that it looks like we CALLed
                  ;; despite the fact that we are going to JMP.
                  (inst push return-pc)))
               (t
                ;; For non-tail call, we have to save our
                ;; frame pointer and install the new frame
                ;; pointer. We can't load stack tns after this
                ;; point.
                `(;; Python doesn't seem to allocate a frame
                  ;; here which doesn't leave room for the
                  ;; ofp/ret stuff.
                  ;;
                  ;; The variable args are on the stack and
                  ;; become the frame, but there may be <3
                  ;; args and 3 stack slots are assumed
                  ;; allocated on the call. So need to ensure
                  ;; there are at least 3 slots. This hack
                  ;; just adds 3 more.
                  ,(if variable
                       '(inst sub rsp-tn (* 3 n-word-bytes)))

                  ;; Bias the new-fp for use as an fp
                  ,(if variable
                       '(inst sub new-fp (* sp->fp-offset n-word-bytes)))

                  ;; Save the fp
                  (storew rbp-tn new-fp (frame-word-offset ocfp-save-offset))
                  (move rbp-tn new-fp)))) ; NB - now on new stack frame.

       (when step-instrumenting
         ,@(when named '((compute-linkage-cell node fun rax)))
         (emit-single-step-test)
         (inst jmp :eq DONE)
         (inst break single-step-around-trap))
       DONE
       (note-this-location vop :call-site)
       ,(cond (named
               `(emit-direct-call fun ',(if (eq return :tail) 'jmp 'call)
                                  node step-instrumenting))
              ((eq return :tail)
               `(tail-call-unnamed rax fun-type vop))
              (t
               `(call-unnamed rax fun-type vop)))
       ,@(ecase return
           (:fixed '((default-unknown-values vop values nvals node rbx move-temp)))
           (:unknown
            '((note-this-location vop :unknown-return)
              (receive-unknown-values values-start nvals start count node)))
           ((:tail :unboxed))))))
;;; Instantiate the full-call VOPs.
;;; Arguments: VOP name, NAMED, RETURN, VARIABLE and optionally ARGS.

;;; Fixed argument count.
(define-full-call call                nil :fixed   nil)
(define-full-call call-named          t   :fixed   nil)
(define-full-call multiple-call       nil :unknown nil)
(define-full-call multiple-call-named t   :unknown nil)
(define-full-call tail-call           nil :tail    nil)
(define-full-call tail-call-named     t   :tail    nil)

;;; Variable argument count (arguments already on the stack).
(define-full-call call-variable          nil :fixed   t)
(define-full-call multiple-call-variable nil :unknown t)

;;; ARGS = :FIXED variants.
(define-full-call fixed-call-named      t :fixed nil :fixed)
(define-full-call fixed-tail-call-named t :tail  nil :fixed)

;;; RETURN = :UNBOXED variants.
(define-full-call unboxed-call-named       t :unboxed nil)
(define-full-call fixed-unboxed-call-named t :unboxed nil :fixed)
864 ;;; Call NAME "directly" meaning in a single JMP or CALL instruction,
865 ;;; if possible (without loading RAX)
(defun emit-direct-call (name instruction node step-instrumenting)
  ;; INSTRUCTION is the symbol of the transfer instruction to emit
  ;; (the callers in this file pass JMP or CALL).
  (cond (step-instrumenting
         ;; If step-instrumenting, then RAX points to the linkage table cell
         (inst* instruction (ea rax-tn)))
        ((sb-c::code-immobile-p node)
         ;; The linkage cell is reachable relative to RIP; RAX is not
         ;; needed.
         (inst* instruction (rip-relative-ea (make-fixup name :linkage-cell))))
        (t
         ;; get the linkage table base into RAX
         (inst mov rax-tn (thread-slot-ea sb-vm::thread-linkage-table-slot))
         (inst* instruction (ea (make-fixup name :linkage-cell) rax-tn)))))
;;; Invoke the function-designator FUN with a tail call (every control
;;; transfer emitted here is a JMP).
;;;
;;; FUN is the register holding the callee.  TYPE selects the code shape:
;;;   :DESIGNATOR -- test FUN's lowtag at run time and either jump
;;;                  through its closure-fun slot or go to the
;;;                  CALL-SYMBOL assembly routine;
;;;   :SYMBOL     -- always go to the CALL-SYMBOL assembly routine;
;;;   otherwise   -- jump through the closure-fun slot unconditionally.
;;; VOP is passed to INVOKE-ASM-ROUTINE and to SB-C::CODE-IMMOBILE-P.
(defun tail-call-unnamed (fun type vop)
  (let ((relative-call (sb-c::code-immobile-p vop))
        ;; Address of the closure-fun slot of the (tagged) function in FUN.
        (fun-ea (ea (- (* closure-fun-slot n-word-bytes) fun-pointer-lowtag)
                    fun)))
    (case type
      (:designator
       (assemble ()
         ;; RBX is scratch here.  Presumably its low bits are zero
         ;; exactly when FUN carries FUN-POINTER-LOWTAG
         ;; (%LEA-FOR-LOWTAG-TEST is defined elsewhere) -- TODO confirm.
         (%lea-for-lowtag-test rbx-tn fun fun-pointer-lowtag)
         (inst test :byte rbx-tn lowtag-mask)
         ;; Not a function: with RELATIVE-CALL the conditional jump can
         ;; target the assembly routine itself, otherwise it goes to
         ;; the local NOT-FUN stub below.
         (inst jmp :nz (if relative-call
                           (make-fixup 'call-symbol :assembly-routine)
                           not-fun))
         (inst jmp fun-ea)
         not-fun
         (unless relative-call
           (invoke-asm-routine 'jmp 'call-symbol vop))))
      (:symbol
       (invoke-asm-routine 'jmp 'call-symbol vop))
      (t
       (inst jmp fun-ea)))))
;;; Emit a non-tail call to the function-designator in FUN.
;;;
;;; TYPE selects the code shape:
;;;   :SYMBOL     -- always CALL the CALL-SYMBOL assembly routine;
;;;   :DESIGNATOR -- test FUN's lowtag at run time, then either CALL
;;;                  through its closure-fun slot or CALL the
;;;                  CALL-SYMBOL assembly routine;
;;;   otherwise   -- CALL through the closure-fun slot unconditionally.
;;; VOP is passed to INVOKE-ASM-ROUTINE.
(defun call-unnamed (fun type vop)
  (case type
    (:symbol
     (invoke-asm-routine 'call 'call-symbol vop))
    (t
     (assemble ()
       (when (eq type :designator)
         ;; RBX is scratch here; a zero result from the TEST sends us to
         ;; the direct call at CALL.
         (%lea-for-lowtag-test rbx-tn fun fun-pointer-lowtag)
         (inst test :byte rbx-tn lowtag-mask)
         (inst jmp :z call)
         (invoke-asm-routine 'call 'call-symbol vop)
         ;; Skip the direct call once the routine has returned.
         (inst jmp ret))
       call
       (inst call (ea (- (* closure-fun-slot n-word-bytes) fun-pointer-lowtag)
                      fun))
       ret))))
;;; TAIL-CALL-VARIABLE gets its own VOP because it needs special code
;;; that BLT's the arguments down.  The assembly routine does all the
;;; real work; this VOP only puts things where the routine expects them.
(define-vop (tail-call-variable)
  (:args (args :scs (any-reg control-stack) :target rsi)
         (function :scs (descriptor-reg control-stack) :target rax)
         (old-fp)
         (return-pc))
  (:info fun-type)
  (:temporary (:sc unsigned-reg :offset rsi-offset :from (:argument 0)) rsi)
  (:temporary (:sc unsigned-reg :offset rax-offset :from (:argument 1)) rax)
  (:vop-var vop)
  (:generator 75
    (check-ocfp-and-return-pc old-fp return-pc)
    ;; Pick the routine up front: the plain one when the callee is known
    ;; to be a function, the "callable" one otherwise.
    (let ((routine (if (eq fun-type :function)
                       'tail-call-variable
                       'tail-call-callable-variable)))
      ;; Load the passing registers (MOVE is given the same operands as
      ;; before, in the same order), then leave via a jump.
      (move rsi args)
      (move rax function)
      (invoke-asm-routine 'jmp routine vop))))
;;;; unknown values return

;;; Return a single value using the unknown-values convention.
;;;
;;; pfw--get wired-tn conflicts sometimes if register sc specd for args
;;; having problems targeting args to regs -- using temps instead.
;;;
;;; First off, modifying the return-pc defeats the branch-prediction
;;; optimizations on modern CPUs quite handily. Second, we can do all
;;; this without needing a temp register. Fixed the latter, at least.
;;; -- AB 2006/Feb/04
(define-vop (return-single)
  (:args (old-fp) (return-pc) (value))
  ;; VALUE is listed as an argument but the generator never refers to it.
  (:ignore value)
  (:generator 6
    (check-ocfp-and-return-pc old-fp return-pc)
    ;; LEAVE: pop the frame, restoring the caller's frame pointer.
    (inst leave)
    ;; Carry clear tells the caller this is a single-value return.
    (inst clc)
    (inst ret)))
;;; Do unknown-values return of a fixed (other than 1) number of
;;; values. The VALUES are required to be set up in the standard
;;; passing locations. NVALS is the number of values returned.
;;;
;;; Basically, we just load RCX with the number of values returned and
;;; RBX with a pointer to the values, set RSP to point to the end of
;;; the values, and jump directly to return-pc.
(define-vop (return)
  (:args (old-fp)
         (return-pc :to (:eval 1))
         (values :more t))
  (:ignore values)
  (:info nvals)
  ;; In the case of other than one value, we need these registers to
  ;; tell the caller where they are and how many there are.
  (:temporary (:sc unsigned-reg :offset rbx-offset) rbx)
  (:temporary (:sc unsigned-reg :offset rcx-offset) rcx)
  ;; We need to stretch the lifetime of return-pc past the argument
  ;; registers so that we can default the argument registers without
  ;; trashing return-pc.
  (:temporary (:sc unsigned-reg :offset (first *register-arg-offsets*)
               :from :eval) a0)
  (:temporary (:sc unsigned-reg :offset (second *register-arg-offsets*)
               :from :eval) a1)
  (:temporary (:sc unsigned-reg :offset (third *register-arg-offsets*)
               :from :eval) a2)

  (:generator 6
    (check-ocfp-and-return-pc old-fp return-pc)
    (when (= nvals 1)
      ;; This is handled in RETURN-SINGLE.
      (error "nvalues is 1"))
    ;; Establish the values pointer and values count.
    (inst lea rbx (ea (* sp->fp-offset n-word-bytes) rbp-tn))
    (if (zerop nvals)
        (zeroize rcx) ; smaller
        (inst mov rcx (fixnumize nvals)))
    ;; Pre-default any argument register that need it.
    (when (< nvals register-arg-count)
      ;; Load NIL into the first unused argument register, then copy
      ;; that register into the remaining unused ones.
      (let* ((arg-tns (nthcdr nvals (list a0 a1 a2)))
             (first (first arg-tns)))
        (inst mov first nil-value)
        (dolist (tn (cdr arg-tns))
          (inst mov tn first))))
    ;; Set the multiple value return flag.
    (inst stc)
    ;; And away we go. Except that return-pc is still on the
    ;; stack and we've changed the stack pointer. So we have to
    ;; tell it to index off of RBX instead of RBP.
    (cond ((<= nvals register-arg-count)
           ;; Every value is in a register: an ordinary LEAVE/RET works.
           (inst leave)
           (inst ret))
          (t
           ;; Some values are on the stack after RETURN-PC and OLD-FP,
           ;; can't return normally and some slots of the frame will
           ;; be used as temporaries by the receiver.
           ;;
           ;; Clear as much of the stack as possible, but not past the
           ;; old frame address.
           (inst lea rsp-tn
                 (ea (frame-byte-offset (1- nvals)) rbp-tn))
           (move rbp-tn old-fp)
           ;; Re-push the saved return PC (addressed off RBX, since RBP
           ;; has just been overwritten) so that RET can consume it.
           (inst push (ea (frame-byte-offset
                           (+ sp->fp-offset (tn-offset return-pc)))
                          rbx))
           (inst ret)))))
;;; Do unknown-values return of an arbitrary number of values (passed
;;; on the stack.) We check for the common case of a single return
;;; value, and do that inline using the normal single value return
;;; convention. Otherwise, we branch off to code that calls an
;;; assembly-routine.
;;;
;;; The assembly routine takes the following args:
;;;  RCX -- number of values to find there.
;;;  RSI -- pointer to where to find the values.
(define-vop (return-multiple)
  (:args (old-fp)
         (return-pc)
         (vals :scs (any-reg) :target rsi)
         (nvals :scs (any-reg) :target rcx))
  (:temporary (:sc unsigned-reg :offset rsi-offset :from (:argument 2)) rsi)
  (:temporary (:sc unsigned-reg :offset rcx-offset :from (:argument 3)) rcx)
  (:temporary (:sc descriptor-reg :offset (first *register-arg-offsets*)
               :from (:eval 0)) a0)
  (:node-var node)
  (:vop-var vop)
  (:generator 13
    (check-ocfp-and-return-pc old-fp return-pc)
    ;; The inline single-value path is left out when the policy prefers
    ;; space over speed.
    (unless (policy node (> space speed))
      (assemble ()
        ;; Exactly one value?
        (inst cmp nvals (fixnumize 1))
        (inst jmp :ne MULTIPLE)
        ;; Yes: fetch it into the first argument register ...
        (loadw a0 vals -1)
        ;; ... pop the frame, clear the multiple-value flag and return.
        (inst leave)
        (inst clc)
        (inst ret)
        ;; No: fall into the general path below.
        MULTIPLE))
    ;; General path: hand pointer and count to the assembly routine.
    (move rsi vals)
    (move rcx nvals)
    (invoke-asm-routine 'jmp 'return-multiple vop)))
;;;; XEP hackery

;;; Fetch the lexical environment from the register it is passed in.
(define-vop (setup-closure-environment)
  ;; LABEL is accepted as a codegen-time argument but not used here.
  (:info label)
  (:ignore label)
  (:results (closure :scs (descriptor-reg)))
  (:generator 6
    ;; The environment arrives in RAX; copy it to the result.
    (move closure rax-tn)))
;;; Copy a &MORE arg from the argument area to the end of the current
;;; frame. FIXED is the number of non-&MORE arguments.
;;;
;;; MIN-VERIFIED is compared against FIXED: when they are EQL and
;;; FIXED > 1, this VOP reuses the flags left by VERIFY-ARG-COUNT's CMP
;;; instead of issuing its own comparison.  The argument count is read
;;; from RCX throughout, and RBX is used as the copy counter.
(define-vop (copy-more-arg)
  (:temporary (:sc any-reg :offset r8-offset) copy-index)
  (:temporary (:sc any-reg :offset r9-offset) source)
  (:temporary (:sc descriptor-reg :offset r10-offset) temp)
  (:info fixed min-verified)
  (:generator 20
    ;; Avoid the copy if there are no more args.
    (cond ((zerop fixed)
           (inst test :dword rcx-tn rcx-tn)
           (inst jmp :z JUST-ALLOC-FRAME))
          ((and (eql min-verified fixed)
                (> fixed 1))
           ;; verify-arg-count will do a CMP
           (inst jmp :e JUST-ALLOC-FRAME))
          (t
           (inst cmp :dword rcx-tn (fixnumize fixed))
           (inst jmp :be JUST-ALLOC-FRAME)))

    ;; Create a negated copy of the number of arguments to allow us to
    ;; use EA calculations in order to do scaled subtraction.
    (inst mov :dword temp rcx-tn)
    (inst neg temp)

    ;; Allocate the space on the stack.
    ;; stack = rbp + sp->fp-offset - frame-size - (nargs - fixed)
    ;; if we'd move SP backward, swap the meaning of rsp and source;
    ;; otherwise, we'd be accessing values below SP, and that's no good
    ;; if a signal interrupts this code sequence.  In that case, store
    ;; the final value in rsp after the stack-stack memmove loop.
    (let* ((delta (- fixed (sb-allocated-size 'stack)))
           (loop (gen-label))
           (fixnum->word (ash 1 (- word-shift n-fixnum-tag-bits)))
           (below (plusp delta)))
      (inst lea (if below source rsp-tn)
            (ea (* n-word-bytes (+ sp->fp-offset delta))
                rbp-tn temp fixnum->word))

      ;; Now: nargs>=1 && nargs>fixed

      (cond ((< fixed register-arg-count)
             ;; the code above only moves the final value of rsp in
             ;; rsp directly if that condition is satisfied.  Currently,
             ;; r-a-c is 3, so the aver is OK. If the calling convention
             ;; ever changes, the logic above with LEA will have to be
             ;; adjusted.
             (aver (<= fixed (sb-allocated-size 'stack)))
             ;; We must stop when we run out of stack args, not when we
             ;; run out of more args.
             ;; Number to copy = nargs-3
             ;; Save the original count of args.
             (inst mov rbx-tn rcx-tn)
             (inst sub rbx-tn (fixnumize register-arg-count))
             ;; Everything of interest in registers.
             (inst jmp :be DO-REGS))
            (t
             ;; Number to copy = nargs-fixed
             (inst lea rbx-tn (ea (- (fixnumize fixed)) rcx-tn))))

      ;; Initialize R8 to be the end of args.
      ;; Swap with SP if necessary to mirror the previous condition
      (unless (zerop delta)
        (inst lea (if below rsp-tn source)
              (ea (* sp->fp-offset n-word-bytes)
                  rbp-tn temp fixnum->word)))

      ;; src: rbp + temp + sp->fp
      ;; dst: rbp + temp + sp->fp + (fixed - [stack-size])
      (cond ((zerop delta))             ; no-op move
            ((minusp delta)
             ;; dst is lower than src, copy forward
             (zeroize copy-index)
             ;; We used to use REP MOVS here, but on modern x86 it performs
             ;; much worse than an explicit loop for small blocks.

             (emit-label loop)
             (inst mov temp (ea source copy-index))
             (inst mov (ea rsp-tn copy-index) temp)
             (inst add copy-index n-word-bytes)
             (inst sub rbx-tn (fixnumize 1))
             (inst jmp :nz loop))
            ((plusp delta)
             ;; dst is higher than src; copy backward
             (emit-label loop)
             (inst sub rbx-tn (fixnumize 1))
             (inst mov temp (ea rsp-tn rbx-tn fixnum->word))
             (inst mov (ea source rbx-tn fixnum->word) temp)
             (inst jmp :nz loop)
             ;; done with the stack--stack copy. Reset RSP to its final
             ;; value
             (inst mov rsp-tn source))))
    DO-REGS

    ;; Here: nargs>=1 && nargs>fixed
    (when (< fixed register-arg-count)
      ;; Now we have to deposit any more args that showed up in
      ;; registers.
      (do ((i fixed))
          ( nil )
        ;; Store it relative to rbp
        (inst mov (ea (* n-word-bytes
                         (- sp->fp-offset
                            (+ 1 (- i fixed) (sb-allocated-size 'stack))))
                      rbp-tn)
              (nth i *register-arg-tns*))

        (incf i)
        (when (>= i register-arg-count)
          (return))

        ;; Don't deposit any more than there are.
        #.(assert (= register-arg-count 3))
        (cond ((> fixed 0)
               (inst cmp :dword rcx-tn (fixnumize i))
               (inst jmp :eq DONE))
              ;; Use a single comparison for 1 and 2
              ((= i 1)
               (inst cmp :dword rcx-tn (fixnumize 2))
               (inst jmp :l DONE))
              (t
               ;; Reuses the flags of the CMP against 2 emitted when I
               ;; was 1.
               (inst jmp :eq DONE)))))

    (inst jmp DONE)

    JUST-ALLOC-FRAME
    ;; No &MORE args: only set RSP to the end of the fixed-size frame.
    (emit-lea rsp-tn rbp-tn
              (* n-word-bytes
                 (- sp->fp-offset
                    (sb-allocated-size 'stack))))

    DONE))
;;; Translate SB-C::%MORE-KW-ARG: load two adjacent words addressed by
;;; OBJECT plus a tagged INDEX.  VALUE comes from the indexed word and
;;; KEYWORD from the word N-WORD-BYTES above it.
(define-vop ()
  (:translate sb-c::%more-kw-arg)
  (:policy :fast-safe)
  ;; Both arguments must stay live until the second result is written.
  (:args (object :scs (descriptor-reg) :to (:result 1))
         (index :scs (any-reg) :to (:result 1) :target keyword))
  (:arg-types * tagged-num)
  (:results (value :scs (descriptor-reg any-reg))
            (keyword :scs (descriptor-reg any-reg)))
  (:result-types * *)
  (:generator 4
    ;; Index scale that turns a tagged fixnum into a byte offset.
    (let ((fixnum->word (ash 1 (- word-shift n-fixnum-tag-bits))))
      (inst mov value (ea object index fixnum->word))
      (inst mov keyword (ea n-word-bytes object index fixnum->word)))))
;;; Translate SB-C:%MORE-ARG for a compile-time constant INDEX: the
;;; value is read INDEX words below OBJECT.
(define-vop (more-arg/c)
  (:translate sb-c:%more-arg)
  (:policy :fast-safe)
  (:args (object :scs (descriptor-reg) :to (:result 1)))
  (:info index)
  (:arg-types * (:constant (signed-byte #.(- 32 word-shift))))
  (:results (value :scs (descriptor-reg any-reg)))
  (:result-types *)
  (:generator 3
    ;; Negative byte displacement of the INDEX'th word.
    (let ((displacement (- (* index n-word-bytes))))
      (inst mov value (ea displacement object)))))
;;; Translate SB-C:%MORE-ARG for a run-time INDEX (a tagged fixnum): the
;;; value is read at OBJECT minus the scaled index.
(define-vop (more-arg)
  (:translate sb-c:%more-arg)
  (:policy :fast-safe)
  (:args (object :scs (descriptor-reg) :to (:result 1))
         (index :scs (any-reg) :to (:result 1) :target value))
  (:arg-types * tagged-num)
  (:results (value :scs (descriptor-reg any-reg)))
  (:result-types *)
  (:generator 4
    ;; Index scale that turns a tagged fixnum into a byte offset.
    (let ((fixnum->word (ash 1 (- word-shift n-fixnum-tag-bits))))
      ;; VALUE doubles as the scratch register for the negated index.
      (move value index)
      (inst neg value)
      (inst mov value (ea object value fixnum->word)))))
;;; Load the more-arg at constant INDEX, or NIL when COUNT (a tagged
;;; fixnum) is not greater than INDEX.
(define-vop (more-arg-or-nil)
  (:policy :fast-safe)
  (:args (object :scs (descriptor-reg) :to (:result 1))
         (count :scs (any-reg) :to (:result 1)))
  (:arg-types * tagged-num)
  (:info index)
  (:results (value :scs (descriptor-reg any-reg)))
  (:result-types *)
  (:generator 3
    ;; Start from the default answer.
    (inst mov value nil-value)
    ;; Unsigned COUNT <= INDEX: no such argument, keep NIL.
    (inst cmp count (fixnumize index))
    (inst jmp :be ABSENT)
    ;; Otherwise read the word INDEX words below OBJECT.
    (let ((displacement (- (* index n-word-bytes))))
      (inst mov value (ea displacement object)))
    ABSENT))
;;; Turn more arg (context, count) into a list.
;;; Cons cells will be filled in right-to-left.
;;; This has a slight advantage in code size, and eliminates an initial
;;; forward jump into the loop. it also admits an interesting possibility
;;; to reduce the scope of the pseudo-atomic section so as not to
;;; encompass construction of the list. To do that, we will need to invent
;;; a new widetag for "contiguous CONS block" which has a header conveying
;;; the total payload length. Initially we would store that into the CAR of the
;;; first cons cell. Upon seeing such header, GC shall treat that entire object
;;; as a boxed payload of specified length. It will be implicitly pinned
;;; (if conservative) or transported as a whole (if precise). Then when the CAR
;;; of the first cons is overwritten, the object changes to a linked list.
(define-vop ()
  (:translate %listify-rest-args)
  (:policy :safe)
  ;; CONTEXT is used throughout the copying loop
  (:args (context :scs (descriptor-reg) :to :save)
         (count :scs (any-reg) :target rcx))
  (:arg-types * tagged-num)
  ;; The only advantage to specifying RCX here is that JRCXZ can be used
  ;; in one place, and then only in the unlikely scenario that CONTEXT is not
  ;; in RCX. If it was, SHL sets/clears the Z flag, but LEA doesn't.
  ;; Not much of an advantage, but why not.
  (:temporary (:sc unsigned-reg :offset rcx-offset :from (:argument 1)) rcx)
  ;; Note that DST conflicts with RESULT because we use both as temps
  (:temporary (:sc unsigned-reg) value dst)
  #+gs-seg (:temporary (:sc unsigned-reg :offset 15) thread-tn)
  (:results (result :scs (descriptor-reg)))
  (:node-var node)
  (:generator 20
    ;; The following form is intentionally disabled: the code below
    ;; needs RCX to hold COUNT as a fixnum (ALLOCATION is called with
    ;; :SCALE 8), not a precomputed byte count.
    #|
    ;; TODO: if instrumenting, just revert to the older way of precomputing
    ;; a size rather than scaling by 8 in ALLOCATION so that we don't have
    ;; to scale and unscale.
    ;; Compute the number of bytes to allocate
    (let ((shift (- (1+ word-shift) n-fixnum-tag-bits)))
      (if (location= count rcx)
          (inst shl :dword rcx shift)
          (inst lea :dword rcx (ea nil count (ash 1 shift)))))
    |#
    (move rcx count :dword)
    ;; Setup for the CDR of the last cons (or the entire result) being NIL.
    (inst mov result nil-value)
    ;; Zero arguments: the result is just NIL.
    (cond ((not (member :allocation-size-histogram sb-xc:*features*))
           (inst jrcxz DONE))
          (t ; jumps too far for JRCXZ sometimes
           (inst test rcx rcx)
           (inst jmp :z done)))
    (when (and (not (node-stack-allocate-p node)) (instrument-alloc-policy-p node))
      (inst shl :dword rcx word-shift) ; compute byte count
      (instrument-alloc +cons-primtype+ rcx node (list value dst) thread-tn)
      (inst shr :dword rcx word-shift)) ; undo the computation
    (pseudo-atomic (:elide-if (node-stack-allocate-p node) :thread-tn thread-tn)
      ;; Produce an untagged pointer into DST
      ;; SCALE records what RCX measures afterwards: 1 = bytes, 8 = cells
      ;; (as a fixnum).
      (let ((scale
              (cond ((node-stack-allocate-p node)
                     ;; LEA on RSP would be ok but we'd need to negate RCX first, then un-negate
                     ;; to compute the final cons, then negate again. So use SHL and SUB instead.
                     (inst shl :dword rcx word-shift)
                     (stack-allocation rcx 0 dst)
                     1)
                    (t
                     (allocation +cons-primtype+ rcx 0 dst node value thread-tn
                                 :scale 8
                                 :overflow
                                 (lambda ()
                                   ;; Out-of-line path: push the count and
                                   ;; the context, call the listify
                                   ;; routine, pop its result and skip the
                                   ;; inline loop.
                                   (inst push rcx)
                                   (inst push context)
                                   (invoke-asm-routine
                                    'call (if (system-tlab-p 0 node) 'sys-listify-&rest 'listify-&rest)
                                    node)
                                   (inst pop result)
                                   (inst jmp alloc-done)))
                     8))))
        ;; Recalculate DST as a tagged pointer to the last cons
        (inst lea dst (ea (- list-pointer-lowtag (* cons-size n-word-bytes)) dst rcx scale))
        ;; scale=8 implies RCX counts ncells (as a fixnum) therefore just untag it.
        ;; scale=1 implies RCX counts nbytes therefore ncells = RCX/16
        (inst shr :dword rcx (if (= scale 8) n-fixnum-tag-bits (1+ word-shift))))
      ;; The rightmost arguments are at lower addresses.
      ;; Start by indexing the last argument
      (inst neg rcx) ; :QWORD because it's negative
      LOOP
      ;; Grab one value and store into this cons. Use RCX as an index into the
      ;; vector of values in CONTEXT, but add 8 because CONTEXT points exactly at
      ;; the 0th value, which means that the index is 1 word too low.
      ;; (It's -1 if there is exactly 1 value, instead of 0, and so on)
      (inst mov value (ea 8 context rcx 8))
      ;; RESULT began as NIL which gives the correct value for the CDR in the final cons.
      ;; Subsequently it points to each cons just populated, which is correct all the way
      ;; up to and including the final result.
      (storew result dst cons-cdr-slot list-pointer-lowtag)
      (storew value dst cons-car-slot list-pointer-lowtag)
      (inst mov result dst) ; preserve the value to put in the CDR of the preceding cons
      (inst sub dst (* cons-size n-word-bytes)) ; get the preceding cons
      (inst inc rcx) ; :QWORD because it's negative
      (inst jmp :nz loop)
      ALLOC-DONE)
    DONE))
;;; Return the location and size of the &MORE arg glob created by
;;; COPY-MORE-ARG. SUPPLIED is the total number of arguments supplied
;;; (originally passed in RCX). FIXED is the number of non-rest
;;; arguments.
;;;
;;; We must duplicate some of the work done by COPY-MORE-ARG, since at
;;; that time the environment is in a pretty brain-damaged state,
;;; preventing this info from being returned as values. What we do is
;;; compute supplied - fixed, and return a pointer that many words
;;; below the current stack top.
(define-vop ()
  (:policy :fast-safe)
  (:translate sb-c::%more-arg-context)
  (:args (supplied :scs (any-reg) :target count))
  (:arg-types positive-fixnum (:constant fixnum))
  (:info fixed)
  (:results (context :scs (descriptor-reg))
            (count :scs (any-reg)))
  (:result-types t tagged-num)
  (:note "more-arg-context")
  (:generator 5
    (let (;; Index scale that turns a tagged fixnum into a byte offset.
          (fixnum->word (ash 1 (- word-shift n-fixnum-tag-bits)))
          ;; Step back over the FIXED arguments plus one word, so the
          ;; pointer lands on the first more-arg rather than above it.
          (displacement (- (* (1+ fixed) n-word-bytes))))
      (move count supplied)
      ;; RSP currently addresses the last argument pushed.
      (inst lea context (ea displacement rsp-tn count fixnum->word))
      ;; COUNT becomes the number of more-args: SUPPLIED - FIXED.
      (when (/= fixed 0)
        (inst sub count (fixnumize fixed))))))
;;; Check the incoming argument count NARGS (a tagged fixnum) against
;;; the compile-time bounds MIN and MAX, branching to an
;;; INVALID-ARG-COUNT-ERROR trap when it is out of range.
;;;
;;;   MIN null          -- NARGS must equal MAX exactly;
;;;   MIN and MAX given -- MIN <= NARGS <= MAX, done as one unsigned
;;;                        comparison of NARGS - MIN against MAX - MIN;
;;;   only MIN given    -- NARGS >= MIN (nothing is emitted when MIN is
;;;                        not positive).
(define-vop (verify-arg-count)
  (:policy :fast-safe)
  (:args (nargs :scs (any-reg)))
  (:arg-types positive-fixnum (:constant t) (:constant t))
  (:temporary (:sc unsigned-reg :offset rbx-offset) temp)
  (:info min max)
  (:vop-var vop)
  (:save-p :compute-only)
  (:generator 3
    ;; NOTE: copy-more-arg expects this to issue a CMP for min > 1
    (let ((err-lab
            (generate-error-code vop 'invalid-arg-count-error nargs)))
      (cond ((not min)
             ;; TEST is used instead of CMP when the required count is 0.
             (if (zerop max)
                 (inst test :dword nargs nargs)
                 (inst cmp :dword nargs (fixnumize max)))
             (inst jmp :ne err-lab))
            (max
             ;; With MIN = 0 no subtraction is needed, so TEMP is simply
             ;; rebound to NARGS at code-generation time.
             (if (zerop min)
                 (setf temp nargs)
                 (inst lea :dword temp (ea (fixnumize (- min)) nargs)))
             ;; Unsigned "above" also catches NARGS < MIN, since the
             ;; subtraction wraps around.
             (inst cmp :dword temp (fixnumize (- max min)))
             (inst jmp :a err-lab))
            (t
             (cond ((= min 1)
                    ;; At least one argument: just test for zero.
                    (inst test :dword nargs nargs)
                    (inst jmp :e err-lab))
                   ((plusp min)
                    (inst cmp :dword nargs (fixnumize min))
                    (inst jmp :b err-lab))))))))
;;; Single-stepping

;;; Emit a byte comparison of the "stepping" flag against zero; callers
;;; branch on the resulting flags.
(defun emit-single-step-test ()
  ;; We use different ways of representing whether stepping is on on
  ;; +SB-THREAD / -SB-THREAD: on +SB-THREAD, we use a slot in the
  ;; thread structure. On -SB-THREAD we use the value of a static
  ;; symbol. Things are done this way, since reading a thread-local
  ;; slot from a symbol would require an extra register on +SB-THREAD,
  ;; and reading a slot from a thread structure would require an extra
  ;; register on -SB-THREAD. While this isn't critical for x86-64,
  ;; it's more serious for x86.
  (inst cmp :byte
        #+sb-thread (thread-slot-ea thread-stepping-slot)
        #-sb-thread (static-symbol-value-ea 'sb-impl::*stepping*)
        0))
;;; Stepper hook: trap with SINGLE-STEP-BEFORE-TRAP when the stepping
;;; flag is non-zero, otherwise fall straight through.
(define-vop (step-instrument-before-vop)
  (:vop-var vop)
  (:policy :fast-safe)
  (:generator 3
    ;; EMIT-SINGLE-STEP-TEST compares the flag byte with 0, so :EQ means
    ;; stepping is off and the trap is skipped.
    (emit-single-step-test)
    (inst jmp :eq NOT-STEPPING)
    (inst break single-step-before-trap)
    NOT-STEPPING
    (note-this-location vop :internal-error)))