src/compiler/x86-64/call.lisp

   1 ;;;; function call for the x86-64 VM
   2
   3 ;;;; This software is part of the SBCL system. See the README file for
   4 ;;;; more information.
   5 ;;;;
   6 ;;;; This software is derived from the CMU CL system, which was
   7 ;;;; written at Carnegie Mellon University and released into the
   8 ;;;; public domain. The software is in the public domain and is
   9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
  10 ;;;; files for more information.
  11
  12 (in-package "SB-VM")
  13
  14 (defconstant arg-count-sc (make-sc+offset any-reg-sc-number rcx-offset)) ; any-reg SC at the RCX offset
  15 (defconstant closure-sc (make-sc+offset any-reg-sc-number rax-offset)) ; any-reg SC at the RAX offset
  16
  17 (defconstant return-pc-passing-offset
       ;; SC+offset naming the sap-stack slot at RETURN-PC-SAVE-OFFSET.
  18   (make-sc+offset sap-stack-sc-number return-pc-save-offset))
  19
  20 (defconstant old-fp-passing-offset
       ;; SC+offset naming the control-stack slot at OCFP-SAVE-OFFSET.
  21   (make-sc+offset control-stack-sc-number ocfp-save-offset))
  22
  23 (defun compute-linkage-cell (node name res)
       ;; Emit code leaving the address of NAME's linkage cell in RES.
       ;; If NODE's code is not immobile, RES is first loaded from the thread
       ;; slot THREAD-LINKAGE-TABLE-SLOT and the cell fixup is then applied as
       ;; a displacement off RES; otherwise a single RIP-relative LEA is used.
  24   (if (not (sb-c::code-immobile-p node))
  25       (progn
  26         (inst mov res (thread-slot-ea sb-vm::thread-linkage-table-slot))
  27         (inst lea res (ea (make-fixup name :linkage-cell) res)))
  28       (inst lea res (rip-relative-ea (make-fixup name :linkage-cell)))))
  29
  30 ;;; Make the TNs used to hold OLD-FP and RETURN-PC within the current
  31 ;;; function. We treat these specially so that the debugger can find
  32 ;;; them at a known location.
  33 ;;;
  34 ;;; Without using a save-tn - which does not make much sense if it is
  35 ;;; wired to the stack?
  36 (defun make-old-fp-save-location ()
       ;; Build the TN that holds OLD-FP: a fixnum-typed TN wired to the
       ;; control-stack slot OCFP-SAVE-OFFSET and marked as :ENVIRONMENT kind.
  37   (let* ((stack-sc control-stack-sc-number)
  38          (slot ocfp-save-offset)
  39          (old-fp-tn (make-wired-tn *fixnum-primitive-type* stack-sc slot)))
  40     (setf (tn-kind old-fp-tn) :environment)
  41     old-fp-tn))
  42 (defun make-return-pc-save-location ()
       ;; Build the TN that holds RETURN-PC: a SYSTEM-AREA-POINTER TN wired to
       ;; the sap-stack slot RETURN-PC-SAVE-OFFSET, marked as :ENVIRONMENT kind.
  43   (let* ((sap-ptype (primitive-type-or-lose 'system-area-pointer))
  44          (pc-tn (make-wired-tn sap-ptype sap-stack-sc-number return-pc-save-offset)))
  45     (setf (tn-kind pc-tn) :environment)
  46     pc-tn))
  47
  48 ;;; Make a TN for the standard argument count passing location. We only
  49 ;;; need to make the standard location, since a count is never passed when we
  50 ;;; are using non-standard conventions.
  51 (defun make-arg-count-location ()
       ;; Returns a fixnum-typed TN wired to the RCX offset in the any-reg SC
       ;; (the same SC/offset pair that ARG-COUNT-SC encodes).
  52   (make-wired-tn *fixnum-primitive-type* any-reg-sc-number rcx-offset))
  53 \f
  54 ;;;; frame hackery
  55
  56 ;;; This is used for setting up the Old-FP in local call.
  57 (define-vop (current-fp)
       ;; Copies the current frame pointer (RBP-TN) into VAL, which may be
       ;; allocated in a register or on the control stack.
  58   (:results (val :scs (any-reg control-stack)))
  59   (:generator 1
  60     (move val rbp-tn)))
  61
  62 ;;; We don't have a separate NFP, so we don't need to do anything here.
  63 (define-vop (compute-old-nfp)
       ;; Emits no instructions: VAL is declared as a result but is ignored
       ;; and never written by the generator.
  64   (:results (val))
  65   (:ignore val)
  66   (:generator 1
  67     nil))
  68
  69 ;;; Accessing a slot from an earlier stack frame is definite hackery.
  70 (define-vop (ancestor-frame-ref)
       ;; Load VALUE from the stack slot of VARIABLE-HOME-TN, addressed relative
       ;; to the supplied FRAME-POINTER. VARIABLE-HOME-TN is never loaded
       ;; (:load-if nil); only its offset is used.
  71   (:args (frame-pointer :scs (descriptor-reg))
  72          (variable-home-tn :load-if nil))
  73   (:results (value :scs (descriptor-reg any-reg)))
  74   (:policy :fast-safe)
  75   (:generator 4
         ;; This VOP only handles homes in the control-stack SC.
  76     (aver (sc-is variable-home-tn control-stack))
  77     (loadw value frame-pointer
  78            (frame-word-offset (tn-offset variable-home-tn)))))
  79 (define-vop (ancestor-frame-set)
       ;; Store VALUE into the stack slot of VARIABLE-HOME-TN, addressed relative
       ;; to the supplied FRAME-POINTER. VARIABLE-HOME-TN is declared as the
       ;; result but is never loaded (:load-if nil); only its offset is used.
  80   (:args (frame-pointer :scs (descriptor-reg))
  81          (value :scs (descriptor-reg any-reg)))
  82   (:results (variable-home-tn :load-if nil))
  83   (:policy :fast-safe)
  84   (:generator 4
         ;; This VOP only handles homes in the control-stack SC.
  85     (aver (sc-is variable-home-tn control-stack))
  86     (storew value frame-pointer
  87             (frame-word-offset (tn-offset variable-home-tn)))))
  88
  89 (macrolet ((define-frame-op
  90                (suffix sc stack-sc instruction
  91                 &optional (ea `(ea (frame-byte-offset (tn-offset variable-home-tn))
  92                                    frame-pointer)))
                    ;; Define the VOP pair ANCESTOR-FRAME-REF/<SUFFIX> and
                    ;; ANCESTOR-FRAME-SET/<SUFFIX>, inheriting from the generic
                    ;; ANCESTOR-FRAME-REF / ANCESTOR-FRAME-SET VOPs.
                    ;;   SC          - register SC of the value operand
                    ;;   STACK-SC    - stack SC the home TN is asserted to be in
                    ;;   INSTRUCTION - move instruction used for both load and store
                    ;;   EA          - form computing the slot address; defaults to
                    ;;                 the home TN's frame byte offset off FRAME-POINTER
  93                (let ((reffer (symbolicate 'ancestor-frame-ref '/ suffix))
  94                      (setter (symbolicate 'ancestor-frame-set '/ suffix)))
  95                  `(progn
  96                     (define-vop (,reffer ancestor-frame-ref)
  97                       (:results (value :scs (,sc)))
  98                       (:generator 4
  99                         (aver (sc-is variable-home-tn ,stack-sc))
 100                         (inst ,instruction value
 101                               ,ea)))
 102                     (define-vop (,setter ancestor-frame-set)
 103                       (:args (frame-pointer :scs (descriptor-reg))
 104                              (value :scs (,sc)))
 105                       (:generator 4
 106                         (aver (sc-is variable-home-tn ,stack-sc))
 107                         (inst ,instruction ,ea value)))))))
       ;; One ref/set pair per stack SC. The two complex-float variants
       ;; pass their own EA form instead of the default.
 108   (define-frame-op double-float double-reg double-stack movsd)
 109   (define-frame-op single-float single-reg single-stack movss)
 110   (define-frame-op complex-double-float complex-double-reg complex-double-stack
 111     movupd (ea-for-cdf-data-stack variable-home-tn frame-pointer))
 112   (define-frame-op complex-single-float complex-single-reg complex-single-stack
 113     movq   (ea-for-csf-data-stack variable-home-tn frame-pointer))
 114   (define-frame-op signed-byte-64 signed-reg signed-stack mov)
 115   (define-frame-op unsigned-byte-64 unsigned-reg unsigned-stack mov)
 116   (define-frame-op system-area-pointer sap-reg sap-stack mov))
 117
 118 (defun primitive-type-indirect-cell-type (ptype)
 119   (declare (type primitive-type ptype))
       ;; Dispatch on the name of PTYPE. For each listed primitive type the
       ;; result is a four-element list built once via LOAD-TIME-VALUE:
       ;;   (primitive-type stack-sc reader-fn writer-fn)
       ;; where READER-FN and WRITER-FN are lambdas that emit the corresponding
       ;; ANCESTOR-FRAME-REF/... and ANCESTOR-FRAME-SET/... VOPs through SB-C::VOP.
       ;; A PTYPE whose name is not listed falls out of the CASE, giving NIL.
 120   (macrolet ((foo (&body data)
 121                  `(case (primitive-type-name ptype)
 122                     ,@(loop for (name stack-sc ref set) in data
 123                             collect
 124                             `(,name
 125                                (load-time-value
 126                                 (list (primitive-type-or-lose ',name)
 127                                       (sc-or-lose ',stack-sc)
 128                                       (lambda (node block fp value res)
 129                                         (sb-c::vop ,ref node block
 130                                                    fp value res))
 131                                       (lambda (node block fp new-val value)
 132                                         (sb-c::vop ,set node block
 133                                                    fp new-val value)))))))))
         ;; Each entry is (primitive-type-name stack-sc ref-vop set-vop).
 134     (foo (double-float double-stack
 135                        ancestor-frame-ref/double-float
 136                        ancestor-frame-set/double-float)
 137          (single-float single-stack
 138                        ancestor-frame-ref/single-float
 139                        ancestor-frame-set/single-float)
 140          (complex-double-float complex-double-stack
 141                                ancestor-frame-ref/complex-double-float
 142                                ancestor-frame-set/complex-double-float)
 143          (complex-single-float complex-single-stack
 144                                ancestor-frame-ref/complex-single-float
 145                                ancestor-frame-set/complex-single-float)
 146          (signed-byte-64 signed-stack
 147                          ancestor-frame-ref/signed-byte-64
 148                          ancestor-frame-set/signed-byte-64)
 149          (unsigned-byte-64 unsigned-stack
 150                            ancestor-frame-ref/unsigned-byte-64
 151                            ancestor-frame-set/unsigned-byte-64)
              ;; UNSIGNED-BYTE-63 reuses the UNSIGNED-BYTE-64 stack SC and VOPs.
 152          (unsigned-byte-63 unsigned-stack
 153                            ancestor-frame-ref/unsigned-byte-64
 154                            ancestor-frame-set/unsigned-byte-64)
 155          (system-area-pointer sap-stack
 156                               ancestor-frame-ref/system-area-pointer
 157                               ancestor-frame-set/system-area-pointer))))
 158
 159 (define-vop (xep-allocate-frame)
       ;; Emits the start of an external entry point: alignment padding, the
       ;; START-LAB label, the simple-fun header area, and the instruction that
       ;; pops the return PC into its save slot in the frame.
 160   (:info start-lab)
 161   (:generator 1
         ;; SHIFTF returns the previous inter-function padding kind and resets
         ;; the setting to :NOP. The pad byte is #x90 when the previous kind
         ;; was :NOP and 0 otherwise.
 162     (let ((nop-kind
 163            (shiftf (sb-assem::asmstream-inter-function-padding sb-assem:*asmstream*)
 164                    :nop)))
 165       (emit-alignment n-lowtag-bits (if (eq nop-kind :nop) #x90 0)))
 166     (emit-label start-lab)
 167     ;; Skip space for the function header.
 168     (inst simple-fun-header-word)
         ;; One word was just emitted, so skip the remaining
         ;; (SIMPLE-FUN-INSTS-OFFSET - 1) words.
 169     (inst .skip (* (1- simple-fun-insts-offset) n-word-bytes))
 170     ;; The start of the actual code.
 171     ;; Save the return-pc.
 172     (popw rbp-tn (frame-word-offset return-pc-save-offset))))
 173
 174 (defun emit-lea (target source disp)
       ;; Emit TARGET := SOURCE + DISP. A displacement that is EQL to 0 needs
       ;; no address arithmetic, so a plain MOV is emitted instead of LEA.
 175   (cond ((not (eql disp 0))
 176          (inst lea target (ea disp source)))
 177         (t (inst mov target source))))
 178
 179 (define-vop (xep-setup-sp)
       ;; Sets RSP from RBP: RSP := RBP - N-WORD-BYTES * (allocated stack size
       ;; - SP->FP-OFFSET). EMIT-LEA degrades to a MOV if that displacement is 0.
 180   (:generator 1
 181     (emit-lea rsp-tn rbp-tn     (- (* n-word-bytes
 182                                       (- (sb-allocated-size 'stack)
 183                                          sp->fp-offset))))))
 184
 185 ;;; This is emitted directly before either a known-call-local, call-local,
 186 ;;; or a multiple-call-local. All it does is allocate stack space for the
 187 ;;; callee (who has the same size stack as us).
 188 (define-vop (allocate-frame)
       ;; RES receives the new frame pointer; the NFP result and the CALLEE
       ;; info argument are ignored.
 189   (:results (res :scs (any-reg))
 190             (nfp))
 191   (:info callee)
 192   (:ignore nfp callee)
 193   (:generator 2
         ;; RES := RSP - SP->FP-OFFSET words, computed before RSP is moved.
 194     (inst lea res (ea (- (* sp->fp-offset n-word-bytes)) rsp-tn))
         ;; Reserve as many words as the current allocated stack size.
 195     (inst sub rsp-tn (* n-word-bytes (sb-allocated-size 'stack)))))
 196
 197 (defun make-stack-pointer-tn (&optional nargs)
 198   ;; If NARGS is supplied and the frame-pointer offset equals the size of
 199   ;; the frame to be allocated, the result is wired to RSP (no temporary).
 200   (flet ((frame-bytes ()
 201            (let ((stack-args (if (> nargs register-arg-count) nargs 0)))
 202              (* (max stack-args (sb-c::sb-size (sb-or-lose 'stack)))
 203                 n-word-bytes))))
 204     (cond ((and nargs
 205                 (= (* sp->fp-offset n-word-bytes) (frame-bytes)))
 206            (make-wired-tn *fixnum-primitive-type* any-reg-sc-number rsp-offset))
 207           (t
 208            (make-normal-tn *fixnum-primitive-type*)))))
 209
 210 ;;; Allocate a partial frame for passing stack arguments in a full
 211 ;;; call. NARGS is the number of arguments passed. We allocate at
 212 ;;; least 2 slots, because the XEP noise is going to want to use them
 213 ;;; before it can extend the stack.
 214 (define-vop (allocate-full-call-frame)
       ;; Reserves stack space for a full call passing NARGS arguments and
       ;; leaves the new frame pointer in RES.
 215   (:info nargs)
 216   (:results (res :scs (any-reg)))
 217   (:generator 2
         ;; STACK-SIZE is in bytes: the larger of the stack-passed argument
         ;; count (NARGS if it exceeds REGISTER-ARG-COUNT, else 0) and the size
         ;; of the STACK storage base, times N-WORD-BYTES.
 218     (let ((fp-offset (* sp->fp-offset n-word-bytes))
 219           (stack-size (* (max (if (> nargs register-arg-count)
 220                                   nargs
 221                                   0)
 222                               (sb-c::sb-size (sb-or-lose 'stack)))
 223                          n-word-bytes)))
           ;; When both sizes agree the new frame pointer equals the new RSP,
           ;; so RES is just a copy of RSP after the SUB (same condition as in
           ;; MAKE-STACK-POINTER-TN). Otherwise RES is computed from the old RSP.
 224       (cond ((= fp-offset stack-size)
 225              (inst sub rsp-tn stack-size)
 226              (move res rsp-tn))
 227             (t
 228              (inst lea res (ea (- fp-offset) rsp-tn))
 229              (inst sub rsp-tn stack-size))))))
 230 \f
 231 ;;; Emit code needed at the return-point from an unknown-values call
 232 ;;; for a fixed number of values. Values is the head of the TN-REF
 233 ;;; list for the locations that the values are to be received into.
 234 ;;; Nvals is the number of values that are to be received (should
 235 ;;; equal the length of Values).
 236 ;;;
 237 ;;; If 0 or 1 values are expected, then we just emit an instruction to
 238 ;;; reset the SP (which will only be executed when other than 1 value
 239 ;;; is returned.)
 240 ;;;
 241 ;;; In the general case we have to do three things:
 242 ;;;  -- Default unsupplied register values. This need only be done
 243 ;;;     when a single value is returned, since register values are
 244 ;;;     defaulted by the callee in the non-single case.
 245 ;;;  -- Default unsupplied stack values. This needs to be done whenever
 246 ;;;     there are stack values.
 247 ;;;  -- Reset SP. This must be done whenever other than 1 value is
 248 ;;;     returned, regardless of the number of values desired.
 249 (defun default-unknown-values (vop values nvals node rbx move-temp)
 250   (declare (type (or tn-ref null) values)
 251            (type unsigned-byte nvals))
       ;; Emit the return-point code for a call at NODE that wants exactly
       ;; NVALS values. VALUES is the TN-REF chain of receiving locations.
       ;; RBX is the TN used here as the base for loading returned stack values
       ;; and as the source for resetting RSP. MOVE-TEMP is a scratch register.
       ;; The emitted code branches on the carry flag at the return point:
       ;; as the comments below state, carry clear means exactly one value
       ;; was returned.
       ;;
       ;; VERIFY is true only when the callee leaf is known, the policy has
       ;; SAFETY >= 1 and DEBUG = 3, and the leaf is :DECLARED-VERIFY or
       ;; :DEFINED-HERE. In that case TYPE is taken from the function type's
       ;; return type (or *WILD-TYPE*) instead of the node's derived type.
       ;; TRUST is true when not verifying, or when the type permits any number
       ;; of values; when TRUST is false a value-count check is emitted.
 252   (multiple-value-bind (type name leaf) (sb-c::lvar-fun-type (sb-c::basic-combination-fun node))
 253    (let* ((verify (and leaf
 254                         (policy node (and (>= safety 1)
 255                                           (= debug 3)))
 256                         (memq (sb-c::leaf-where-from leaf) '(:declared-verify :defined-here))))
 257            (type (if verify
 258                      (if (fun-type-p type)
 259                          (fun-type-returns type)
 260                          *wild-type*)
 261                      (sb-c::node-derived-type node)))
 262            (min-values (values-type-min-value-count type))
 263            (max-values (values-type-max-value-count type))
 264            (trust (or (and (= min-values 0)
 265                            (= max-values call-arguments-limit))
 266                       (not verify))))
          ;; CHECK-NARGS emits a range check of the value count in RCX against
          ;; [MIN-VALUES, MAX-VALUES], jumping to an INVALID-ARG-COUNT-ERROR
          ;; handler on failure; SKIP is the restart location.
 267      (flet ((check-nargs ()
 268               (assemble ()
 269                 (let* ((*location-context* (list* name
 270                                                   (type-specifier type)
 271                                                   (make-restart-location SKIP)))
 272                        (err-lab (generate-error-code vop 'invalid-arg-count-error))
 273                        (min min-values)
 274                        (max (and (< max-values call-arguments-limit)
 275                                  max-values)))
                       ;; Three shapes: exact count, bounded range, lower bound only.
 276                   (cond ((eql min max)
 277                          (if (zerop max)
 278                              (inst test :dword rcx-tn rcx-tn)
 279                              (inst cmp :dword rcx-tn (fixnumize max)))
 280                          (inst jmp :ne err-lab))
 281                         (max
                              ;; Bounded range: subtract MIN (unless it is 0) and do
                              ;; one unsigned compare against MAX - MIN.
 282                          (let ((nargs move-temp))
 283                           (if (zerop min)
 284                               (setf nargs rcx-tn)
 285                               (inst lea :dword move-temp (ea (fixnumize (- min)) rcx-tn)))
 286                           (inst cmp :dword nargs (fixnumize (- max min)))
 287                           (inst jmp :a err-lab)))
 288                         (t
                              ;; No upper bound; nothing is emitted when MIN is 0.
 289                          (cond ((= min 1)
 290                                 (inst test :dword rcx-tn rcx-tn)
 291                                 (inst jmp :e err-lab))
 292                                ((plusp min)
 293                                 (inst cmp :dword rcx-tn (fixnumize min))
 294                                 (inst jmp :b err-lab))))))
 295                 SKIP)))
 296        (cond
              ;; Case 1: at most one value wanted. Nothing needs defaulting; only
              ;; RSP may need to be reset from RBX.
 297          ((<= nvals 1)
 298           (note-this-location vop :single-value-return)
 299           (cond
                 ;; Trusted and all possible values fit in registers: emit nothing.
 300             ((and trust
 301                   (<= (sb-kernel:values-type-max-value-count type)
 302                       register-arg-count)))
                 ;; Trusted and never a single value: reset RSP unconditionally.
 303             ((and trust
 304                   (not (sb-kernel:values-type-may-be-single-value-p type)))
 305              (inst mov rsp-tn rbx))
 306             (t
                  ;; Reset RSP only when carry is set. When checking, RCX is
                  ;; forced to fixnum 1 on the carry-clear path first.
 307              (inst cmov :c rsp-tn rbx)
 308              (unless trust
 309                (inst mov move-temp (fixnumize 1))
 310                (inst cmov :nc rcx-tn move-temp)
 311                (check-nargs)))))
              ;; Case 2: every wanted value is received in a register.
 312          ((<= nvals register-arg-count)
 313           (note-this-location vop :unknown-return)
 314           (when (or (not trust)
 315                     (sb-kernel:values-type-may-be-single-value-p type))
 316             (assemble ()
 317               (inst jmp :c regs-defaulted)
 318               ;; Default the unsupplied registers.
                   ;; The second result register gets NIL-VALUE if it is live; later
                   ;; live registers are filled from it (or from NIL-VALUE directly
                   ;; when it is unused).
 319               (let* ((2nd-tn-ref (tn-ref-across values))
 320                      (2nd-tn (tn-ref-tn 2nd-tn-ref))
 321                      (2nd-tn-live (neq (tn-kind 2nd-tn) :unused)))
 322                 (when 2nd-tn-live
 323                   (inst mov 2nd-tn nil-value))
 324                 (when (> nvals 2)
 325                   (loop
 326                     for tn-ref = (tn-ref-across 2nd-tn-ref)
 327                     then (tn-ref-across tn-ref)
 328                     for count from 2 below register-arg-count
 329                     unless (eq (tn-kind (tn-ref-tn tn-ref)) :unused)
 330                     do
 331                     (inst mov :dword (tn-ref-tn tn-ref)
 332                           (if 2nd-tn-live 2nd-tn nil-value)))))
                   ;; On this path RBX takes the current RSP, so a later
                   ;; MOV RSP, RBX leaves RSP unchanged.
 333               (inst mov rbx rsp-tn)
 334               regs-defaulted))
 335
 336           (when (or (not trust)
 337                     (< register-arg-count
 338                        (sb-kernel:values-type-max-value-count type)))
 339             (inst mov rsp-tn rbx))
 340           (unless trust
 341             (inst mov move-temp (fixnumize 1))
 342             (inst cmov :nc rcx-tn move-temp)
 343             (check-nargs)))
              ;; Case 3: some wanted values are received from the stack.
 344          (t
 345           (collect ((defaults))
                 ;; USED-REGISTERS: live result TNs among the register values after
                 ;; the first; its FINALLY clause leaves VALUES at the first stack
                 ;; value. USED-STACK-SLOTS-P: true if any stack result TN is live.
 346             (let ((default-stack-slots (gen-label))
 347                   (used-registers
 348                     (loop for i from 1 below register-arg-count
 349                           for tn = (tn-ref-tn (setf values (tn-ref-across values)))
 350                           unless (eq (tn-kind tn) :unused)
 351                           collect tn
 352                           finally (setf values (tn-ref-across values))))
 353                   (used-stack-slots-p
 354                     (loop for ref = values then (tn-ref-across ref)
 355                           while ref
 356                           thereis (neq (tn-kind (tn-ref-tn ref)) :unused))))
 357               (assemble ()
 358                 (note-this-location vop :unknown-return)
 359                 (unless trust
 360                   (inst mov move-temp (fixnumize 1))
 361                   (inst cmov :nc rcx-tn move-temp))
 362                 ;; If it returned exactly one value the registers and the
 363                 ;; stack slots need to be filled with NIL.
 364                 (cond ((and trust
 365                             (> min-values 1)))
 366                       (used-stack-slots-p
 367                        (inst jmp :nc default-stack-slots))
 368                       (t
 369                        (inst jmp :c regs-defaulted)
 370                        (loop for null = nil-value then (car used-registers)
 371                              for reg in used-registers
 372                              do (inst mov :dword reg null))
 373                        (inst jmp done)))
 374                 REGS-DEFAULTED
                     ;; For each live stack result with index I: unless trusted
                     ;; with I below MIN-VALUES, branch to its out-of-line default
                     ;; when RCX <= I; otherwise load the value relative to RBX.
 375                 (do ((i register-arg-count (1+ i))
 376                      (val values (tn-ref-across val)))
 377                     ((null val))
 378                   (let ((tn (tn-ref-tn val)))
 379                     (unless (eq (tn-kind tn) :unused)
 380                       (when (or (not trust)
 381                                 (>= i min-values))
 382                         (let ((default-lab (gen-label)))
 383                           (defaults (cons default-lab tn))
 384                           ;; Note that the max number of values received
 385                           ;; is assumed to fit in a :dword register.
 386                           (inst cmp :dword rcx-tn (fixnumize i))
 387                           (inst jmp :be default-lab)))
 388                       (sc-case tn
 389                         (control-stack
                              ;; Stack-to-stack transfer goes through MOVE-TEMP.
 390                          (loadw move-temp rbx (frame-word-offset (+ sp->fp-offset i)))
 391                          (inst mov tn move-temp))
 392                         (t
 393                          (loadw tn rbx (frame-word-offset (+ sp->fp-offset i))))))))
 394                 DEFAULTING-DONE
 395                 (move rsp-tn rbx)
 396                 (unless trust
 397                   (check-nargs))
 398                 DONE
                     ;; Out-of-line defaulting code, emitted in the :ELSEWHERE section.
 399                 (let ((defaults (defaults)))
 400                   (when defaults
 401                     (assemble (:elsewhere)
 402                       (when (or (not trust)
 403                                 (<= min-values 1))
                             ;; Single-value entry: NIL the live registers, copy RSP
                             ;; into RBX, then fall into the per-slot defaults below.
 404                         (emit-label default-stack-slots)
 405                         (loop for null = nil-value then (car used-registers)
 406                               for reg in used-registers
 407                               do (inst mov :dword reg null))
 408                         (move rbx rsp-tn))
                           ;; The labels are emitted in increasing slot order, so
                           ;; entering at slot I also defaults every later slot.
 409                       (dolist (default defaults)
 410                         (emit-label (car default))
 411                         (inst mov (cdr default) nil-value))
 412                       (inst jmp defaulting-done)))))))))))))
 413 \f
 414 ;;;; unknown values receiving
 415
 416 ;;; Emit code needed at the return point for an unknown-values call
 417 ;;; for an arbitrary number of values.
 418 ;;;
 419 ;;; We do the single and non-single cases with no shared code: there
 420 ;;; doesn't seem to be any potential overlap, and receiving a single
 421 ;;; value is more important efficiency-wise.
 422 ;;;
 423 ;;; When there is a single value, we just push it on the stack,
 424 ;;; returning the old SP and 1.
 425 ;;;
 426 ;;; When there is a variable number of values, we move all of the
 427 ;;; argument registers onto the stack, and return ARGS and NARGS.
 428 ;;;
 429 ;;; ARGS and NARGS are TNs wired to the named locations. We must
 430 ;;; explicitly allocate these TNs, since their lifetimes overlap with
 431 ;;; the results start and count. (Also, it's nice to be able to target
 432 ;;; them.)
 433 (defun receive-unknown-values (args nargs start count node)
 434   (declare (type tn args nargs start count))
       ;; Emit return-point code that leaves every returned value on the stack.
       ;; START receives the address just above the first value and COUNT the
       ;; number of values (as a fixnum); either may be of :UNUSED kind, in which
       ;; case it is not written. ARGS and NARGS are the TNs holding the values
       ;; pointer and value count at the return point (MULTIPLE-CALL-LOCAL in this
       ;; file passes temporaries wired to RBX and RCX). Returns no values.
 435   (let ((type (sb-c::basic-combination-derived-type node))
 436         (variable-values (gen-label))
 437         (stack-values (gen-label))
 438         (done (gen-label))
 439         (unused-count-p (eq (tn-kind count) :unused)))
         ;; Single-value path, emitted only if the derived type allows exactly
         ;; one value; it is taken when the carry flag is clear.
 440     (when (sb-kernel:values-type-may-be-single-value-p type)
 441       (inst jmp :c variable-values)
           ;; Push the first argument register. START, when used, ends up holding
           ;; the value RSP had before the push; when START is that same register
           ;; it must be pushed first and START recomputed from RSP.
 442       (cond ((eq (tn-kind start) :unused)
 443              (inst push (first *register-arg-tns*)))
 444             ((location= start (first *register-arg-tns*))
 445              (inst push (first *register-arg-tns*))
 446              (inst lea start (ea n-word-bytes rsp-tn)))
 447             (t (inst mov start rsp-tn)
 448                (inst push (first *register-arg-tns*))))
 449       (unless unused-count-p
 450         (inst mov count (fixnumize 1)))
 451       (inst jmp done)
 452       (emit-label variable-values))
 453     ;; The stack frame is burnt and RETurned from if there are no
 454     ;; stack values. In this case quickly reallocate sufficient space.
 455     (when (<= (sb-kernel:values-type-min-value-count type)
 456               register-arg-count)
 457       (inst cmp nargs (fixnumize register-arg-count))
 458       (inst jmp :g stack-values)
           ;; Lower RSP by NARGS words. If the fixnum tag shift equals WORD-SHIFT,
           ;; the tagged count is already a byte count and is subtracted directly.
 459       #+#.(cl:if (cl:= sb-vm:word-shift sb-vm:n-fixnum-tag-bits) '(and) '(or))
 460       (inst sub rsp-tn nargs)
           ;; Otherwise the count is scaled first. If COUNT is still needed, the
           ;; scaling is done on a copy in RAX so that NARGS is preserved.
 461       #-#.(cl:if (cl:= sb-vm:word-shift sb-vm:n-fixnum-tag-bits) '(and) '(or))
 462       (let ((sub nargs))
 463         (unless unused-count-p
 464           (inst mov :dword (setf sub rax-tn) nargs))
 465         (inst shl :dword sub (- word-shift n-fixnum-tag-bits))
 466         (inst sub rsp-tn sub))
 467       (emit-label stack-values))
 468     ;; dtc: this writes the registers onto the stack even if they are
 469     ;; not needed, only the number specified in rcx are used and have
 470     ;; stack allocated to them. No harm is done.
         ;; Register values are stored at word offsets -1, -2, ... from ARGS,
         ;; limited to the maximum value count of the derived type.
 471     (loop
 472       for arg in *register-arg-tns*
 473       for i downfrom -1
 474       for j below (sb-kernel:values-type-max-value-count type)
 475       do (storew arg args i))
 476     (unless (eq (tn-kind start) :unused)
 477      (move start args))
 478     (unless unused-count-p
 479       (move count nargs))
 480
 481     (emit-label done))
 482   (values))
 483
 484 ;;; VOP that can be inherited by unknown values receivers. The main thing this
 485 ;;; handles is allocation of the result temporaries.
 486 (define-vop (unknown-values-receiver)
       ;; No generator: this VOP only declares operands for inheriting VOPs.
       ;; VALUES-START is a temporary wired to the RBX offset, live until result 0.
 487   (:temporary (:sc descriptor-reg :offset rbx-offset
 488                    :from :eval :to (:result 0))
 489               values-start)
       ;; NVALS is a temporary wired to the RCX offset, live until result 1.
 490   (:temporary (:sc any-reg :offset rcx-offset
 491                :from :eval :to (:result 1))
 492               nvals)
       ;; START and COUNT are the results; each may be in a register or on the
       ;; control stack.
 493   (:results (start :scs (any-reg control-stack))
 494             (count :scs (any-reg control-stack))))
 495 \f
 496 ;;;; local call with unknown values convention return
 497
 498 (defun check-ocfp-and-return-pc (old-fp return-pc)
       ;; Compile-time sanity check: signal an error unless OLD-FP is the
       ;; control-stack TN at OCFP-SAVE-OFFSET and RETURN-PC is the sap-stack TN
       ;; at RETURN-PC-SAVE-OFFSET. The two FORMAT forms are disabled (#+nil)
       ;; debugging output.
 499   #+nil
 500   (format t "*known-return: old-fp ~S, tn-kind ~S; ~S ~S~%"
 501           old-fp (tn-kind old-fp) (sb-c::tn-save-tn old-fp)
 502           (tn-kind (sb-c::tn-save-tn old-fp)))
 503   #+nil
 504   (format t "*known-return: return-pc ~S, tn-kind ~S; ~S ~S~%"
 505           return-pc (tn-kind return-pc)
 506           (sb-c::tn-save-tn return-pc)
 507           (tn-kind (sb-c::tn-save-tn return-pc)))
 508   (when (or (not (sc-is old-fp control-stack))
 509             (/= (tn-offset old-fp) ocfp-save-offset))
 510     (error "ocfp not on stack in standard save location?"))
 511   (when (or (not (sc-is return-pc sap-stack))
 512             (/= (tn-offset return-pc) return-pc-save-offset))
 513     (error "return-pc not on stack in standard save location?")))
 514
 515 ;;; The local call convention doesn't fit that well with x86-style
 516 ;;; calls. Emit a header for local calls to pop the return address
 517 ;;; in the right place.
 518 (defun emit-block-header (start-label trampoline-label fall-thru-p alignp)
       ;; With a TRAMPOLINE-LABEL, emit the trampoline that pops the return
       ;; address into its save slot in the frame. If control can fall through
       ;; into the block, a jump to START-LABEL is emitted first so that the
       ;; fall-through path skips the pop. ALIGNP, when true, is passed on as the
       ;; second argument to EMIT-ALIGNMENT. START-LABEL is always emitted last.
 519   (when trampoline-label
 520     (when fall-thru-p
 521       (inst jmp start-label))
 522     (emit-label trampoline-label)
 523     (popw rbp-tn (frame-word-offset return-pc-save-offset)))
 524   (if alignp
 525       (emit-alignment n-lowtag-bits alignp))
 526   (emit-label start-label))
 527
 528 ;;; Non-TR local call for a fixed number of values passed according to
 529 ;;; the unknown values convention.
 530 ;;;
 531 ;;; FP is the frame pointer to install before doing the call.
 532 ;;;
 533 ;;; NFP would be the number-stack frame pointer if we had a separate
 534 ;;; number stack.
 535 ;;;
 536 ;;; Args are the argument passing locations, which are specified only
 537 ;;; to terminate their lifetimes in the caller.
 538 ;;;
 539 ;;; VALUES are the return value locations (wired to the standard
 540 ;;; passing locations). NVALS is the number of values received.
 541 ;;;
 542 ;;; Save is the save info, which we can ignore since saving has been
 543 ;;; done.
 544 ;;;
 545 ;;; TARGET is a continuation pointing to the start of the called
 546 ;;; function.
 547 (define-vop (call-local)
       ;; Local call that receives a fixed number (NVALS) of values.
 548   (:args (fp)
 549          (nfp)
 550          (args :more t))
 551   (:results (values :more t))
 552   (:save-p t)
 553   (:move-args :local-call)
 554   (:info arg-locs callee target nvals)
 555   (:vop-var vop)
 556   (:ignore nfp arg-locs args callee)
 557   (:node-var node)
 558   (:temporary (:sc any-reg) move-temp)
 559   (:generator 5
         ;; Install the callee's frame pointer, then call TARGET.
 560     (move rbp-tn fp)
 561     (note-this-location vop :call-site)
 562     (inst call target)
         ;; Return point: default missing values, using RBX-TN as the
         ;; stack-values base and MOVE-TEMP as scratch.
 563     (default-unknown-values vop values nvals node rbx-tn move-temp)))
 564
 565 ;;; Non-TR local call for a variable number of return values passed according
 566 ;;; to the unknown values convention. The results are the start of the values
 567 ;;; glob and the number of values received.
 568 (define-vop (multiple-call-local unknown-values-receiver)
       ;; Inherits the VALUES-START / NVALS temporaries and the START / COUNT
       ;; results from UNKNOWN-VALUES-RECEIVER.
 569   (:args (fp)
 570          (nfp)
 571          (args :more t))
 572   (:save-p t)
 573   (:move-args :local-call)
 574   (:info save callee target)
 575   (:ignore args save nfp callee)
 576   (:vop-var vop)
 577   (:node-var node)
 578   (:generator 20
         ;; Install the callee's frame pointer, then call TARGET.
 579     (move rbp-tn fp)
 580     (note-this-location vop :call-site)
 581     (inst call target)
 582     (note-this-location vop :unknown-return)
         ;; Return point: spill the returned values to the stack and set
         ;; START and COUNT.
 583     (receive-unknown-values values-start nvals start count node)))
 584 \f
 585 ;;;; local call with known values return
 586
 587 ;;; Non-TR local call with known return locations. Known-value return
 588 ;;; works just like argument passing in local call.
 589 ;;;
 590 ;;; Note: we can't use normal load-tn allocation for the fixed args,
 591 ;;; since all registers may be tied up by the more operand. Instead,
 592 ;;; we use MAYBE-LOAD-STACK-TN.
 593 (define-vop (known-call-local)
       ;; Local call whose results arrive in known locations: nothing is
       ;; emitted after the CALL, only a :KNOWN-RETURN location note.
 594   (:args (fp)
 595          (nfp)
 596          (args :more t))
 597   (:results (res :more t))
 598   (:move-args :local-call)
 599   (:save-p t)
 600   (:info save callee target)
 601   (:ignore args res save nfp callee)
 602   (:vop-var vop)
 603   (:generator 5
         ;; Install the callee's frame pointer, then call TARGET.
 604     (move rbp-tn fp)
 605     (note-this-location vop :call-site)
 606     (inst call target)
 607     (note-this-location vop :known-return)))
 608 \f
 609 ;;; From Douglas Crosher
 610 ;;; Return from known values call. We receive the return locations as
 611 ;;; arguments to terminate their lifetimes in the returning function. We
 612 ;;; restore FP and CSP and jump to the Return-PC.
 613 (define-vop (known-return)
       ;; Return from a known-values call. VALS and VAL-LOCS are accepted but
       ;; ignored by the generator.
 614   (:args (old-fp)
 615          (return-pc)
 616          (vals :more t))
 617   (:move-args :known-return)
 618   (:info val-locs)
 619   (:ignore val-locs vals)
 620   (:vop-var vop)
 621   (:generator 6
         ;; Errors at code-generation time unless OLD-FP and RETURN-PC are in
         ;; their standard stack save slots.
 622     (check-ocfp-and-return-pc old-fp return-pc)
 623     ;; Zot all of the stack except for the old-fp and return-pc.
         ;; LEAVE restores RSP from RBP and pops the saved RBP; RET then pops
         ;; the return address.
 624     (inst leave)
 625     (inst ret)))
 626 \f
 627 ;;;; full call
 628 ;;;
 629 ;;; There is something of a cross-product effect with full calls.
 630 ;;; Different versions are used depending on whether we know the
 631 ;;; number of arguments or the name of the called function, and
 632 ;;; whether we want fixed values, unknown values, or a tail call.
 633 ;;;
 634 ;;; In full call, the arguments are passed creating a partial frame on
 635 ;;; the stack top and storing stack arguments into that frame. On
 636 ;;; entry to the callee, this partial frame is pointed to by FP.
 637
 638 ;;; This macro helps in the definition of full call VOPs by avoiding
 639 ;;; code replication in defining the cross-product VOPs.
 640 ;;;
 641 ;;; VOP-NAME is the name of the VOP to define.
 642 ;;;
 643 ;;; NAMED is true if the callee is designated by name. FUN is then a
 644 ;;; codegen info argument instead of an argument TN, and the call is
     ;;; emitted by EMIT-DIRECT-CALL.
 645 ;;;
 646 ;;; RETURN is one of :FIXED, :UNKNOWN, :TAIL or :UNBOXED:
 647 ;;; -- If :FIXED, then the call is for a fixed number of values, returned in
 648 ;;;    the standard passing locations (passed as result operands).
 649 ;;; -- If :UNKNOWN, then the result values are pushed on the stack, and the
 650 ;;;    result values are specified by the Start and Count as in the
 651 ;;;    unknown-values continuation representation.
 652 ;;; -- If :TAIL, then do a tail-recursive call. No values are returned.
 653 ;;;    The Old-Fp and Return-PC are passed as arguments, following FUN
     ;;;    when FUN is itself an argument.
     ;;; -- If :UNBOXED, result operands are declared as for :FIXED but are
     ;;;    ignored by the VOP, and nothing is emitted after the call.
 654 ;;;
 655 ;;; In non-tail calls, the pointer to the stack arguments (NEW-FP) is
 656 ;;; passed as the first argument. If Variable is false, then the passing
 657 ;;; locations are passed as a more arg. Variable is true if there are
 658 ;;; a variable number of arguments passed on the stack. Variable
 659 ;;; cannot be specified with :TAIL return. TR variable argument call
 660 ;;; is implemented separately.
 661 ;;;
 662 ;;; In tail call with fixed arguments, the passing locations are
 663 ;;; passed as a more arg, but there is no new-FP, since the arguments
 664 ;;; have been set up in the current frame.
     ;;;
     ;;; ARGS, if :FIXED, drops the SC restriction on the more arg and
     ;;; selects :FIXED instead of :FULL-CALL for :MOVE-ARGS.
 665 (defmacro define-full-call (vop-name named return variable &optional args)
 666   (aver (not (and variable (eq return :tail)))) ; see TAIL-CALL-VARIABLE for that case
 667   `(define-vop (,vop-name ,@(when (eq return :unknown) '(unknown-values-receiver)))
 668      (:args    ,@(unless (eq return :tail)
 669                    '((new-fp :scs (any-reg) :to (:argument 1))))
 670
 671                ,@(unless named ; FUN is an info argument for named call
 672                    '((fun :scs (descriptor-reg control-stack)
 673                           :target rax :to (:argument 0))))
 674
 675                ,@(when (eq return :tail)
 676                    '((old-fp)
 677                      (return-pc)))
 678
 679                ,@(unless variable
 680                    `((args :more t ,@(unless (eq args :fixed)
 681                                        '(:scs (descriptor-reg control-stack)))))))
 682
 683      ,@(when (memq return '(:fixed :unboxed)) '((:results (values :more t))))
 684
 685      (:save-p ,(if (eq return :tail) :compute-only t))
 686
 687      ,@(unless (or (eq return :tail) variable)
 688          `((:move-args ,(if (eq args :fixed) :fixed :full-call))))
 689
 690      (:vop-var vop)
 691      (:node-var node)
 692      (:info    ,@(unless (or variable (eq return :tail)) '(arg-locs))
 693                ,@(unless variable '(nargs))
 694                ;; Intuitively you might want FUN to be the first codegen arg,
 695                ;; but that won't work, because EMIT-ARG-MOVES wants the
 696                ;; passing locs in (FIRST (vop-codegen-info vop)).
 697                ,@(when named '(fun))
 698                ,@(when (eq return :fixed) '(nvals))
 699                step-instrumenting
 700                ,@(unless named '(fun-type)))
 701
 702      (:ignore   ,@(unless (or variable (eq return :tail)) '(arg-locs))
 703                 ,@(unless variable '(args))
 704                 ,@(when (eq return :unboxed) '(values)))
 705
 706      ;; For anonymous call, RAX is the function. For named call, RAX will be the linkage
 707      ;; table base if not stepping, or the linkage cell itself if stepping.
 708      ;; Calls from immobile-space without stepping avoid using RAX, and instead
 709      ;; access the linkage table relative to RIP.
 710      (:temporary (:sc descriptor-reg :offset rax-offset :from (:argument 0) :to :eval) rax)
 711
 712      ;; We pass the number of arguments in RCX.
 713      (:temporary
 714       (:sc unsigned-reg :offset rcx-offset :to ,(if (eq return :fixed) :save :eval))
 715       rcx)
 716
 717      ,@(when (eq return :fixed)
 718                    ;; RBX and MOVE-TEMP are handed to DEFAULT-UNKNOWN-VALUES
     ;;               ;; after the call.
 719          `((:temporary (:sc unsigned-reg :offset rbx-offset :from :result) rbx)
 720            (:temporary (:sc any-reg) move-temp)))
 721
 722                ;; With variable call, we have to load the
 723                ;; register-args out of the (new) stack frame before
 724                ;; doing the call. Therefore, we have to tell the
 725                ;; lifetime stuff that we need to use them.
 726      ,@(when variable
 727          (mapcar (lambda (name offset)
 728                    `(:temporary (:sc descriptor-reg
 729                                  :offset ,offset
 730                                  :from (:argument 0)
 731                                  :to :eval)
 732                                 ,name))
 733                  *register-arg-names* *register-arg-offsets*))
 734
 735      ,@(when (eq return :tail)
 736          '((:temporary (:sc unsigned-reg :from (:argument 1) :to (:argument 2))
 737             old-fp-tmp)))
 738      ,@(unless (eq return :tail) '((:node-var node))) ; NOTE(review): repeats the unconditional (:node-var node) above; looks redundant
 739
 740      (:generator ,(+ (if named 5 0)
 741                      (if variable 19 1)
 742                      (if (eq return :tail) 0 10)
 743                      15
 744                      (if (eq return :unknown) 25 0))
 745
 746        (progn node) ; always "use" it
 747
 748                ;; This has to be done before the frame pointer is
 749                ;; changed! RAX stores the 'lexical environment' needed
 750                ;; for closures.
 751        ,@(unless named '((move rax fun)))
 752
 753        ,@(if variable
 754                      ;; For variable call, compute the number of
 755                      ;; arguments and move some of the arguments to
 756                      ;; registers. RCX = (NEW-FP - RSP) shifted to a
     ;;                 ;; fixnum-tagged word count.
 757              `((inst mov rcx new-fp)
 758                (inst sub rcx rsp-tn)
 759                (inst shr rcx ,(- word-shift n-fixnum-tag-bits))
 760                               ;; Move the necessary args to registers,
 761                               ;; this moves them all even if they are
 762                               ;; not all needed.
 763                ,@(loop for name in *register-arg-names*
 764                        for index downfrom -1
 765                        collect `(loadw ,name new-fp ,index)))
 766              '((cond ((listp nargs)) ;; no-verify-arg-count
 767                      ((zerop nargs)
 768                       (zeroize rcx))
 769                      (t
 770                       (inst mov rcx (fixnumize nargs))))))
 771        ,@(cond ((eq return :tail)
 772                 '(        ;; Python has figured out what frame we should
 773                           ;; return to so might as well use that clue.
 774                           ;; This seems really important to the
 775                           ;; implementation of things like
 776                           ;; (without-interrupts ...)
 777                           ;;
 778                           ;; dtc; Could be doing a tail call from a
 779                           ;; known-call-local etc in which the old-fp
 780                           ;; or ret-pc are in regs or in non-standard
 781                           ;; places. If the passing location were
 782                           ;; wired to the stack in standard locations
 783                           ;; then these moves will be un-necessary;
 784                           ;; this is probably best for the x86.
 785                   (sc-case old-fp
 786                    ((control-stack)
 787                     (unless (= ocfp-save-offset (tn-offset old-fp))
 788                                       ;; FIXME: FORMAT T for stale
 789                                       ;; diagnostic output (several of
 790                                       ;; them around here), ick
     ;;                                  ;; ERROR signals at code generation
     ;;                                  ;; time, so the two forms after it
     ;;                                  ;; are never reached.
 791                       (error "** tail-call old-fp not S0~%")
 792                       (move old-fp-tmp old-fp)
 793                       (storew old-fp-tmp rbp-tn (frame-word-offset ocfp-save-offset))))
 794                    ((any-reg descriptor-reg)
 795                     (error "** tail-call old-fp in reg not S0~%")
 796                     (storew old-fp rbp-tn (frame-word-offset ocfp-save-offset))))
 797
 798                           ;; For tail call, we have to push the
 799                           ;; return-pc so that it looks like we CALLed
 800                           ;; despite the fact that we are going to JMP.
 801                   (inst push return-pc)))
 802                (t
 803                         ;; For non-tail call, we have to save our
 804                         ;; frame pointer and install the new frame
 805                         ;; pointer. We can't load stack tns after this
 806                         ;; point.
 807                 `(        ;; Python doesn't seem to allocate a frame
 808                           ;; here which doesn't leave room for the
 809                           ;; ofp/ret stuff.
 810
 811                           ;; The variable args are on the stack and
 812                           ;; become the frame, but there may be <3
 813                           ;; args and 3 stack slots are assumed
 814                           ;; allocated on the call. So need to ensure
 815                           ;; there are at least 3 slots. This hack
 816                           ;; just adds 3 more.
 817                   ,(if variable
 818                        '(inst sub rsp-tn (* 3 n-word-bytes)))
 819
 820                           ;; Bias the new-fp for use as an fp
 821                    ,(if variable
 822                         '(inst sub new-fp (* sp->fp-offset n-word-bytes)))
 823
 824                           ;; Save the fp
 825                    (storew rbp-tn new-fp (frame-word-offset ocfp-save-offset))
 826                    (move rbp-tn new-fp))))  ; NB - now on new stack frame.
 827
     ;;      ;; STEP-INSTRUMENTING is examined at code generation time. When set,
     ;;      ;; a single-step test is emitted and the trap is skipped if the test
     ;;      ;; yields :EQ. For named calls RAX is first pointed at the linkage cell.
 828        (when step-instrumenting
 829          ,@(when named '((compute-linkage-cell node fun rax)))
 830          (emit-single-step-test)
 831          (inst jmp :eq DONE)
 832          (inst break single-step-around-trap))
 833        DONE
 834        (note-this-location vop :call-site)
 835        ,(cond (named
 836                `(emit-direct-call fun ',(if (eq return :tail) 'jmp 'call)
 837                                   node step-instrumenting))
 838               ((eq return :tail)
 839                `(tail-call-unnamed rax fun-type vop))
 840               (t
 841                `(call-unnamed rax fun-type vop)))
 842        ,@(ecase return
 843            (:fixed '((default-unknown-values vop values nvals node rbx move-temp)))
 844            (:unknown
 845             '((note-this-location vop :unknown-return)
 846               (receive-unknown-values values-start nvals start count node)))
 847            ((:tail :unboxed))))))
 848
 849 (define-full-call call nil :fixed nil)                   ; unnamed callee, fixed values
 850 (define-full-call call-named t :fixed nil)               ; named callee, fixed values
 851 (define-full-call multiple-call nil :unknown nil)        ; unnamed callee, unknown values
 852 (define-full-call multiple-call-named t :unknown nil)    ; named callee, unknown values
 853 (define-full-call tail-call nil :tail nil)               ; unnamed callee, tail call
 854 (define-full-call tail-call-named t :tail nil)           ; named callee, tail call
 855
 856 (define-full-call call-variable nil :fixed t)            ; unnamed, variable arg count, fixed values
 857 (define-full-call multiple-call-variable nil :unknown t) ; unnamed, variable arg count, unknown values
 858 (define-full-call fixed-call-named t :fixed nil :fixed)  ; as CALL-NAMED but with ARGS = :FIXED
 859 (define-full-call fixed-tail-call-named t :tail nil :fixed) ; as TAIL-CALL-NAMED but with ARGS = :FIXED
 860
 861 (define-full-call unboxed-call-named t :unboxed nil)     ; named callee, RETURN = :UNBOXED
 862 (define-full-call fixed-unboxed-call-named t :unboxed nil :fixed) ; ditto, with ARGS = :FIXED
 863
 864 ;;; Emit INSTRUCTION (passed as 'JMP or 'CALL by DEFINE-FULL-CALL) aimed at the
 865 ;;; linkage cell of NAME, as one instruction when RAX need not be loaded first.
 866 (defun emit-direct-call (name instruction node step-instrumenting)
 867   (let ((target
 868          (cond (step-instrumenting (ea rax-tn)) ; RAX already points to the linkage cell
 869                ((sb-c::code-immobile-p node)    ; address the cell relative to RIP
 870                 (rip-relative-ea (make-fixup name :linkage-cell)))
 871                (t ; fetch the linkage table base into RAX, then index from it
 872                 (inst mov rax-tn (thread-slot-ea sb-vm::thread-linkage-table-slot))
 873                 (ea (make-fixup name :linkage-cell) rax-tn)))))
 874     ;; Whichever operand was chosen, exactly one JMP/CALL is emitted, last.
 875     (inst* instruction target)))
 876
 877 ;;; Emit a tail call (always a JMP) to the function-designator in FUN.
     ;;; TYPE selects the code shape:
     ;;;   :DESIGNATOR -- test FUN's lowtag at runtime; jump through the
     ;;;                  closure-fun slot of FUN on a zero result, else
     ;;;                  jump to the CALL-SYMBOL assembly routine.
     ;;;   :SYMBOL     -- always jump to the CALL-SYMBOL assembly routine.
     ;;;   otherwise   -- always jump through the closure-fun slot of FUN.
     ;;; VOP is passed to CODE-IMMOBILE-P and INVOKE-ASM-ROUTINE.
 878 (defun tail-call-unnamed (fun type vop)
 879   (let ((relative-call (sb-c::code-immobile-p vop))
 880         (fun-ea (ea (- (* closure-fun-slot n-word-bytes) fun-pointer-lowtag)
 881                     fun)))
 882     (case type
 883       (:designator
 884        (assemble ()
     ;;          ;; NOTE(review): %LEA-FOR-LOWTAG-TEST is defined elsewhere; presumably
     ;;          ;; it leaves RBX with zero lowtag bits iff FUN has FUN-POINTER-LOWTAG.
 885          (%lea-for-lowtag-test rbx-tn fun fun-pointer-lowtag)
 886          (inst test :byte rbx-tn lowtag-mask)
     ;;          ;; With RELATIVE-CALL the conditional jump targets the routine
     ;;          ;; directly, so nothing is emitted after the NOT-FUN label.
 887          (inst jmp :nz (if relative-call
 888                            (make-fixup 'call-symbol :assembly-routine)
 889                            not-fun))
 890          (inst jmp fun-ea)
 891          not-fun
 892          (unless relative-call
 893            (invoke-asm-routine 'jmp 'call-symbol vop))))
 894       (:symbol
 895        (invoke-asm-routine 'jmp 'call-symbol vop))
 896       (t
 897        (inst jmp fun-ea)))))
 898
 899 (defun call-unnamed (fun type vop)
     ;; Non-tail counterpart of TAIL-CALL-UNNAMED: emit a CALL to the
     ;; function-designator in FUN.
     ;;   TYPE = :SYMBOL     -- call the CALL-SYMBOL assembly routine only.
     ;;   TYPE = :DESIGNATOR -- test FUN's lowtag at runtime; on a zero result
     ;;                         call through the closure-fun slot, otherwise call
     ;;                         CALL-SYMBOL and skip the direct call.
     ;;   otherwise          -- call through the closure-fun slot of FUN.
 900   (case type
 901     (:symbol
 902      (invoke-asm-routine 'call 'call-symbol vop))
 903     (t
 904      (assemble ()
 905        (when (eq type :designator)
     ;;          ;; NOTE(review): %LEA-FOR-LOWTAG-TEST is defined elsewhere; presumably
     ;;          ;; it leaves RBX with zero lowtag bits iff FUN has FUN-POINTER-LOWTAG.
 906          (%lea-for-lowtag-test rbx-tn fun fun-pointer-lowtag)
 907          (inst test :byte rbx-tn lowtag-mask)
 908          (inst jmp :z call)
 909          (invoke-asm-routine 'call 'call-symbol vop)
 910          (inst jmp ret))
     ;;        ;; The CALL and RET labels are emitted for every non-:SYMBOL type,
     ;;        ;; but only the :DESIGNATOR code above jumps to them.
 911        call
 912        (inst call (ea (- (* closure-fun-slot n-word-bytes) fun-pointer-lowtag)
 913                       fun))
 914        ret))))
 915
 916 ;;; Tail call with a variable number of stack arguments. This is defined
 917 ;;; separately, since it needs special code that BLT's the arguments down.
 918 ;;; All the real work is done in the assembly routine. We just set things
     ;;; up so that it can find what it needs: ARGS in RSI and FUNCTION in RAX.
     ;;; FUN-TYPE chooses the routine: TAIL-CALL-VARIABLE when it is :FUNCTION,
     ;;; TAIL-CALL-CALLABLE-VARIABLE otherwise.
 919 (define-vop (tail-call-variable)
 920   (:args (args :scs (any-reg control-stack) :target rsi)
 921          (function :scs (descriptor-reg control-stack) :target rax)
 922          (old-fp)
 923          (return-pc))
 924   (:info fun-type)
 925   (:temporary (:sc unsigned-reg :offset rsi-offset :from (:argument 0)) rsi)
 926   (:temporary (:sc unsigned-reg :offset rax-offset :from (:argument 1)) rax)
 927   (:vop-var vop)
 928   (:generator 75
     ;;    ;; NOTE(review): CHECK-OCFP-AND-RETURN-PC is defined elsewhere; presumably
     ;;    ;; it verifies OLD-FP and RETURN-PC are in their standard save slots.
 929     (check-ocfp-and-return-pc old-fp return-pc)
 930     ;; Move these into the passing locations if they are not already there.
 931     (move rsi args)
 932     (move rax function)
 933     ;; And jump to the assembly routine.
 934     (invoke-asm-routine 'jmp (if (eq fun-type :function)
 935                                  'tail-call-variable
 936                                  'tail-call-callable-variable)
 937                         vop)))
 938 \f
 939 ;;;; unknown values return
 940
 941 ;;; Return a single-value using the Unknown-Values convention: the frame
 942 ;;; is popped with LEAVE and the carry flag is cleared before RET.
 943 ;;; pfw--get wired-tn conflicts sometimes if register sc specd for args
 944 ;;; having problems targeting args to regs -- using temps instead.
 945 ;;;
 946 ;;; First off, modifying the return-pc defeats the branch-prediction
 947 ;;; optimizations on modern CPUs quite handily. Second, we can do all
 948 ;;; this without needing a temp register. Fixed the latter, at least.
 949 ;;; -- AB 2006/Feb/04
 950 (define-vop (return-single)
 951   (:args (old-fp)
 952          (return-pc)
 953          (value))
 954   (:ignore value) ; no code is emitted for VALUE here
 955   (:generator 6
     ;;    ;; NOTE(review): CHECK-OCFP-AND-RETURN-PC is defined elsewhere; presumably
     ;;    ;; it verifies OLD-FP and RETURN-PC are in their standard save slots.
 956     (check-ocfp-and-return-pc old-fp return-pc)
 957     ;; Drop stack above old-fp and restore old frame pointer
 958     (inst leave)
 959     ;; Clear the multiple-value return flag
 960     (inst clc)
 961     ;; And return.
 962     (inst ret)))
 963
 964 ;;; Do unknown-values return of a fixed (other than 1) number of
 965 ;;; values. The VALUES are required to be set up in the standard
 966 ;;; passing locations. NVALS is the number of values returned.
 967 ;;;
 968 ;;; Basically, we just load RCX with the number of values returned and
 969 ;;; RBX with a pointer to the values, set the carry flag, and return.
 970 ;;; If every value fits in the argument registers this is LEAVE/RET.
     ;;; Otherwise RSP is moved below the stack values, RBP is restored from
     ;;; OLD-FP, and the return-pc is re-pushed (addressed via RBX) before RET.
 971 (define-vop (return)
 972   (:args (old-fp)
 973          (return-pc :to (:eval 1))
 974          (values :more t))
 975   (:ignore values)
 976   (:info nvals)
 977   ;; In the case of other than one value, we need these registers to
 978   ;; tell the caller where they are and how many there are.
 979   (:temporary (:sc unsigned-reg :offset rbx-offset) rbx)
 980   (:temporary (:sc unsigned-reg :offset rcx-offset) rcx)
 981   ;; We need to stretch the lifetime of return-pc past the argument
 982   ;; registers so that we can default the argument registers without
 983   ;; trashing return-pc.
 984   (:temporary (:sc unsigned-reg :offset (first *register-arg-offsets*)
 985                    :from :eval) a0)
 986   (:temporary (:sc unsigned-reg :offset (second *register-arg-offsets*)
 987                    :from :eval) a1)
 988   (:temporary (:sc unsigned-reg :offset (third *register-arg-offsets*)
 989                    :from :eval) a2)
 990
 991   (:generator 6
 992     (check-ocfp-and-return-pc old-fp return-pc)
 993     (when (= nvals 1)
 994       ;; This is handled in RETURN-SINGLE.
 995       (error "nvalues is 1"))
 996     ;; Establish the values pointer and values count.
 997     (inst lea rbx (ea (* sp->fp-offset n-word-bytes) rbp-tn))
 998     (if (zerop nvals)
 999         (zeroize rcx) ; smaller
1000         (inst mov rcx (fixnumize nvals)))
1001     ;; Pre-default any argument register that need it: the first unused
     ;;    ;; one is loaded with NIL and the rest are copied from it.
1002     (when (< nvals register-arg-count)
1003       (let* ((arg-tns (nthcdr nvals (list a0 a1 a2)))
1004              (first (first arg-tns)))
1005         (inst mov first nil-value)
1006         (dolist (tn (cdr arg-tns))
1007           (inst mov tn first))))
1008     ;; Set the multiple value return flag.
1009     (inst stc)
1010     ;; And away we go. Except that, in the second branch, return-pc is
1011     ;; still on the stack and we've changed the stack pointer. So we have
1012     ;; to tell it to index off of RBX instead of RBP.
1013     (cond ((<= nvals register-arg-count)
1014            (inst leave)
1015            (inst ret))
1016           (t
1017            ;; Some values are on the stack after RETURN-PC and OLD-FP,
1018            ;; can't return normally and some slots of the frame will
1019            ;; be used as temporaries by the receiver.
1020            ;;
1021            ;; Clear as much of the stack as possible, but not past the
1022            ;; old frame address.
1023            (inst lea rsp-tn
1024                  (ea (frame-byte-offset (1- nvals)) rbp-tn))
1025            (move rbp-tn old-fp)
1026            (inst push (ea (frame-byte-offset
1027                            (+ sp->fp-offset (tn-offset return-pc)))
1028                           rbx))
1029            (inst ret)))))
1030
1031 ;;; Do unknown-values return of an arbitrary number of values (passed
1032 ;;; on the stack.) We check for the common case of a single return
1033 ;;; value, and do that inline using the normal single value return
1034 ;;; convention. Otherwise, we jump to an assembly-routine. The inline
1035 ;;; check is omitted when the policy on NODE has SPACE > SPEED.
1036 ;;;
1037 ;;; The assembly routine takes the following args:
1038 ;;;  RCX -- number of values to find there.
1039 ;;;  RSI -- pointer to where to find the values.
1040 (define-vop (return-multiple)
1041   (:args (old-fp)
1042          (return-pc)
1043          (vals :scs (any-reg) :target rsi)
1044          (nvals :scs (any-reg) :target rcx))
1045   (:temporary (:sc unsigned-reg :offset rsi-offset :from (:argument 2)) rsi)
1046   (:temporary (:sc unsigned-reg :offset rcx-offset :from (:argument 3)) rcx)
1047   (:temporary (:sc descriptor-reg :offset (first *register-arg-offsets*)
1048                    :from (:eval 0)) a0)
1049   (:node-var node)
1050   (:vop-var vop)
1051   (:generator 13
1052     (check-ocfp-and-return-pc old-fp return-pc)
1053     (unless (policy node (> space speed))
1054       ;; Check for the single case.
1055       (let ((not-single (gen-label)))
1056         (inst cmp nvals (fixnumize 1))
1057         (inst jmp :ne not-single)
1058         ;; Return with one value, loaded from the word just below VALS.
1059         (loadw a0 vals -1)
1060         ;; Clear the stack until ocfp.
1061         (inst leave)
1062         ;; clear the multiple-value return flag
1063         (inst clc)
1064         ;; Out of here.
1065         (inst ret)
1066         ;; Nope, not the single case. Jump to the assembly routine.
1067         (emit-label not-single)))
1068     (move rsi vals)
1069     (move rcx nvals)
1070     (invoke-asm-routine 'jmp 'return-multiple vop)))
1071 \f
1072 ;;;; XEP hackery
1073
1074 ;;; Get the lexical environment from its passing location (RAX).
1075 (define-vop (setup-closure-environment)
1076   (:results (closure :scs (descriptor-reg)))
1077   (:info label)
1078   (:ignore label) ; LABEL is accepted as info but not used by the generator
1079   (:generator 6
1080     ;; Get result: copy RAX into CLOSURE.
1081     (move closure rax-tn)))
1082
1083 ;;; Copy a &MORE arg from the argument area to the end of the current
1084 ;;; frame. FIXED is the number of non-&MORE arguments.
     ;;; MIN-VERIFIED: when it is EQL to FIXED and FIXED > 1, no compare is
     ;;; emitted here and the flags from an earlier CMP are reused.
     ;;; The argument count is read from RCX-TN, and RBX-TN is used as the
     ;;; copy counter; neither is declared as a temporary of this VOP.
1085 (define-vop (copy-more-arg)
1086   (:temporary (:sc any-reg :offset r8-offset) copy-index)
1087   (:temporary (:sc any-reg :offset r9-offset) source)
1088   (:temporary (:sc descriptor-reg :offset r10-offset) temp)
1089   (:info fixed min-verified)
1090   (:generator 20
1091     ;; Avoid the copy if there are no more args.
1092     (cond ((zerop fixed)
1093            (inst test :dword rcx-tn rcx-tn)
1094            (inst jmp :z JUST-ALLOC-FRAME))
1095           ((and (eql min-verified fixed)
1096                 (> fixed 1))
1097            ;; verify-arg-count will do a CMP
1098            (inst jmp :e JUST-ALLOC-FRAME))
1099           (t
1100            (inst cmp :dword rcx-tn (fixnumize fixed))
1101            (inst jmp :be JUST-ALLOC-FRAME)))
1102
1103     ;; Create a negated copy of the number of arguments to allow us to
1104     ;; use EA calculations in order to do scaled subtraction.
1105     (inst mov :dword temp rcx-tn)
1106     (inst neg temp)
1107
1108     ;; Allocate the space on the stack.
1109     ;; stack = rbp + sp->fp-offset - frame-size - (nargs - fixed)
1110     ;; if we'd move SP backward, swap the meaning of rsp and source;
1111     ;; otherwise, we'd be accessing values below SP, and that's no good
1112     ;; if a signal interrupts this code sequence.  In that case, store
1113     ;; the final value in rsp after the stack-stack memmove loop.
1114     (let* ((delta (- fixed (sb-allocated-size 'stack)))
1115            (loop (gen-label))
1116            (fixnum->word (ash 1 (- word-shift n-fixnum-tag-bits)))
1117            (below (plusp delta)))
1118       (inst lea (if below source rsp-tn)
1119             (ea (* n-word-bytes (+ sp->fp-offset delta))
1120                 rbp-tn temp fixnum->word))
1121
1122       ;; Now: nargs>=1 && nargs>fixed
1123
1124       (cond ((< fixed register-arg-count)
1125              ;; the code above only moves the final value of rsp in
1126              ;; rsp directly if that condition is satisfied.  Currently,
1127              ;; r-a-c is 3, so the aver is OK. If the calling convention
1128              ;; ever changes, the logic above with LEA will have to be
1129              ;; adjusted.
1130              (aver (<= fixed (sb-allocated-size 'stack)))
1131              ;; We must stop when we run out of stack args, not when we
1132              ;; run out of more args.
1133              ;; Number to copy = nargs-3
1134              ;; Save the original count of args.
1135              (inst mov rbx-tn rcx-tn)
1136              (inst sub rbx-tn (fixnumize register-arg-count))
1137              ;; Everything of interest in registers.
1138              (inst jmp :be DO-REGS))
1139             (t
1140              ;; Number to copy = nargs-fixed
1141              (inst lea rbx-tn (ea (- (fixnumize fixed)) rcx-tn))))
1142
1143       ;; Initialize SOURCE (R9) to be the end of args.
1144       ;; Swap with SP if necessary to mirror the previous condition
1145       (unless (zerop delta)
1146         (inst lea (if below rsp-tn source)
1147               (ea (* sp->fp-offset n-word-bytes)
1148                   rbp-tn temp fixnum->word)))
1149
1150       ;; src: rbp + temp + sp->fp
1151       ;; dst: rbp + temp + sp->fp + (fixed - [stack-size])
1152       (cond ((zerop delta))             ; no-op move
1153             ((minusp delta)
1154              ;; dst is lower than src, copy forward
1155              (zeroize copy-index)
1156              ;; We used to use REP MOVS here, but on modern x86 it performs
1157              ;; much worse than an explicit loop for small blocks.
1158
1159              (emit-label loop)
1160              (inst mov temp (ea source copy-index))
1161              (inst mov (ea rsp-tn copy-index) temp)
1162              (inst add copy-index n-word-bytes)
1163              (inst sub rbx-tn (fixnumize 1))
1164              (inst jmp :nz loop))
1165             ((plusp delta)
1166              ;; dst is higher than src; copy backward
1167              (emit-label loop)
1168              (inst sub rbx-tn (fixnumize 1))
1169              (inst mov temp (ea rsp-tn rbx-tn fixnum->word))
1170              (inst mov (ea source rbx-tn fixnum->word) temp)
1171              (inst jmp :nz loop)
1172              ;; done with the stack--stack copy. Reset RSP to its final
1173              ;; value
1174              (inst mov rsp-tn source))))
1175     DO-REGS
1176
1177     ;; Here: nargs>=1 && nargs>fixed
1178     (when (< fixed register-arg-count)
1179       ;; Now we have to deposit any more args that showed up in
1180       ;; registers.
1181       (do ((i fixed))
1182           ( nil )
1183         ;; Store it relative to rbp
1184         (inst mov (ea (* n-word-bytes
1185                          (- sp->fp-offset
1186                             (+ 1 (- i fixed) (sb-allocated-size 'stack))))
1187                        rbp-tn)
1188               (nth i *register-arg-tns*))
1189
1190         (incf i)
1191         (when (>= i register-arg-count)
1192           (return))
1193
1194         ;; Don't deposit any more than there are.
1195         #.(assert (= register-arg-count 3))
1196         (cond ((> fixed 0)
1197                (inst cmp :dword rcx-tn (fixnumize i))
1198                (inst jmp :eq DONE))
1199               ;; Use a single comparison for 1 and 2: the branch below
     ;;              ;; reuses the flags of this CMP against 2.
1200               ((= i 1)
1201                (inst cmp :dword rcx-tn (fixnumize 2))
1202                (inst jmp :l DONE))
1203               (t
1204                (inst jmp :eq DONE)))))
1205
1206     (inst jmp DONE)
1207
1208     JUST-ALLOC-FRAME
     ;;    ;; No more args: just set RSP to the end of the allocated frame.
1209     (emit-lea rsp-tn rbp-tn
1210                          (* n-word-bytes
1211                             (- sp->fp-offset
1212                                (sb-allocated-size 'stack))))
1213
1214     DONE))
1215
1216 (define-vop ()
     ;; Translates SB-C::%MORE-KW-ARG. Two adjacent words are loaded from
     ;; OBJECT, indexed by the fixnum INDEX scaled to a byte offset:
     ;; VALUE from offset 0 and KEYWORD from offset N-WORD-BYTES.
1217   (:translate sb-c::%more-kw-arg)
1218   (:policy :fast-safe)
1219   (:args (object :scs (descriptor-reg) :to (:result 1))
1220          (index :scs (any-reg) :to (:result 1) :target keyword))
1221   (:arg-types * tagged-num)
1222   (:results (value :scs (descriptor-reg any-reg))
1223             (keyword :scs (descriptor-reg any-reg)))
1224   (:result-types * *)
1225   (:generator 4
1226      (inst mov value (ea object index (ash 1 (- word-shift n-fixnum-tag-bits))))
1227      (inst mov keyword (ea n-word-bytes object index
1228                            (ash 1 (- word-shift n-fixnum-tag-bits))))))
1229
1230 (define-vop (more-arg/c)
     ;; %MORE-ARG with a compile-time constant INDEX: one load from
     ;; INDEX words below OBJECT.
1231   (:translate sb-c:%more-arg)
1232   (:policy :fast-safe)
1233   (:args (object :scs (descriptor-reg) :to (:result 1)))
1234   (:info index)
     ;;  ;; The constant is limited to (SIGNED-BYTE (- 32 WORD-SHIFT)), presumably
     ;;  ;; so that the byte displacement fits in 32 bits.
1235   (:arg-types * (:constant (signed-byte #.(- 32 word-shift))))
1236   (:results (value :scs (descriptor-reg any-reg)))
1237   (:result-types *)
1238   (:generator 3
1239     (inst mov value (ea (- (* index n-word-bytes)) object))))
1240
1241 (define-vop (more-arg)
     ;; %MORE-ARG with a run-time fixnum INDEX. VALUE doubles as scratch:
     ;; it receives INDEX, is negated, and is then used as the scaled index
     ;; register of the load that overwrites it.
1242   (:translate sb-c:%more-arg)
1243   (:policy :fast-safe)
1244   (:args (object :scs (descriptor-reg) :to (:result 1))
1245          (index :scs (any-reg) :to (:result 1) :target value))
1246   (:arg-types * tagged-num)
1247   (:results (value :scs (descriptor-reg any-reg)))
1248   (:result-types *)
1249   (:generator 4
1250     (move value index)
1251     (inst neg value)
1252     (inst mov value (ea object value
1253                         (ash 1 (- word-shift n-fixnum-tag-bits))))))
1254
1255 (define-vop (more-arg-or-nil)
     ;; VALUE is the more arg at constant INDEX if COUNT exceeds INDEX
     ;; (unsigned comparison), else NIL.
1256   (:policy :fast-safe)
1257   (:args (object :scs (descriptor-reg) :to (:result 1))
1258          (count :scs (any-reg) :to (:result 1)))
1259   (:arg-types * tagged-num)
1260   (:info index)
1261   (:results (value :scs (descriptor-reg any-reg)))
1262   (:result-types *)
1263   (:generator 3
1264     (inst mov value nil-value)   ; default result, kept when the load is skipped
1265     (inst cmp count (fixnumize index))
1266     (inst jmp :be done)          ; COUNT <= INDEX: no such argument
1267     (inst mov value (ea (- (* index n-word-bytes)) object))
1268     done))
1269
1270 ;;; Turn more arg (context, count) into a list.
1271 ;;; Cons cells will be filled in right-to-left.
1272 ;;; This has a slight advantage in code size, and eliminates an initial
1273 ;;; forward jump into the loop. It also admits an interesting possibility
1274 ;;; to reduce the scope of the pseudo-atomic section so as not to
1275 ;;; encompass construction of the list. To do that, we will need to invent
1276 ;;; a new widetag for "contiguous CONS block" which has a header conveying
1277 ;;; the total payload length. Initially we would store that into the CAR of the
1278 ;;; first cons cell. Upon seeing such header, GC shall treat that entire object
1279 ;;; as a boxed payload of specified length. It will be implicitly pinned
1280 ;;; (if conservative) or transported as a whole (if precise). Then when the CAR
1281 ;;; of the first cons is overwritten, the object changes to a linked list.
1282 (define-vop ()
1283   (:translate %listify-rest-args)
1284   (:policy :safe)
1285   ;; CONTEXT is used throughout the copying loop
1286   (:args (context :scs (descriptor-reg) :to :save)
1287          (count :scs (any-reg) :target rcx))
1288   (:arg-types * tagged-num)
1289   ;; The only advantage to specifying RCX here is that JRCXZ can be used
1290   ;; in one place, and then only in the unlikely scenario that COUNT is not
1291   ;; in RCX. If it was, SHL sets/clears the Z flag, but LEA doesn't.
1292   ;; Not much of an advantage, but why not.
1293   (:temporary (:sc unsigned-reg :offset rcx-offset :from (:argument 1)) rcx)
1294   ;; Note that DST conflicts with RESULT because we use both as temps
1295   (:temporary (:sc unsigned-reg) value dst)
1296   #+gs-seg (:temporary (:sc unsigned-reg :offset 15) thread-tn)
1297   (:results (result :scs (descriptor-reg)))
1298   (:node-var node)
1299   (:generator 20
1300 #|
1301     ;; TODO: if instrumenting, just revert to the older way of precomputing
1302     ;; a size rather than scaling by 8 in ALLOCATION so that we don't have
1303     ;; to scale and unscale.
1304     ;; Compute the number of bytes to allocate
1305     (let ((shift (- (1+ word-shift) n-fixnum-tag-bits)))
1306       (if (location= count rcx)
1307           (inst shl :dword rcx shift)
1308           (inst lea :dword rcx (ea nil count (ash 1 shift)))))
1309 |#
1310     (move rcx count :dword)
1311     ;; Setup for the CDR of the last cons (or the entire result) being NIL.
1312     (inst mov result nil-value)
     ;;    ;; Zero arguments: the result is NIL, skip allocation entirely.
1313     (cond ((not (member :allocation-size-histogram sb-xc:*features*))
1314            (inst jrcxz DONE))
1315           (t ; jumps too far for JRCXZ sometimes
1316            (inst test rcx rcx)
1317            (inst jmp :z done)))
1318     (when (and (not (node-stack-allocate-p node)) (instrument-alloc-policy-p node))
1319       (inst shl :dword rcx word-shift) ; compute byte count
1320       (instrument-alloc +cons-primtype+ rcx node (list value dst) thread-tn)
1321       (inst shr :dword rcx word-shift)) ; undo the computation
1322     (pseudo-atomic (:elide-if (node-stack-allocate-p node) :thread-tn thread-tn)
1323        ;; Produce an untagged pointer into DST
1324       (let ((scale
1325              (cond ((node-stack-allocate-p node)
1326                     ;; LEA on RSP would be ok but we'd need to negate RCX first, then un-negate
1327                     ;; to compute the final cons, then negate again. So use SHL and SUB instead.
1328                     (inst shl :dword rcx word-shift)
1329                     (stack-allocation rcx 0 dst)
1330                     1)
1331                    (t
     ;;                    ;; On allocation overflow, RCX and CONTEXT are pushed, an
     ;;                    ;; assembly routine is called, and RESULT is popped from
     ;;                    ;; the stack; the inline copying loop is then skipped.
1332                     (allocation +cons-primtype+ rcx 0 dst node value thread-tn
1333                        :scale 8
1334                        :overflow
1335                        (lambda ()
1336                          (inst push rcx)
1337                          (inst push context)
1338                          (invoke-asm-routine
1339                           'call (if (system-tlab-p 0 node) 'sys-listify-&rest 'listify-&rest)
1340                           node)
1341                          (inst pop result)
1342                          (inst jmp alloc-done)))
1343                     8))))
1344        ;; Recalculate DST as a tagged pointer to the last cons
1345        (inst lea dst (ea (- list-pointer-lowtag (* cons-size n-word-bytes)) dst rcx scale))
1346        ;; scale=8 implies RCX counts ncells (as a fixnum) therefore just untag it.
1347        ;; scale=1 implies RCX counts nbytes therefore ncells = RCX/16
1348        (inst shr :dword rcx (if (= scale 8) n-fixnum-tag-bits (1+ word-shift))))
1349        ;; The rightmost arguments are at lower addresses.
1350        ;; Start by indexing the last argument
1351        (inst neg rcx) ; :QWORD because it's negative
1352        LOOP
1353        ;; Grab one value and store into this cons. Use RCX as an index into the
1354        ;; vector of values in CONTEXT, but add 8 because CONTEXT points exactly at
1355        ;; the 0th value, which means that the index is 1 word too low.
1356        ;; (It's -1 if there is exactly 1 value, instead of 0, and so on)
1357        (inst mov value (ea 8 context rcx 8))
1358        ;; RESULT began as NIL which gives the correct value for the CDR in the final cons.
1359        ;; Subsequently it points to each cons just populated, which is correct all the way
1360        ;; up to and including the final result.
1361        (storew result dst cons-cdr-slot list-pointer-lowtag)
1362        (storew value dst cons-car-slot list-pointer-lowtag)
1363        (inst mov result dst) ; preserve the value to put in the CDR of the preceding cons
1364        (inst sub dst (* cons-size n-word-bytes)) ; get the preceding cons
1365        (inst inc rcx) ; :QWORD because it's negative
1366        (inst jmp :nz loop)
1367        ALLOC-DONE)
1368     DONE))
1369
1370 ;;; Return the location and size of the &MORE arg glob created by
1371 ;;; COPY-MORE-ARG. SUPPLIED is the total number of arguments supplied
1372 ;;; (originally passed in RCX). FIXED is the number of non-rest
1373 ;;; arguments.
1374 ;;;
1375 ;;; We must duplicate some of the work done by COPY-MORE-ARG, since at
1376 ;;; that time the environment is in a pretty brain-damaged state,
1377 ;;; preventing this info from being returned as values. What we do is
1378 ;;; compute supplied - fixed, and return a pointer that many words
1379 ;;; below the current stack top.
(define-vop ()
  (:policy :fast-safe)
  (:translate sb-c::%more-arg-context)
  (:args (supplied :scs (any-reg) :target count))
  (:arg-types positive-fixnum (:constant fixnum))
  (:info fixed)
  (:results (context :scs (descriptor-reg))
            (count :scs (any-reg)))
  (:result-types t tagged-num)
  (:note "more-arg-context")
  (:generator 5
    ;; CONTEXT = RSP + COUNT*SCALE - (FIXED+1) words, computed while COUNT
    ;; still holds the full SUPPLIED value.  COUNT is then reduced by FIXED
    ;; so that it describes only the more-args.
    (let ((displacement (* (- (1+ fixed)) n-word-bytes))
          ;; Multiplier applied to COUNT inside the effective address.
          (scale (ash 1 (- word-shift n-fixnum-tag-bits))))
      (move count supplied)
      ;; RSP currently addresses the last argument pushed.  Aim CONTEXT at
      ;; the first more-arg itself rather than at the slot above it.
      (inst lea context (ea displacement rsp-tn count scale))
      ;; Nothing to subtract when there are no fixed arguments.
      (when (/= fixed 0)
        (inst sub count (fixnumize fixed))))))
1399
(define-vop (verify-arg-count)
  (:policy :fast-safe)
  (:args (nargs :scs (any-reg)))
  (:arg-types positive-fixnum (:constant t) (:constant t))
  (:temporary (:sc unsigned-reg :offset rbx-offset) temp)
  (:info min max)
  (:vop-var vop)
  (:save-p :compute-only)
  (:generator 3
    ;; Branch to an INVALID-ARG-COUNT-ERROR stub unless NARGS is acceptable.
    ;; Three shapes are handled, selected by the MIN and MAX info arguments:
    ;;   MIN is NIL      - NARGS must equal MAX exactly
    ;;   MIN and MAX set - NARGS must lie in [MIN, MAX]
    ;;   MAX is NIL      - NARGS must be at least MIN
    ;; NOTE: copy-more-arg expects this to issue a CMP for min > 1
    (let ((lose (generate-error-code vop 'invalid-arg-count-error nargs)))
      (flet ((check-exact ()
               ;; TEST against itself is used when the required count is 0,
               ;; otherwise compare with the fixnum-encoded MAX.
               (if (zerop max)
                   (inst test :dword nargs nargs)
                   (inst cmp :dword nargs (fixnumize max)))
               (inst jmp :ne lose))
             (check-range ()
               ;; Bias NARGS down by MIN (into TEMP) so that one unsigned
               ;; comparison against MAX-MIN covers both bounds.  With
               ;; MIN = 0 no biasing is needed and NARGS is compared directly.
               (let ((biased (cond ((zerop min)
                                    nargs)
                                   (t
                                    (inst lea :dword temp
                                          (ea (fixnumize (- min)) nargs))
                                    temp))))
                 (inst cmp :dword biased (fixnumize (- max min)))
                 (inst jmp :a lose)))
             (check-at-least ()
               ;; MIN = 0 emits nothing.  MIN = 1 only needs a zero test;
               ;; larger minimums use an unsigned CMP.
               (when (plusp min)
                 (cond ((= min 1)
                        (inst test :dword nargs nargs)
                        (inst jmp :e lose))
                       (t
                        (inst cmp :dword nargs (fixnumize min))
                        (inst jmp :b lose))))))
        (cond ((null min) (check-exact))
              (max (check-range))
              (t (check-at-least)))))))
1430 ;;; Single-stepping
1431
(defun emit-single-step-test ()
  ;; Emit a byte-wide compare of the "stepping" flag against zero.
  ;; Where the flag lives depends on the build:
  ;;   +SB-THREAD: a slot in the thread structure
  ;;   -SB-THREAD: the value of the static symbol SB-IMPL::*STEPPING*
  ;; Either alternative avoids needing an extra register: a thread-local
  ;; symbol value would cost one on +SB-THREAD, and a thread-structure slot
  ;; would cost one on -SB-THREAD.  That matters little on x86-64 but is
  ;; more serious on x86.
  (inst cmp :byte
        #+sb-thread (thread-slot-ea thread-stepping-slot)
        #-sb-thread (static-symbol-value-ea 'sb-impl::*stepping*)
        0))
1443
(define-vop (step-instrument-before-vop)
  (:policy :fast-safe)
  (:vop-var vop)
  (:generator 3
     ;; Compare the stepping flag with zero; when it is zero, jump over
     ;; the breakpoint so that only the location note remains.
     (emit-single-step-test)
     (inst jmp :eq NOT-STEPPING)
     ;; Flag is non-zero: trap with the single-step-before code.
     (inst break single-step-before-trap)
     NOT-STEPPING
     (note-this-location vop :internal-error)))