Optimize some VOPS on ARM64 with LDP/STP.
[sbcl.git] / src / assembly / arm64 / assem-rtns.lisp
bloba410f005212844d1ea55c41837b3e83be312263e
1 (in-package "SB!VM")
2 \f
3 ;;;; Return-multiple with other than one value
5 #+sb-assembling ;; we don't want a vop for this one.
6 (define-assembly-routine
7 (return-multiple
8 (:return-style :none))
10 ;; These four are really arguments.
11 ((:temp nvals any-reg nargs-offset)
12 (:temp vals any-reg nl1-offset)
13 (:temp old-fp any-reg nl2-offset)
14 (:temp lra descriptor-reg r6-offset)
16 ;; These are just needed to facilitate the transfer
17 (:temp count any-reg nl3-offset)
18 (:temp src any-reg nl4-offset)
19 (:temp dst descriptor-reg r4-offset)
20 (:temp temp descriptor-reg r5-offset)
22 ;; These are needed so we can get at the register args.
23 (:temp r0 descriptor-reg r0-offset)
24 (:temp r1 descriptor-reg r1-offset)
25 (:temp r2 descriptor-reg r2-offset)
26 (:temp r3 descriptor-reg r3-offset)
27 (:temp lip interior-reg lr-offset))
29 ;; Note, because of the way the return-multiple vop is written, we
30 ;; can assume that we are never called with nvals == 1 (not that it
31 ;; helps overmuch).
32 (inst cmp nvals 0)
33 (inst b :le default-r0-and-on)
34 (inst cmp nvals (fixnumize 2))
35 (inst ldp r0 r1 (@ vals))
36 (inst b :le default-r2-and-on)
37 (inst cmp nvals (fixnumize 3))
38 (loadw r2 vals 2)
39 (inst b :le default-r3-and-on)
40 (inst cmp nvals (fixnumize 4))
41 (loadw r3 vals 3)
42 (inst b :le DONE)
44 ;; Copy the remaining args over the outbound stack frame.
45 (inst add src vals (* 4 n-word-bytes))
46 (inst add dst cfp-tn (* 4 n-word-bytes))
47 (inst sub count nvals (fixnumize 4))
49 LOOP
50 (inst subs count count (fixnumize 1))
51 (inst ldr temp (@ src n-word-bytes :post-index))
52 (inst str temp (@ dst n-word-bytes :post-index))
53 (inst b :ge LOOP)
54 (inst b DONE)
56 DEFAULT-R0-AND-ON
57 (move r0 null-tn)
58 (move r1 null-tn)
59 DEFAULT-R2-AND-ON
60 (move r2 null-tn)
61 DEFAULT-R3-AND-ON
62 (move r3 null-tn)
64 DONE
66 ;; Deallocate the unused stack space.
67 (move ocfp-tn cfp-tn)
68 (move cfp-tn old-fp)
69 (inst add csp-tn ocfp-tn (lsl nvals (- word-shift n-fixnum-tag-bits)))
71 ;; Return.
72 (lisp-return lra lip :multiple-values))
74 ;;;; tail-call-variable.
76 #+sb-assembling ;; no vop for this one either.
77 (define-assembly-routine
78 (tail-call-variable
79 (:return-style :none))
81 ;; These are really args.
82 ((:temp args any-reg nl2-offset)
83 (:temp lexenv descriptor-reg lexenv-offset)
85 ;; We need to compute this
86 (:temp nargs any-reg nargs-offset)
88 ;; These are needed by the blitting code.
89 (:temp dest any-reg nl2-offset) ;; Not live concurrent with ARGS.
90 (:temp count any-reg nl3-offset)
91 (:temp temp descriptor-reg r8-offset)
92 (:temp lip interior-reg lr-offset)
94 ;; These are needed so we can get at the register args.
95 (:temp r0 descriptor-reg r0-offset)
96 (:temp r1 descriptor-reg r1-offset)
97 (:temp r2 descriptor-reg r2-offset)
98 (:temp r3 descriptor-reg r3-offset))
100 ;; We're in a tail-call scenario, so we use the existing LRA and
101 ;; OCFP, both already set up in the stack frame. We have a set of
102 ;; arguments, represented as the address of the first argument
103 ;; (ARGS) and the address just beyond the last argument (CSP-TN),
104 ;; and need to set up the arg-passing-registers, any stack arguments
105 ;; (the fourth and subsequent arguments, if such exist), and the
106 ;; total arg count (NARGS).
108 ;; Calculate NARGS
109 (inst sub nargs csp-tn args)
111 ;; Load the argument regs (must do this now, 'cause the blt might
112 ;; trash these locations, and we need ARGS to be dead for the blt)
113 (inst ldp r0 r1 (@ args))
114 (inst ldp r2 r3 (@ args (* n-word-bytes 2)))
116 ;; ARGS is now dead, we access the remaining arguments by offset
117 ;; from CSP-TN.
119 ;; Figure out how many arguments we really need to shift.
120 (inst subs count nargs (* register-arg-count n-word-bytes))
121 ;; If there aren't any stack args then we're done.
122 (inst b :le DONE)
124 ;; Find where our shifted arguments need to go.
125 (inst add dest cfp-tn nargs)
127 (inst neg count count)
128 LOOP
129 ;; Copy one arg.
130 (inst ldr temp (@ csp-tn count))
131 (inst str temp (@ dest count))
132 (inst adds count count n-word-bytes)
133 (inst b :ne LOOP)
135 DONE
136 ;; The call frame is all set up, so all that remains is to jump to
137 ;; the new function. We need a boxed register to hold the actual
138 ;; function object (in case of closure functions or funcallable
139 ;; instances)
140 (inst asr nargs nargs (- word-shift n-fixnum-tag-bits))
141 (loadw temp lexenv closure-fun-slot fun-pointer-lowtag)
142 (lisp-jump temp lip))
144 ;;;; Non-local exit noise.
146 (define-assembly-routine (throw
147 (:return-style :none))
148 ((:arg target descriptor-reg r0-offset)
149 (:arg start any-reg r8-offset)
150 (:arg count any-reg nargs-offset)
151 (:temp catch any-reg r1-offset)
152 (:temp tag descriptor-reg r2-offset))
153 (declare (ignore start count))
155 (load-tl-symbol-value catch *current-catch-block*)
157 LOOP
159 (let ((error (generate-error-code nil 'unseen-throw-tag-error target)))
160 (inst cbz catch error))
162 #.(assert (and (= catch-block-tag-slot 4)
163 (= catch-block-previous-catch-slot 5)))
164 (inst ldp tag tmp-tn (@ catch (* 4 n-word-bytes)))
165 (inst cmp tag target)
166 (inst b :eq DONE)
167 (inst mov catch tmp-tn)
168 (inst b LOOP)
169 DONE
170 (move target catch) ;; TARGET coincides with UNWIND's BLOCK argument
171 (inst b (make-fixup 'unwind :assembly-routine)))
173 (define-assembly-routine (unwind
174 (:return-style :none)
175 (:translate %continue-unwind)
176 (:policy :fast-safe))
177 ((:arg block (any-reg descriptor-reg) r0-offset)
178 (:arg start (any-reg descriptor-reg) r8-offset)
179 (:arg count (any-reg descriptor-reg) nargs-offset)
180 (:temp ocfp any-reg ocfp-offset)
181 (:temp lra descriptor-reg lexenv-offset)
182 (:temp cur-uwp any-reg nl2-offset)
183 (:temp lip interior-reg lr-offset))
184 (declare (ignore start count))
185 (let ((error (generate-error-code nil 'invalid-unwind-error)))
186 (inst cbz block error))
187 (load-tl-symbol-value cur-uwp *current-unwind-protect-block*)
188 (loadw ocfp block unwind-block-current-uwp-slot)
189 (inst cmp cur-uwp ocfp)
190 (inst b :eq EQ)
191 (loadw ocfp cur-uwp unwind-block-current-uwp-slot)
192 (store-tl-symbol-value ocfp *current-unwind-protect-block*)
194 (inst csel cur-uwp block cur-uwp :eq)
196 #.(assert (and (= unwind-block-current-cont-slot 1)
197 (= unwind-block-current-code-slot 2)))
198 (inst ldp cfp-tn code-tn (@ cur-uwp n-word-bytes))
199 (loadw lra cur-uwp unwind-block-entry-pc-slot)
200 (lisp-return lra lip :known))