From b85c67767a5283814690ca2165fa145d49019a4e Mon Sep 17 00:00:00 2001 From: Stas Boukarev Date: Wed, 10 Feb 2016 15:47:08 +0300 Subject: [PATCH] Optimize some VOPS on ARM64 with LDP/STP. Replace pairs of LOADW/STOREW with LDP/STP where feasible. --- src/assembly/arm64/assem-rtns.lisp | 45 +++++++++++++++++++------------------- src/compiler/arm64/call.lisp | 21 +++++++++++------- src/compiler/arm64/nlx.lisp | 26 ++++++++++++---------- src/compiler/arm64/vm.lisp | 2 +- 4 files changed, 51 insertions(+), 43 deletions(-) diff --git a/src/assembly/arm64/assem-rtns.lisp b/src/assembly/arm64/assem-rtns.lisp index ac69f9ea0..a410f0052 100644 --- a/src/assembly/arm64/assem-rtns.lisp +++ b/src/assembly/arm64/assem-rtns.lisp @@ -32,8 +32,7 @@ (inst cmp nvals 0) (inst b :le default-r0-and-on) (inst cmp nvals (fixnumize 2)) - (loadw r0 vals) - (loadw r1 vals 1) + (inst ldp r0 r1 (@ vals)) (inst b :le default-r2-and-on) (inst cmp nvals (fixnumize 3)) (loadw r2 vals 2) @@ -111,10 +110,8 @@ ;; Load the argument regs (must do this now, 'cause the blt might ;; trash these locations, and we need ARGS to be dead for the blt) - (loadw r0 args 0) - (loadw r1 args 1) - (loadw r2 args 2) - (loadw r3 args 3) + (inst ldp r0 r1 (@ args)) + (inst ldp r2 r3 (@ args (* n-word-bytes 2))) ;; ARGS is now dead, we access the remaining arguments by offset ;; from CSP-TN. @@ -147,12 +144,12 @@ ;;;; Non-local exit noise. (define-assembly-routine (throw - (:return-style :none)) - ((:arg target descriptor-reg r0-offset) - (:arg start any-reg r8-offset) - (:arg count any-reg nargs-offset) - (:temp catch any-reg r1-offset) - (:temp tag descriptor-reg r2-offset)) + (:return-style :none)) + ((:arg target descriptor-reg r0-offset) + (:arg start any-reg r8-offset) + (:arg count any-reg nargs-offset) + (:temp catch any-reg r1-offset) + (:temp tag descriptor-reg r2-offset)) (declare (ignore start count)) (load-tl-symbol-value catch *current-catch-block*) @@ -162,15 +159,16 @@ (let ((error (generate-error-code nil 'unseen-throw-tag-error target))) (inst cbz catch error)) - (assemble () - (loadw tag catch catch-block-tag-slot) - (inst cmp tag target) - (inst b :eq DONE) - (loadw catch catch catch-block-previous-catch-slot) - (inst b LOOP) - DONE - (move target catch) ;; TARGET coincides with UNWIND's BLOCK argument - (inst b (make-fixup 'unwind :assembly-routine)))) + #.(assert (and (= catch-block-tag-slot 4) + (= catch-block-previous-catch-slot 5))) + (inst ldp tag tmp-tn (@ catch (* 4 n-word-bytes))) + (inst cmp tag target) + (inst b :eq DONE) + (inst mov catch tmp-tn) + (inst b LOOP) + DONE + (move target catch) ;; TARGET coincides with UNWIND's BLOCK argument + (inst b (make-fixup 'unwind :assembly-routine))) (define-assembly-routine (unwind (:return-style :none) @@ -195,7 +193,8 @@ EQ (inst csel cur-uwp block cur-uwp :eq) - (loadw cfp-tn cur-uwp unwind-block-current-cont-slot) - (loadw code-tn cur-uwp unwind-block-current-code-slot) + #.(assert (and (= unwind-block-current-cont-slot 1) + (= unwind-block-current-code-slot 2))) + (inst ldp cfp-tn code-tn (@ cur-uwp n-word-bytes)) (loadw lra cur-uwp unwind-block-entry-pc-slot) (lisp-return lra lip :known)) diff --git a/src/compiler/arm64/call.lisp b/src/compiler/arm64/call.lisp index 1417766f8..d69b3f0b4 100644 --- a/src/compiler/arm64/call.lisp +++ b/src/compiler/arm64/call.lisp @@ -285,15 +285,16 @@ (inst mov count (fixnumize 1)) (inst b DONE) MULTIPLE - (do ((arg *register-arg-tns* (rest arg)) - (i 0 (1+ i))) + #.(assert (evenp register-arg-count)) + (do ((arg *register-arg-tns* (cddr arg)) + (i 0 (+ i 2))) ((null arg)) - (storew (first arg) args i 0)) + (inst stp (first arg) (second arg) + (@ args (* i n-word-bytes)))) (move start args) (move count nargs) DONE)) - ;;; VOP that can be inherited by unknown values receivers. The main ;;; thing this handles is allocation of the result temporaries. (define-vop (unknown-values-receiver) @@ -866,10 +867,14 @@ (inst add csp-tn nargs-pass (* 2 n-word-bytes)) (inst sub nargs-pass nargs-pass new-fp) (inst asr nargs-pass nargs-pass (- word-shift n-fixnum-tag-bits)) - ,@(let ((index -1)) - (mapcar #'(lambda (name) - `(loadw ,name new-fp ,(incf index))) - *register-arg-names*)) + ,@(do ((arg *register-arg-names* (cddr arg)) + (i 0 (+ i 2)) + (insts)) + ((null arg) (nreverse insts)) + #.(assert (evenp register-arg-count)) + (push `(inst ldp ,(first arg) ,(second arg) + (@ new-fp ,(* i n-word-bytes))) + insts)) (storew cfp-tn new-fp ocfp-save-offset)) '((inst mov nargs-pass (fixnumize nargs))))) ,@(if (eq return :tail) diff --git a/src/compiler/arm64/nlx.lisp b/src/compiler/arm64/nlx.lisp index e01e2e405..3c90d72a8 100644 --- a/src/compiler/arm64/nlx.lisp +++ b/src/compiler/arm64/nlx.lisp @@ -74,11 +74,13 @@ (:generator 22 (inst add block cfp-tn (add-sub-immediate (* (tn-offset tn) n-word-bytes))) (load-tl-symbol-value temp *current-unwind-protect-block*) - (storew temp block unwind-block-current-uwp-slot) - (storew cfp-tn block unwind-block-current-cont-slot) - (storew code-tn block unwind-block-current-code-slot) + #.(assert (and (= unwind-block-current-uwp-slot 0) + (= unwind-block-current-cont-slot 1))) + (inst stp temp cfp-tn (@ block)) (inst compute-lra temp lip entry-label) - (storew temp block catch-block-entry-pc-slot))) + #.(assert (and (= unwind-block-current-code-slot 2) + (= unwind-block-entry-pc-slot 3))) + (inst stp code-tn temp (@ block (* n-word-bytes 2))))) ;;; Like Make-Unwind-Block, except that we also store in the specified tag, and ;;; link the block into the Current-Catch list. @@ -93,15 +95,17 @@ (:generator 44 (inst add result cfp-tn (add-sub-immediate (* (tn-offset tn) n-word-bytes))) (load-tl-symbol-value temp *current-unwind-protect-block*) - (storew temp result catch-block-current-uwp-slot) - (storew cfp-tn result catch-block-current-cont-slot) - (storew code-tn result catch-block-current-code-slot) + #.(assert (and (= catch-block-current-uwp-slot 0) + (= catch-block-current-cont-slot 1))) + (inst stp temp cfp-tn (@ result)) + #.(assert (and (= catch-block-current-code-slot 2) + (= catch-block-entry-pc-slot 3))) (inst compute-lra temp lip entry-label) - (storew temp result catch-block-entry-pc-slot) - - (storew tag result catch-block-tag-slot) + (inst stp code-tn temp (@ result (* n-word-bytes 2))) + #.(assert (and (= catch-block-tag-slot 4) + (= catch-block-previous-catch-slot 5))) (load-tl-symbol-value temp *current-catch-block*) - (storew temp result catch-block-previous-catch-slot) + (inst stp tag temp (@ result (* n-word-bytes 4))) (store-tl-symbol-value result *current-catch-block*) (move block result))) diff --git a/src/compiler/arm64/vm.lisp b/src/compiler/arm64/vm.lisp index 1e40561ae..84ff75b57 100644 --- a/src/compiler/arm64/vm.lisp +++ b/src/compiler/arm64/vm.lisp @@ -82,7 +82,7 @@ ;; registers used to pass arguments ;; ;; the number of arguments/return values passed in registers - (def!constant register-arg-count 4) + (def!constant register-arg-count 4) ;; names and offsets for registers used to pass arguments (defregset *register-arg-offsets* r0 r1 r2 r3) (defparameter *register-arg-names* '(r0 r1 r2 r3))) -- 2.11.4.GIT