From 20585b1158ba6f82925157f5413ea07ff98cac6d Mon Sep 17 00:00:00 2001 From: rlaakso Date: Fri, 12 Aug 2005 11:55:38 +0000 Subject: [PATCH] *** empty log message *** --- TODO | 19 ----- VOP.txt | 10 +++ detect-simd.lisp | 8 --- generate-sse-instructions.lisp | 38 ++++++---- load.lisp | 4 +- sbcl-src/src/compiler/x86/insts.lisp | 134 ++++++++++++++++++++++++++++------ sbcl-src/src/compiler/x86/vm.lisp | 30 ++++---- sse-seq.lisp | 136 +++++++++++++++++++++++++++++++++++ test-seq.lisp | 64 +++++++++++++++++ 9 files changed, 367 insertions(+), 76 deletions(-) create mode 100644 VOP.txt create mode 100644 sse-seq.lisp create mode 100644 test-seq.lisp diff --git a/TODO b/TODO index 0260311..8b13789 100644 --- a/TODO +++ b/TODO @@ -1,20 +1 @@ -operations from AMD manual: - - -sign: - -andps xmm0, #x8000 0000 8000 0000 .. - -neg: - -xorps xmm0, #x8000 0000 8000 0000 .. - -abs: - -andps xmm0, #x7FFF FFFF 7FFF FFFF .. - -clear: - -xorps xmm0, xmm0 - diff --git a/VOP.txt b/VOP.txt new file mode 100644 index 0000000..d950d13 --- /dev/null +++ b/VOP.txt @@ -0,0 +1,10 @@ + +(loadw length seq1 vector-length-slot other-pointer-lowtag) +=> length will be length of seq1. This is fixnum, so (inst shr length 2) to get real length + +(psrldq xmm0 32) +=> this doesn't work ? because immediate is number of bytes, not bits (like shr). + +(inst mov result 1) +=> page fault ? results should be fixnumized, (inst shl result 2) + diff --git a/detect-simd.lisp b/detect-simd.lisp index cb4bd58..92161d7 100644 --- a/detect-simd.lisp +++ b/detect-simd.lisp @@ -57,11 +57,3 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (inst mov res edx))) - -(eval-when (:load-toplevel) - (let ((res (sb-sys:%primitive sb-vm::%detect-simd/x86))) -;; (format t "res is ~A~%" res) - (if (/= (logand res #b001) 0) (pushnew :sse sb-vm::*backend-subfeatures*)) - (if (/= (logand res #b010) 0) (pushnew :sse2 sb-vm::*backend-subfeatures*)) - (if (/= (logand res #b100) 0) (pushnew :sse3 sb-vm::*backend-subfeatures*)))) - \ No newline at end of file diff --git a/generate-sse-instructions.lisp b/generate-sse-instructions.lisp index 5de9881..2bea781 100644 --- a/generate-sse-instructions.lisp +++ b/generate-sse-instructions.lisp @@ -42,18 +42,6 @@ MOVDQ2Q . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . MOVQ2DQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 208 -(ib-forms:) -PSLLD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 323 -PSLLDQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 326 -PSLLQ. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 328 -PSLLW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 330 -PSRAD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 333 -PSRAW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 336 -PSRLD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 339 -PSRLDQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 342 -PSRLQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 344 -PSRLW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 347 - STMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 410 @@ -339,7 +327,31 @@ STMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ,@(emit-ops ops-m2r) (emit-ea segment src (reg-tn-encoding dst))) (t ,@(emit-ops ops-r2m) - (emit-ea segment dst (reg-tn-encoding src))))))))) + (emit-ea segment dst (reg-tn-encoding src)))))))) + + ;; misc + (loop for (name mode . opcodes) in + '( + (pslld-ib 6 #x66 #x0F #x72) + (pslldq-ib 7 #x66 #x0F #x73) + (psllq-ib 6 #x66 #x0F #x73) + (psllw-ib 6 #x66 #x0F #x71) + (psrad-ib 4 #x66 #x0F #x72) + (psraw-ib 4 #x66 #x0F #x71) + (psrld-ib 2 #x66 #x0F #x72) + (psrldq-ib 3 #x66 #x0F #x73) + (psrlq-ib 2 #x66 #x0F #x73) + (psrlw-ib 2 #x66 #x0F #x71) + ) + do + (format stream "~S~%~%" + `(define-instruction ,(intern (symbol-name name)) (segment dst amount) + (:emitter + ,@(emit-ops opcodes) + (emit-ea segment dst ,mode) + (emit-byte segment amount))))) + + ) (defun gen-ops-to-file (filename) (with-open-file (stream filename :direction :output :if-exists :supersede) diff --git a/load.lisp b/load.lisp index d17b614..e71fcf9 100644 --- a/load.lisp +++ b/load.lisp @@ -14,6 +14,8 @@ (if t (progn (load (compile-file "detect-simd.lisp")) - (load (compile-file "push-simd-features.lisp")) + (load (compile-file "timing.lisp")) + (load (compile-file "sse-seq.lisp")) + (load (compile-file "test-seq.lisp")) )) \ No newline at end of file diff --git a/sbcl-src/src/compiler/x86/insts.lisp b/sbcl-src/src/compiler/x86/insts.lisp index b64678c..3acaa01 100644 --- a/sbcl-src/src/compiler/x86/insts.lisp +++ b/sbcl-src/src/compiler/x86/insts.lisp @@ -39,13 +39,16 @@ #(ax cx dx bx sp bp si di)) (defparameter *dword-reg-names* #(eax ecx edx ebx esp ebp esi edi)) +(defparameter *xmmword-reg-names* + #(xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7)) (defun print-reg-with-width (value width stream dstate) (declare (ignore dstate)) (princ (aref (ecase width (:byte *byte-reg-names*) (:word *word-reg-names*) - (:dword *dword-reg-names*)) + (:dword *dword-reg-names*) + (:xmmword *xmmword-reg-names*)) value) stream) ;; XXX plus should do some source-var notes @@ -193,7 +196,7 @@ (:word 16) (:dword 32) (:qword 64) - (:dqword 128) + (:xmmword 128) (:float 32) (:double 64))) @@ -680,7 +683,7 @@ (defstruct (ea (:constructor make-ea (size &key base index scale disp)) (:copier nil)) - (size nil :type (member :byte :word :dword)) + (size nil :type (member :byte :word :dword :xmmword)) (base nil :type (or tn null)) (index nil :type (or tn null)) (scale 1 :type (member 1 2 4 8)) @@ -720,7 +723,7 @@ (ecase (sb-name (sc-sb (tn-sc thing))) (registers (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) - (sse-registers + (xmm-registers (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) (stack ;; Convert stack tns into an index off of EBP. @@ -834,9 +837,18 @@ (and (tn-p thing) (eq (sb-name (sc-sb (tn-sc thing))) 'registers))) -(defun sse-register-p (thing) +(defun xmm-register-p (thing) (and (tn-p thing) - (eq (sb-name (sc-sb (tn-sc thing))) 'sse-registers))) + (eq (sb-name (sc-sb (tn-sc thing))) 'xmm-registers) + (member (sc-name (tn-sc thing)) *xmmword-sc-names*) + t)) + +(defun xmm-ea-p (thing) + (typecase thing + (ea (eq (ea-size thing) :xmmword)) + (tn + (and (member (sc-name (tn-sc thing)) *xmmword-sc-names*) t)) + (t nil))) (defun accumulator-p (thing) (and (register-p thing) @@ -867,6 +879,8 @@ :float) (#.*double-sc-names* :double) + (#.*xmmword-sc-names* + :xmmword) (t (error "can't tell the size of ~S ~S" thing (sc-name (tn-sc thing)))))) (ea @@ -3210,7 +3224,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 40) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 102) @@ -3222,7 +3236,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 40) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 15) @@ -3233,7 +3247,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 110) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 102) @@ -3245,7 +3259,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 102) @@ -3257,7 +3271,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 243) @@ -3269,7 +3283,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 22) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 102) @@ -3281,7 +3295,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 22) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 15) @@ -3292,7 +3306,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 18) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 102) @@ -3304,7 +3318,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 18) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 15) @@ -3315,7 +3329,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 126) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 102) @@ -3327,7 +3341,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 242) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 242) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 242) @@ -3339,7 +3353,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 243) @@ -3351,7 +3365,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 102) @@ -3363,13 +3377,93 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 17) (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) +(DEFINE-INSTRUCTION PSLLD-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 114) + (EMIT-EA SEGMENT DST 6) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSLLDQ-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 115) + (EMIT-EA SEGMENT DST 7) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSLLQ-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 115) + (EMIT-EA SEGMENT DST 6) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSLLW-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 113) + (EMIT-EA SEGMENT DST 6) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSRAD-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 114) + (EMIT-EA SEGMENT DST 4) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSRAW-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 113) + (EMIT-EA SEGMENT DST 4) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSRLD-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 114) + (EMIT-EA SEGMENT DST 2) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSRLDQ-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 115) + (EMIT-EA SEGMENT DST 3) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSRLQ-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 115) + (EMIT-EA SEGMENT DST 2) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSRLW-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 113) + (EMIT-EA SEGMENT DST 2) + (EMIT-BYTE SEGMENT AMOUNT))) + ;;; CPUID diff --git a/sbcl-src/src/compiler/x86/vm.lisp b/sbcl-src/src/compiler/x86/vm.lisp index 1bab6a3..d3fde75 100644 --- a/sbcl-src/src/compiler/x86/vm.lisp +++ b/sbcl-src/src/compiler/x86/vm.lisp @@ -22,7 +22,7 @@ (defvar *word-register-names* (make-array 16 :initial-element nil)) (defvar *dword-register-names* (make-array 16 :initial-element nil)) (defvar *float-register-names* (make-array 8 :initial-element nil)) - (defvar *dqword-register-names* (make-array 8 :initial-element nil))) + (defvar *xmmword-register-names* (make-array 8 :initial-element nil))) (macrolet ((defreg (name offset size) (let ((offset-sym (symbolicate name "-OFFSET")) @@ -93,15 +93,15 @@ (defregset *float-regs* fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) ;; sse registers - (defreg xmm0 0 :dqword) - (defreg xmm1 1 :dqword) - (defreg xmm2 2 :dqword) - (defreg xmm3 3 :dqword) - (defreg xmm4 4 :dqword) - (defreg xmm5 5 :dqword) - (defreg xmm6 6 :dqword) - (defreg xmm7 7 :dqword) - (defregset *sse-regs* xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7) + (defreg xmm0 0 :xmmword) + (defreg xmm1 1 :xmmword) + (defreg xmm2 2 :xmmword) + (defreg xmm3 3 :xmmword) + (defreg xmm4 4 :xmmword) + (defreg xmm5 5 :xmmword) + (defreg xmm6 6 :xmmword) + (defreg xmm7 7 :xmmword) + (defregset *xmm-regs* xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7) ;; registers used to pass arguments ;; @@ -130,7 +130,7 @@ ;;; the new way: (define-storage-base float-registers :finite :size 8) -(define-storage-base sse-registers :finite :size 8) +(define-storage-base xmm-registers :finite :size 8) (define-storage-base stack :unbounded :size 8) (define-storage-base constant :non-packed) @@ -334,8 +334,8 @@ :save-p t :alternate-scs (complex-long-stack)) - (sse-reg sse-registers - :locations #.*sse-regs*) + (xmm-reg xmm-registers + :locations #.*xmm-regs*) ;; a catch or unwind block (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) @@ -353,7 +353,7 @@ ;;; These are used to (at least) determine operand size. (defparameter *float-sc-names* '(single-reg)) (defparameter *double-sc-names* '(double-reg double-stack)) -(defparameter *dqword-sc-names* '(sse-reg)) +(defparameter *xmmword-sc-names* '(xmm-reg)) ) ; EVAL-WHEN ;;;; miscellaneous TNs for the various registers @@ -461,7 +461,7 @@ ;; FIXME: Shouldn't this be an ERROR? (format nil "" offset sc-name)))) (float-registers (format nil "FR~D" offset)) - (sse-registers (format nil "XMM~D" offset)) + (xmm-registers (format nil "XMM~D" offset)) (stack (format nil "S~D" offset)) (constant (format nil "Const~D" offset)) (immediate-constant "Immed") diff --git a/sse-seq.lisp b/sse-seq.lisp new file mode 100644 index 0000000..aa43257 --- /dev/null +++ b/sse-seq.lisp @@ -0,0 +1,136 @@ +(in-package :sb-vm) + +(defmacro vect-ea (base &optional idx (width :dword)) + (let ((disp + (if (and idx (numberp idx)) + `(+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG) ,idx) + `(- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)))) + +;; (format t "ea ~A ~A ~A~%" base idx (and idx (symbolp idx))) + (if (and idx (symbolp idx)) + `(make-ea ,width :base ,base :index ,idx :disp ,disp) + `(make-ea ,width :base ,base :disp ,disp)))) + + +(DEFINE-VOP (%sse-seq=) + (:POLICY :FAST-SAFE) + (:ARGS (seq1 :SCS (DESCRIPTOR-REG)) + (seq2 :SCS (DESCRIPTOR-REG))) + (:ARG-TYPES simple-array-unsigned-byte-8 simple-array-unsigned-byte-8 ) + + (:results (RESULT :SCS (DESCRIPTOR-REG))) + + (:result-types fixnum) + + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + (:TEMPORARY (:SC XMM-REG) X2) + (:TEMPORARY (:SC XMM-REG) X3) + (:TEMPORARY (:SC XMM-REG) X4) + (:TEMPORARY (:SC XMM-REG) X5) + +;; (:TEMPORARY (:SC unsigned-reg :offset edx-offset) edx) + + (:TEMPORARY (:SC unsigned-reg :offset ebx-offset) index) + (:TEMPORARY (:SC unsigned-reg :offset ecx-offset) length) + + (:GENERATOR 10 + + (let ((top (gen-label)) +;; (top2 (gen-label)) + (length-ok (gen-label)) + (fail (gen-label)) + (the-end (gen-label)) + (end (gen-label))) + + (loadw index seq1 vector-length-slot other-pointer-lowtag) + (loadw length seq2 vector-length-slot other-pointer-lowtag) + + ;; same length ? + (inst cmp index length) + (inst jmp :eq length-ok) + + ;; not same length, fail + (inst mov result -1) + (inst jmp end) + + (emit-label length-ok) + + ;; un-fixnumize length + (inst shr length 2) + + ;; calc number of 256bit blocks + (inst shr length (floor (log (/ 256 8) 2))) + + ;; init indices + (inst xor index index) + + ;; zero eq-regs + (inst pxor x4 x4) + (inst pxor x5 x5) + + (emit-label top) + + ;; load first blocks + (inst movdqu x0 (vect-ea seq1 index :xmmword)) + (inst movdqu x1 (vect-ea seq2 index :xmmword)) + + ;; load second blocks + (inst movdqu x2 + (make-ea :xmmword :base seq1 :index index + :disp (+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG) 16))) + (inst movdqu x3 + (make-ea :xmmword :base seq2 :index index + :disp (+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG) 16))) + + + ;; xor first/second blocks (i.e. if equal, xor will be zero) + (inst pxor x0 x1) + (inst pxor x2 x3) + + ;; add index + (inst add index 32) + + ;; or bits to eq-regs (if not eq, some bits will be nonzero) + (inst por x4 x0) + (inst por x5 x2) + + ;; loop + (inst dec length) + (inst jmp :nz top) + + + ;; all 256bit blocks done + + ;; or each 32bit word from x4 to x5 + (inst por x4 x5) + (inst movdqa x0 x4) + + (inst psrldq-ib x4 4) ;; this is number of bytes, not bits + (inst por x0 x4) + + (inst psrldq-ib x4 4) + (inst por x0 x4) + + (inst psrldq-ib x4 4) + (inst por x0 x4) + + ;; now low 32bits of x0 will be non-zero if seq's not equal + + (inst movd result x0) + + ;; end + (emit-label end) + + (inst test result result) + (inst jmp :nz fail) + + (inst mov result (fixnumize 0)) + (inst jmp the-end) + + (emit-label fail) + (inst mov result (fixnumize 1)) + + (emit-label the-end) + + ))) diff --git a/test-seq.lisp b/test-seq.lisp new file mode 100644 index 0000000..3f15be2 --- /dev/null +++ b/test-seq.lisp @@ -0,0 +1,64 @@ +(in-package :cl-user) + +(declaim (optimize (speed 3) (safety 0) (space 0) (debug 0))) + +(defun sse-seq= (seq1 seq2) + (declare (type (simple-array (unsigned-byte 8) (*)) seq1 seq2)) + (multiple-value-bind (256blocks rest) (truncate (length seq1) (floor (log (/ 256 8) 2))) + (declare (ignore rest)) + (and (= (sb-sys:%primitive sb-vm::%sse-seq= seq1 seq2) 0) + (loop for equal = t + for i from (* 256blocks 32) below (length seq1) + when (/= (aref seq1 i) (aref seq2 i)) do (setq equal nil) + finally (return equal))))) + +(defun seq= (seq1 seq2) + (declare (type (simple-array (unsigned-byte 8) (*)) seq1 seq2)) + (and (= (length seq1) (length seq2)) + (loop for equal = t + for s1 of-type unsigned-byte across seq1 + for s2 of-type unsigned-byte across seq2 + when (/= s1 s2) do (setq equal nil) (return nil) + finally (return equal)))) + + +(defun test-seq (&optional (test-count 100000)) + (let ((arr1 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) + (arr2 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) + (arr3 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) + (arr4 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) + res) + + (loop for i from 0 below (length arr1) + do (setf (aref arr1 i) (mod (* (1+ i) 10) 256) + (aref arr2 i) (aref arr1 i) + (aref arr3 i) (aref arr1 i) + (aref arr4 i) (aref arr1 i) + )) + + (setf (aref arr3 1200) (mod (1+ (aref arr3 1200)) 256) + (aref arr4 256000) (mod (1+ (aref arr4 256000)) 256)) + +;; (time (dotimes (i 100000) (sse-seq= arr1 arr2))) +;; (time (dotimes (i #.(/ 100000 30)) (seq= arr1 arr2))) + + (format t "; seq= a1 a2~%") + (time-sample-form #'(lambda () (dotimes (i (truncate test-count 30)) (setf res (seq= arr1 arr2))))) + + (format t "; seq= a1 a3~%") + (time-sample-form #'(lambda () (dotimes (i (truncate test-count 30)) (setf res (seq= arr1 arr3))))) + + (format t "; seq= a2 a4~%") + (time-sample-form #'(lambda () (dotimes (i (truncate test-count 30)) (setf res (seq= arr2 arr4))))) + + + (format t "; sse-seq= a1 a2~%") + (time-sample-form #'(lambda () (dotimes (i test-count) (setf res (sse-seq= arr1 arr2))))) + + (format t "; sse-seq= a1 a3~%") + (time-sample-form #'(lambda () (dotimes (i test-count) (setf res (sse-seq= arr1 arr3))))) + + (format t "; sse-seq= a2 a4~%") + (time-sample-form #'(lambda () (dotimes (i test-count) (setf res (sse-seq= arr2 arr4))))) + + )) -- 2.11.4.GIT