From e3bd32ef9364fddd4afebd711aa527366a00882b Mon Sep 17 00:00:00 2001 From: rlaakso Date: Fri, 12 Aug 2005 14:09:53 +0000 Subject: [PATCH] *** empty log message *** --- expand-parse-operand-temp-count.lisp | 8 ++++ load.lisp | 1 + push-simd-features.lisp | 9 +++++ sse-seq.lisp | 72 +++++++++++++++++++++++------------- test-seq.lisp | 27 +++++--------- 5 files changed, 75 insertions(+), 42 deletions(-) create mode 100644 expand-parse-operand-temp-count.lisp create mode 100644 push-simd-features.lisp diff --git a/expand-parse-operand-temp-count.lisp b/expand-parse-operand-temp-count.lisp new file mode 100644 index 0000000..4bfaba1 --- /dev/null +++ b/expand-parse-operand-temp-count.lisp @@ -0,0 +1,8 @@ +(in-package :sb-c) + +(setf *parse-vop-operand-count* 1) +(dotimes (i 20) + (make-operand-parse-temp) + (make-operand-parse-load-tn) + (incf *parse-vop-operand-count*)) + diff --git a/load.lisp b/load.lisp index e71fcf9..673a676 100644 --- a/load.lisp +++ b/load.lisp @@ -14,6 +14,7 @@ (if t (progn (load (compile-file "detect-simd.lisp")) + (load (compile-file "expand-parse-operand-temp-count.lisp")) (load (compile-file "timing.lisp")) (load (compile-file "sse-seq.lisp")) (load (compile-file "test-seq.lisp")) diff --git a/push-simd-features.lisp b/push-simd-features.lisp new file mode 100644 index 0000000..c9cbbb7 --- /dev/null +++ b/push-simd-features.lisp @@ -0,0 +1,9 @@ +(in-package :sb-vm) + +(eval-when (:load-toplevel) + (let ((res (sb-sys:%primitive sb-vm::%detect-simd/x86))) +;; (format t "res is ~A~%" res) + (if (/= (logand res #b001) 0) (pushnew :sse sb-vm::*backend-subfeatures*)) + (if (/= (logand res #b010) 0) (pushnew :sse2 sb-vm::*backend-subfeatures*)) + (if (/= (logand res #b100) 0) (pushnew :sse3 sb-vm::*backend-subfeatures*)))) + \ No newline at end of file diff --git a/sse-seq.lisp b/sse-seq.lisp index aa43257..7303a60 100644 --- a/sse-seq.lisp +++ b/sse-seq.lisp @@ -29,15 +29,15 @@ (:TEMPORARY (:SC XMM-REG) X4) (:TEMPORARY (:SC XMM-REG) X5) -;; (:TEMPORARY (:SC unsigned-reg :offset edx-offset) edx) - + (:TEMPORARY (:SC unsigned-reg :offset eax-offset :to (:result 0)) temp1) + (:TEMPORARY (:SC unsigned-reg :offset edx-offset) temp2) (:TEMPORARY (:SC unsigned-reg :offset ebx-offset) index) (:TEMPORARY (:SC unsigned-reg :offset ecx-offset) length) (:GENERATOR 10 (let ((top (gen-label)) -;; (top2 (gen-label)) + (top2 (gen-label)) (length-ok (gen-label)) (fail (gen-label)) (the-end (gen-label)) @@ -51,8 +51,7 @@ (inst jmp :eq length-ok) ;; not same length, fail - (inst mov result -1) - (inst jmp end) + (inst jmp fail) (emit-label length-ok) @@ -66,8 +65,8 @@ (inst xor index index) ;; zero eq-regs - (inst pxor x4 x4) - (inst pxor x5 x5) +;; (inst pxor x4 x4) +;; (inst pxor x5 x5) (emit-label top) @@ -75,6 +74,9 @@ (inst movdqu x0 (vect-ea seq1 index :xmmword)) (inst movdqu x1 (vect-ea seq2 index :xmmword)) + (inst pxor x4 x4) + (inst pxor x5 x5) + ;; load second blocks (inst movdqu x2 (make-ea :xmmword :base seq1 :index index @@ -91,9 +93,18 @@ ;; add index (inst add index 32) - ;; or bits to eq-regs (if not eq, some bits will be nonzero) - (inst por x4 x0) - (inst por x5 x2) + ;; check for non-equality + (inst pcmpeqd x4 x0) + (inst pcmpeqd x5 x2) + + (inst pmovmskb temp1 x4) + (inst pmovmskb temp2 x5) + + (inst cmp temp1 #x0000FFFF) + (inst jmp :ne fail) + + (inst cmp temp2 #x0000FFFF) + (inst jmp :ne fail) ;; loop (inst dec length) @@ -102,35 +113,46 @@ ;; all 256bit blocks done - ;; or each 32bit word from x4 to x5 - (inst por x4 x5) - (inst movdqa x0 x4) - (inst psrldq-ib x4 4) ;; this is number of bytes, not bits - (inst por x0 x4) + ;; check remaining bytes + (loadw length seq1 vector-length-slot other-pointer-lowtag) + (inst shr length 2) + (inst and length (1- (/ 256 8))) + + ;; no bytes left ? + (inst test length length) + (inst jmp :z end) - (inst psrldq-ib x4 4) - (inst por x0 x4) + (inst xor temp1 temp1) + (inst xor temp2 temp2) - (inst psrldq-ib x4 4) - (inst por x0 x4) + (emit-label top2) - ;; now low 32bits of x0 will be non-zero if seq's not equal + ;; test bytes + (inst movzx temp1 (vect-ea seq1 index :byte)) + (inst movzx temp2 (vect-ea seq2 index :byte)) + (inst xor temp1 temp2) + (inst inc index) - (inst movd result x0) + ;; if not zero, fail + (inst test temp1 temp1) + (inst jmp :nz fail) + + ;; loop + (inst dec length) + (inst jmp :nz top2) ;; end (emit-label end) - (inst test result result) - (inst jmp :nz fail) - (inst mov result (fixnumize 0)) (inst jmp the-end) + ;; fail (emit-label fail) (inst mov result (fixnumize 1)) - + + ;; the-end (emit-label the-end) ))) diff --git a/test-seq.lisp b/test-seq.lisp index 3f15be2..4a630cf 100644 --- a/test-seq.lisp +++ b/test-seq.lisp @@ -3,14 +3,7 @@ (declaim (optimize (speed 3) (safety 0) (space 0) (debug 0))) (defun sse-seq= (seq1 seq2) - (declare (type (simple-array (unsigned-byte 8) (*)) seq1 seq2)) - (multiple-value-bind (256blocks rest) (truncate (length seq1) (floor (log (/ 256 8) 2))) - (declare (ignore rest)) - (and (= (sb-sys:%primitive sb-vm::%sse-seq= seq1 seq2) 0) - (loop for equal = t - for i from (* 256blocks 32) below (length seq1) - when (/= (aref seq1 i) (aref seq2 i)) do (setq equal nil) - finally (return equal))))) + (= (sb-sys:%primitive sb-vm::%sse-seq= seq1 seq2) 0)) (defun seq= (seq1 seq2) (declare (type (simple-array (unsigned-byte 8) (*)) seq1 seq2)) @@ -22,11 +15,11 @@ finally (return equal)))) -(defun test-seq (&optional (test-count 100000)) - (let ((arr1 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) - (arr2 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) - (arr3 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) - (arr4 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) +(defun test-seq (&optional (test-count 50000)) + (let ((arr1 (make-array #.(* 255 1025) :element-type '(unsigned-byte 8) :initial-element 0)) + (arr2 (make-array #.(* 255 1025) :element-type '(unsigned-byte 8) :initial-element 0)) + (arr3 (make-array #.(* 255 1025) :element-type '(unsigned-byte 8) :initial-element 0)) + (arr4 (make-array #.(* 255 1025) :element-type '(unsigned-byte 8) :initial-element 0)) res) (loop for i from 0 below (length arr1) @@ -37,19 +30,19 @@ )) (setf (aref arr3 1200) (mod (1+ (aref arr3 1200)) 256) - (aref arr4 256000) (mod (1+ (aref arr4 256000)) 256)) + (aref arr4 (- (length arr4) 2)) (mod (1+ (aref arr4 (- (length arr4) 2))) 256)) ;; (time (dotimes (i 100000) (sse-seq= arr1 arr2))) ;; (time (dotimes (i #.(/ 100000 30)) (seq= arr1 arr2))) (format t "; seq= a1 a2~%") - (time-sample-form #'(lambda () (dotimes (i (truncate test-count 30)) (setf res (seq= arr1 arr2))))) + (time-sample-form #'(lambda () (dotimes (i (truncate test-count 15)) (setf res (seq= arr1 arr2))))) (format t "; seq= a1 a3~%") - (time-sample-form #'(lambda () (dotimes (i (truncate test-count 30)) (setf res (seq= arr1 arr3))))) + (time-sample-form #'(lambda () (dotimes (i (truncate test-count 15)) (setf res (seq= arr1 arr3))))) (format t "; seq= a2 a4~%") - (time-sample-form #'(lambda () (dotimes (i (truncate test-count 30)) (setf res (seq= arr2 arr4))))) + (time-sample-form #'(lambda () (dotimes (i (truncate test-count 15)) (setf res (seq= arr2 arr4))))) (format t "; sse-seq= a1 a2~%") -- 2.11.4.GIT