*** empty log message ***
[sb-simd.git] / sse-seq.lisp
blob7303a600ea1864e03f98a3c431943928e5555109
1 (in-package :sb-vm)
(defmacro vect-ea (base &optional idx (width :dword))
  "Expand into a MAKE-EA form of operand size WIDTH addressing into the
vector held in BASE.

The displacement always contains
  (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)
and IDX selects what is added on top of it:

  - omitted / NIL     : nothing; only the displacement above is used.
  - a literal number  : added to the displacement.
  - a symbol          : passed to MAKE-EA as :INDEX (no :SCALE is given,
                        so MAKE-EA's default scale applies).
  - any other form    : evaluated once where the expansion runs; a numeric
                        value is added to the displacement, anything else
                        is passed as :INDEX.

The last case used to be silently ignored, which produced an address
without any index at all."
  (let ((data-disp '(- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)))
    (cond
      ;; No index supplied: address the start of the data area.
      ((null idx)
       `(make-ea ,width :base ,base :disp ,data-disp))
      ;; Literal offset: fold it into the displacement.
      ((numberp idx)
       `(make-ea ,width :base ,base :disp (+ ,data-disp ,idx)))
      ;; Variable name: use its value as the index operand.
      ((symbolp idx)
       `(make-ea ,width :base ,base :index ,idx :disp ,data-disp))
      ;; Compound form: its kind is unknown at macroexpansion time, so
      ;; evaluate it exactly once and choose between displacement and
      ;; index when the expansion runs.
      (t
       (let ((idx-value (gensym "IDX")))
         `(let ((,idx-value ,idx))
            (if (numberp ,idx-value)
                (make-ea ,width :base ,base :disp (+ ,data-disp ,idx-value))
                (make-ea ,width :base ,base :index ,idx-value :disp ,data-disp))))))))
15 (DEFINE-VOP (%sse-seq=)
16 (:POLICY :FAST-SAFE)
17 (:ARGS (seq1 :SCS (DESCRIPTOR-REG))
18 (seq2 :SCS (DESCRIPTOR-REG)))
19 (:ARG-TYPES simple-array-unsigned-byte-8 simple-array-unsigned-byte-8 )
21 (:results (RESULT :SCS (DESCRIPTOR-REG)))
23 (:result-types fixnum)
25 (:TEMPORARY (:SC XMM-REG) X0)
26 (:TEMPORARY (:SC XMM-REG) X1)
27 (:TEMPORARY (:SC XMM-REG) X2)
28 (:TEMPORARY (:SC XMM-REG) X3)
29 (:TEMPORARY (:SC XMM-REG) X4)
30 (:TEMPORARY (:SC XMM-REG) X5)
32 (:TEMPORARY (:SC unsigned-reg :offset eax-offset :to (:result 0)) temp1)
33 (:TEMPORARY (:SC unsigned-reg :offset edx-offset) temp2)
34 (:TEMPORARY (:SC unsigned-reg :offset ebx-offset) index)
35 (:TEMPORARY (:SC unsigned-reg :offset ecx-offset) length)
37 (:GENERATOR 10
39 (let ((top (gen-label))
40 (top2 (gen-label))
41 (length-ok (gen-label))
42 (fail (gen-label))
43 (the-end (gen-label))
44 (end (gen-label)))
46 (loadw index seq1 vector-length-slot other-pointer-lowtag)
47 (loadw length seq2 vector-length-slot other-pointer-lowtag)
49 ;; same length ?
50 (inst cmp index length)
51 (inst jmp :eq length-ok)
53 ;; not same length, fail
54 (inst jmp fail)
56 (emit-label length-ok)
58 ;; un-fixnumize length
59 (inst shr length 2)
61 ;; calc number of 256bit blocks
62 (inst shr length (floor (log (/ 256 8) 2)))
64 ;; init indices
65 (inst xor index index)
67 ;; zero eq-regs
68 ;; (inst pxor x4 x4)
69 ;; (inst pxor x5 x5)
71 (emit-label top)
73 ;; load first blocks
74 (inst movdqu x0 (vect-ea seq1 index :xmmword))
75 (inst movdqu x1 (vect-ea seq2 index :xmmword))
77 (inst pxor x4 x4)
78 (inst pxor x5 x5)
80 ;; load second blocks
81 (inst movdqu x2
82 (make-ea :xmmword :base seq1 :index index
83 :disp (+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG) 16)))
84 (inst movdqu x3
85 (make-ea :xmmword :base seq2 :index index
86 :disp (+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG) 16)))
89 ;; xor first/second blocks (i.e. if equal, xor will be zero)
90 (inst pxor x0 x1)
91 (inst pxor x2 x3)
93 ;; add index
94 (inst add index 32)
96 ;; check for non-equality
97 (inst pcmpeqd x4 x0)
98 (inst pcmpeqd x5 x2)
100 (inst pmovmskb temp1 x4)
101 (inst pmovmskb temp2 x5)
103 (inst cmp temp1 #x0000FFFF)
104 (inst jmp :ne fail)
106 (inst cmp temp2 #x0000FFFF)
107 (inst jmp :ne fail)
109 ;; loop
110 (inst dec length)
111 (inst jmp :nz top)
114 ;; all 256bit blocks done
117 ;; check remaining bytes
118 (loadw length seq1 vector-length-slot other-pointer-lowtag)
119 (inst shr length 2)
120 (inst and length (1- (/ 256 8)))
122 ;; no bytes left ?
123 (inst test length length)
124 (inst jmp :z end)
126 (inst xor temp1 temp1)
127 (inst xor temp2 temp2)
129 (emit-label top2)
131 ;; test bytes
132 (inst movzx temp1 (vect-ea seq1 index :byte))
133 (inst movzx temp2 (vect-ea seq2 index :byte))
134 (inst xor temp1 temp2)
135 (inst inc index)
137 ;; if not zero, fail
138 (inst test temp1 temp1)
139 (inst jmp :nz fail)
141 ;; loop
142 (inst dec length)
143 (inst jmp :nz top2)
145 ;; end
146 (emit-label end)
148 (inst mov result (fixnumize 0))
149 (inst jmp the-end)
151 ;; fail
152 (emit-label fail)
153 (inst mov result (fixnumize 1))
155 ;; the-end
156 (emit-label the-end)