*** empty log message ***
[sb-simd.git] / sse-seq.lisp
blobaa43257183568de3e7c1d7bf1c77605596097acd
1 (in-package :sb-vm)
;;; VECT-EA base &optional idx (width :dword)
;;;
;;; Expands into a MAKE-EA form of size WIDTH whose base register is BASE.
;;; The displacement always starts from
;;;   (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)
;;; (presumably the byte offset of the first data element relative to a
;;; tagged vector pointer -- confirm against the backend definitions).
;;;
;;; IDX is inspected at macroexpansion time, unevaluated:
;;;   * a literal number     -> added to the displacement; no index register
;;;   * a non-NIL symbol     -> passed through as the :INDEX operand
;;;   * anything else / NIL  -> ignored; only base + displacement are used
(defmacro vect-ea (base &optional idx (width :dword))
  (let* ((data-start
           '(- (* vector-data-offset n-word-bytes) other-pointer-lowtag))
         ;; A numeric IDX is folded into the displacement expression.
         (displacement (if (numberp idx)
                           `(+ ,data-start ,idx)
                           data-start))
         ;; NIL is itself a symbol, so it has to be excluded explicitly.
         (index-operand (when (and idx (symbolp idx))
                          `(:index ,idx))))
    `(make-ea ,width :base ,base ,@index-operand :disp ,displacement)))
15 (DEFINE-VOP (%sse-seq=)
16 (:POLICY :FAST-SAFE)
17 (:ARGS (seq1 :SCS (DESCRIPTOR-REG))
18 (seq2 :SCS (DESCRIPTOR-REG)))
19 (:ARG-TYPES simple-array-unsigned-byte-8 simple-array-unsigned-byte-8 )
21 (:results (RESULT :SCS (DESCRIPTOR-REG)))
23 (:result-types fixnum)
25 (:TEMPORARY (:SC XMM-REG) X0)
26 (:TEMPORARY (:SC XMM-REG) X1)
27 (:TEMPORARY (:SC XMM-REG) X2)
28 (:TEMPORARY (:SC XMM-REG) X3)
29 (:TEMPORARY (:SC XMM-REG) X4)
30 (:TEMPORARY (:SC XMM-REG) X5)
32 ;; (:TEMPORARY (:SC unsigned-reg :offset edx-offset) edx)
34 (:TEMPORARY (:SC unsigned-reg :offset ebx-offset) index)
35 (:TEMPORARY (:SC unsigned-reg :offset ecx-offset) length)
37 (:GENERATOR 10
39 (let ((top (gen-label))
40 ;; (top2 (gen-label))
41 (length-ok (gen-label))
42 (fail (gen-label))
43 (the-end (gen-label))
44 (end (gen-label)))
46 (loadw index seq1 vector-length-slot other-pointer-lowtag)
47 (loadw length seq2 vector-length-slot other-pointer-lowtag)
49 ;; same length ?
50 (inst cmp index length)
51 (inst jmp :eq length-ok)
53 ;; not same length, fail
54 (inst mov result -1)
55 (inst jmp end)
57 (emit-label length-ok)
59 ;; un-fixnumize length
60 (inst shr length 2)
62 ;; calc number of 256bit blocks
63 (inst shr length (floor (log (/ 256 8) 2)))
65 ;; init indices
66 (inst xor index index)
68 ;; zero eq-regs
69 (inst pxor x4 x4)
70 (inst pxor x5 x5)
72 (emit-label top)
74 ;; load first blocks
75 (inst movdqu x0 (vect-ea seq1 index :xmmword))
76 (inst movdqu x1 (vect-ea seq2 index :xmmword))
78 ;; load second blocks
79 (inst movdqu x2
80 (make-ea :xmmword :base seq1 :index index
81 :disp (+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG) 16)))
82 (inst movdqu x3
83 (make-ea :xmmword :base seq2 :index index
84 :disp (+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG) 16)))
87 ;; xor first/second blocks (i.e. if equal, xor will be zero)
88 (inst pxor x0 x1)
89 (inst pxor x2 x3)
91 ;; add index
92 (inst add index 32)
94 ;; or bits to eq-regs (if not eq, some bits will be nonzero)
95 (inst por x4 x0)
96 (inst por x5 x2)
98 ;; loop
99 (inst dec length)
100 (inst jmp :nz top)
103 ;; all 256bit blocks done
105 ;; or each 32bit word from x4 to x5
106 (inst por x4 x5)
107 (inst movdqa x0 x4)
109 (inst psrldq-ib x4 4) ;; this is number of bytes, not bits
110 (inst por x0 x4)
112 (inst psrldq-ib x4 4)
113 (inst por x0 x4)
115 (inst psrldq-ib x4 4)
116 (inst por x0 x4)
118 ;; now low 32bits of x0 will be non-zero if seq's not equal
120 (inst movd result x0)
122 ;; end
123 (emit-label end)
125 (inst test result result)
126 (inst jmp :nz fail)
128 (inst mov result (fixnumize 0))
129 (inst jmp the-end)
131 (emit-label fail)
132 (inst mov result (fixnumize 1))
134 (emit-label the-end)