(defmacro vect-ea (base &optional idx (width :dword))
  "Expand into a MAKE-EA form that addresses vector data relative to BASE.

BASE  - form passed to MAKE-EA as the :BASE operand.
IDX   - optional offset, inspected at macroexpansion time:
          * a literal number is added to the displacement;
          * a non-NIL symbol is passed to MAKE-EA as the :INDEX operand;
          * anything else (NIL, or a compound form) contributes nothing
            to the expansion.
WIDTH - passed to MAKE-EA as its first argument; defaults to :DWORD.

The displacement is always
  (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)
plus IDX when IDX is a literal number."
  ;; DISP is a form (not a value): it is spliced into the expansion so the
  ;; constants are evaluated where the macro is used.
  (let ((disp (if (and idx (numberp idx))
                  `(+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES)
                         OTHER-POINTER-LOWTAG)
                      ,idx)
                  `(- (* VECTOR-DATA-OFFSET N-WORD-BYTES)
                      OTHER-POINTER-LOWTAG))))
    ;; Debugging aid:
    ;; (format t "ea ~A ~A ~A~%" base idx (and idx (symbolp idx)))
    ;; The (and idx ...) guard matters: NIL is itself a symbol, and a
    ;; missing IDX must not produce an :INDEX operand.
    (if (and idx (symbolp idx))
        `(make-ea ,width :base ,base :index ,idx :disp ,disp)
        `(make-ea ,width :base ,base :disp ,disp))))
15 (DEFINE-VOP (%sse-seq
=)
17 (:ARGS
(seq1 :SCS
(DESCRIPTOR-REG))
18 (seq2 :SCS
(DESCRIPTOR-REG)))
19 (:ARG-TYPES simple-array-unsigned-byte-8 simple-array-unsigned-byte-8
)
21 (:results
(RESULT :SCS
(DESCRIPTOR-REG)))
23 (:result-types fixnum
)
25 (:TEMPORARY
(:SC XMM-REG
) X0
)
26 (:TEMPORARY
(:SC XMM-REG
) X1
)
27 (:TEMPORARY
(:SC XMM-REG
) X2
)
28 (:TEMPORARY
(:SC XMM-REG
) X3
)
29 (:TEMPORARY
(:SC XMM-REG
) X4
)
30 (:TEMPORARY
(:SC XMM-REG
) X5
)
32 ;; (:TEMPORARY (:SC unsigned-reg :offset edx-offset) edx)
34 (:TEMPORARY
(:SC unsigned-reg
:offset ebx-offset
) index
)
35 (:TEMPORARY
(:SC unsigned-reg
:offset ecx-offset
) length
)
39 (let ((top (gen-label))
41 (length-ok (gen-label))
46 (loadw index seq1 vector-length-slot other-pointer-lowtag
)
47 (loadw length seq2 vector-length-slot other-pointer-lowtag
)
50 (inst cmp index length
)
51 (inst jmp
:eq length-ok
)
53 ;; not same length, fail
57 (emit-label length-ok
)
59 ;; un-fixnumize length
62 ;; calc number of 256bit blocks
63 (inst shr length
(floor (log (/ 256 8) 2)))
66 (inst xor index index
)
75 (inst movdqu x0
(vect-ea seq1 index
:xmmword
))
76 (inst movdqu x1
(vect-ea seq2 index
:xmmword
))
80 (make-ea :xmmword
:base seq1
:index index
81 :disp
(+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES
) OTHER-POINTER-LOWTAG
) 16)))
83 (make-ea :xmmword
:base seq2
:index index
84 :disp
(+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES
) OTHER-POINTER-LOWTAG
) 16)))
87 ;; xor first/second blocks (i.e. if equal, xor will be zero)
94 ;; or bits to eq-regs (if not eq, some bits will be nonzero)
103 ;; all 256bit blocks done
105 ;; or each 32bit word from x4 to x5
109 (inst psrldq-ib x4
4) ;; this is number of bytes, not bits
112 (inst psrldq-ib x4
4)
115 (inst psrldq-ib x4
4)
118 ;; now low 32bits of x0 will be non-zero if seq's not equal
120 (inst movd result x0
)
125 (inst test result result
)
128 (inst mov result
(fixnumize 0))
132 (inst mov result
(fixnumize 1))