1 ;;; From sb-devel post by Christophe Rhodes
4 (defun %vector
+ (result vector1 vector2
)
5 (loop for x across vector1
8 do
(setf (aref result i
) (+ x y
))))
13 (ignore-errors (defknown cl-user
::%vector
+ (vector vector vector
) vector
))
17 (pushnew :sse2
*backend-subfeatures
*)
19 (define-vop (vector+/simple-array-signed-byte-30
)
20 (:translate cl-user
::%vector
+)
22 (:args
(result :scs
(descriptor-reg))
23 (vector1 :scs
(descriptor-reg))
24 (vector2 :scs
(descriptor-reg)))
25 (:arg-types simple-array-signed-byte-30 simple-array-signed-byte-30 simple-array-signed-byte-30
)
26 (:temporary
(:sc any-reg
) temp
)
27 (:temporary
(:sc unsigned-reg
) index
)
28 (:temporary
(:sc unsigned-reg
) length
)
30 (let ((top (gen-label))
32 (loadw length result vector-length-slot other-pointer-lowtag
)
33 ;; check that the result vector doesn't have length 0
37 (inst xor index index
)
39 ;; this is wasteful if one of the arguments is the same as the
40 ;; result; we can save a mov
41 (inst mov temp
(make-ea :dword
:base vector1
:index index
42 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)))
43 (inst add temp
(make-ea :dword
:base vector2
:index index
44 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)))
45 (inst mov
(make-ea :dword
:base result
:index index
46 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)) temp
)
47 (inst add index
(fixnumize 1))
48 (inst cmp index length
)
52 (define-vop (vector+/simple-array-signed-byte-30-sse
)
53 (:translate cl-user
::%vector
+)
55 (:args
(result :scs
(descriptor-reg))
56 (vector1 :scs
(descriptor-reg))
57 (vector2 :scs
(descriptor-reg)))
58 (:arg-types simple-array-signed-byte-30 simple-array-signed-byte-30 simple-array-signed-byte-30
)
59 (:temporary
(:sc any-reg
) temp
)
60 (:temporary
(:sc sse-reg
) sse-temp1
)
61 (:temporary
(:sc sse-reg
) sse-temp2
)
62 (:temporary
(:sc unsigned-reg
) index
)
63 (:temporary
(:sc unsigned-reg
) length
)
64 (:guard
(member :sse2
*backend-subfeatures
*))
66 (let ((top (gen-label))
70 (loadw length result vector-length-slot other-pointer-lowtag
)
71 ;; check that the result vector doesn't have length 0
75 (inst xor index index
)
79 (inst mov temp
(make-ea :dword
:base vector1
:index index
80 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)))
81 (inst add temp
(make-ea :dword
:base vector2
:index index
82 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)))
83 (inst mov
(make-ea :dword
:base result
:index index
84 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)) temp
)
85 (inst add index
(fixnumize 1))
86 (inst cmp index length
)
89 ;; eventually at this point we put in a quadword add, but that
90 ;; would be one more instruction to write.
92 (inst mov temp
(make-ea :dword
:base vector1
:index index
93 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)))
94 (inst add temp
(make-ea :dword
:base vector2
:index index
95 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)))
96 (inst mov
(make-ea :dword
:base result
:index index
97 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)) temp
)
98 (inst add index
(fixnumize 1))
99 (inst cmp index length
)
101 (inst mov temp
(make-ea :dword
:base vector1
:index index
102 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)))
103 (inst add temp
(make-ea :dword
:base vector2
:index index
104 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)))
105 (inst mov
(make-ea :dword
:base result
:index index
106 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)) temp
)
107 (inst add index
(fixnumize 1))
108 (inst cmp index length
)
111 ;; here, we do double quadword additions until we hit the end of
112 ;; the computation. No guarantees about alignment, so we have to
114 (inst movdqu sse-temp1
(make-ea :dword
:base vector1
:index index
115 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)))
116 ;; KLUDGE: We're using :dword EAs here. This is possibly non-optimal.
117 (inst movdqu sse-temp2
(make-ea :dword
:base vector2
:index index
118 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)))
119 (inst paddd sse-temp1 sse-temp2
)
120 (inst movdqu sse-temp1
(make-ea :dword
:base result
:index index
121 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)))
122 (inst add index
(fixnumize 4))
123 (inst cmp index length
)