6 (:args
(vector1 :scs
(descriptor-reg))
7 (vector2 :scs
(descriptor-reg)))
8 (:arg-types simple-array-single-float simple-array-single-float
)
10 (:temporary
(:sc unsigned-reg
) index
)
12 ;; (:temporary (:sc unsigned-reg) temp1)
13 ;; (:temporary (:sc unsigned-reg) temp2)
15 (:temporary
(:sc sse-reg
) sse-temp1
)
16 (:temporary
(:sc sse-reg
) sse-temp2
)
20 (inst xor index index
)
23 (inst movups sse-temp1
24 (make-ea :dword
:base vector1
:index index
25 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)))
26 (inst movups sse-temp2
27 (make-ea :dword
:base vector2
:index index
28 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
)))
30 (inst addps sse-temp1 sse-temp2
)
35 (make-ea :dword
:base vector1
:index index
36 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
))
41 (make-ea :dword
:base vector1
:index index
42 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
))
47 (make-ea :dword
:base vector1
:index index
48 :disp
(- (* vector-data-offset n-word-bytes
) other-pointer-lowtag
))
55 0: 31 c0 xor %eax
,%eax
2: 0f
10 04 c6 movups
(%esi
,%eax
,8),%xmm0
56 6: 0f
10 0c c7 movups
(%edi
,%eax
,8),%xmm1
57 a
: 0f
58 c1 addps %xmm1
,%xmm0
58 d
: 0f
11 44 c5
00 movups %xmm0
,0x0(%ebp
,%eax
,8)
61 0: 31 c0 xor %eax
,%eax
62 2: 0f
10 44 03 01 movups
0x1(%ebx
,%eax
,1),%xmm0
63 7: 0f
10 4c
01 01 movups
0x1(%ecx
,%eax
,1),%xmm1
64 c
: 0f
58 c1 addps %xmm1
,%xmm0
65 f
: 0f
11 44 01 01 movups %xmm0
,0x1(%ecx
,%eax
,1)
68 2: 0f
10 43 01 movups
0x1(%ebx
),%xmm0
69 6: 0f
10 49 01 movups
0x1(%ecx
),%xmm1
70 a
: 0f
58 c1 addps %xmm1
,%xmm0
71 d
: 0f
11 41 01 movups %xmm0
,0x1(%ecx
)
74 2: 0f
10 44 1a
01 movups
0x1(%edx
,%ebx
,1),%xmm0
75 7: 0f
10 4c
0e
01 movups
0x1(%esi
,%ecx
,1),%xmm1
76 c
: 0f
58 c1 addps %xmm1
,%xmm0
77 f
: 0f
11 41 01 movups %xmm0
,0x1(%ecx
)
82 V  = 128-bit xmm register selected by the reg field of the modrm byte.
83 W  = 128-bit xmm register or memory operand selected by the mod and r/m fields of the modrm byte.
84 ps = 128-bit packed single-precision float operand (4 x 32-bit floats)
86 movups xmm0, [ebx + eax*1 + 01]
87 movups  md   reg  r/m  sc  idx  bse  disp8
88 0f 10   01   000  100  00  000  011  01
89         +d8  xm0  sib  *1  eax  ebx  +01
92 ; 43E: L4: 31C0 XOR EAX, EAX
97 44h
= b
0 1 0 0 0 1 0 0
98 4Ch
= b
0 1 0 0 1 1 0 0
99 64h
= b
0 1 1 0 0 1 0 0
100 E0h
= b
1 1 1 0 0 0 0 0
101 C1h
= b
1 1 0 0 0 0 0 1
102 43h
= b
0 1 0 0 0 0 1 1
103 49h
= b
0 1 0 0 1 0 0 1
105 r/m b100 (with md != 11) => has sib byte
108 r
/m
= 000, 001, 010, 011, 100, 101, 110 , 111
110 00 = [ax], [cx], [dx], [bx], sib, disp32 (rip+d32 in 64-bit mode), [si], [di]
111 01 = --||--
+ disp8
, bp
+disp8
, ..
113 11 = al
/ax
/eax
/mmx0
/xmm0
, 1, 2, 3, 4, 5, 6, 7
116 000 001 010 011 100 101 110 111
117 reg32 eax ecx edx ebx esp ebp esi edi
118 xmm xm0 xm1 xm2 xm3 xm4 xm5 xm6 xm7
;; actually xmm0..xmm7
121 44h = md 01, r/m 100, reg 000, => xmm0, [sib + disp8] => 44 03 01 : xmm0, [ebx + eax*1 + 01], 44 01 01 : xmm0, [ecx + eax*1 + 01]
122 64h = md 01, r/m 100, reg 100, => xmm4, [sib + disp8] => 64 03 01 : xmm4, [ebx + eax*1 + 01]
123 04 C6 = md 00, r/m 100, reg 000, => xmm0, [sib] => xmm0, [esi + eax*8]
124 4C 01 01 = md 01, reg 001, r/m 100 => xmm1, [sib + disp8] => xmm1, [ecx + eax*1 + 01]
125 43h = md 01, reg 0, r/m 011 => xmm0, [ebx + 01]
126 49h = md 01, reg 1, r/m 001 => xmm1, [ecx + 01]
;; sib byte layout: scale (bits 7-6), index (bits 5-3), base (bits 2-0); address = base + index*scale
132 03h = b 0 0 0 0 0 0 1 1 = ebx + eax*1
133 01h = b 0 0 0 0 0 0 0 1 = ecx + eax*1
134 C6h = b 1 1 0 0 0 1 1 0 = esi + eax*8
135 1Ah = b 0 0 0 1 1 0 1 0 = edx + ebx*1
136 0Eh = b 0 0 0 0 1 1 1 0 = esi + ecx*1
139 ;; movups xmm0, ea 0F 10 44 03 01
140 ; 440: 0F 10 44 01 01 movups xmm0, [eax + ecx + 01]
142 ;; movups xmm1, ea 0F 10 4C 01 01
143 ; 445: 0F 10 64 03 01 movups xmm4, [eax + ebx + 01]
145 ;; addps xmm0, xmm1 0F 58 C1
146 ; 44A: 0F 58 E0 addps xmm0, xmm4
148 ;; movups ea, xmm0 0f 11 44 01 01
149 0F 11 44 01 01 movups [eax + ecx + 01], xmm0
151 ; 452: 83C004 ADD EAX, 4
156 c: 0f 58 c4 addps %xmm4,%xmm0
157 f: 0f 58 e0 addps %xmm0,%xmm4