;;; Copyright (c) 2005 Risto Laakso
;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:
;;; 1. Redistributions of source code must retain the above copyright
;;;    notice, this list of conditions and the following disclaimer.
;;; 2. Redistributions in binary form must reproduce the above copyright
;;;    notice, this list of conditions and the following disclaimer in the
;;;    documentation and/or other materials provided with the distribution.
;;; 3. The name of the author may not be used to endorse or promote products
;;;    derived from this software without specific prior written permission.
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
;;; IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;; http://developer.intel.com/design/pentiumiii/sml/24504501.pdf
(defmacro vect-ea (base &optional idx)
  "Expand into a (MAKE-EA :DWORD ...) form that addresses the data area of
the vector held in BASE.

The displacement is always
  (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG).
IDX is inspected at macroexpansion time:
  - a literal number is added to that displacement;
  - a non-NIL symbol is passed through as the :INDEX operand;
  - when omitted (or anything else), only BASE and the displacement are used."
  (let ((disp
          ;; Only a literal number can be folded into the displacement.
          (if (and idx (numberp idx))
              `(+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG) ,idx)
              `(- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG))))
    ;; (format t "ea ~A ~A ~A~%" base idx (and idx (symbolp idx)))
    ;; NIL is itself a symbol, so the (and idx ...) guard is what keeps an
    ;; omitted IDX from being emitted as an :INDEX operand.
    (if (and idx (symbolp idx))
        `(make-ea :dword :base ,base :index ,idx :disp ,disp)
        `(make-ea :dword :base ,base :disp ,disp))))
45 (DEFINE-VOP (%sse-matrix-mul-3x3
/single-float
)
47 (:ARGS
(RESULT :SCS
(DESCRIPTOR-REG))
48 (MAT1 :SCS
(DESCRIPTOR-REG))
49 (MAT2 :SCS
(DESCRIPTOR-REG)))
50 (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT
51 SIMPLE-ARRAY-SINGLE-FLOAT
52 SIMPLE-ARRAY-SINGLE-FLOAT
)
54 (:TEMPORARY
(:SC XMM-REG
) X0
)
55 (:TEMPORARY
(:SC XMM-REG
) X1
)
56 (:TEMPORARY
(:SC XMM-REG
) X2
)
57 (:TEMPORARY
(:SC XMM-REG
) X3
)
58 (:TEMPORARY
(:SC XMM-REG
) X4
)
59 (:TEMPORARY
(:SC XMM-REG
) X5
)
60 (:TEMPORARY
(:SC XMM-REG
) X6
)
61 (:TEMPORARY
(:SC XMM-REG
) X7
)
64 (inst movss x2
(vect-ea mat2
32))
65 (inst movhps x2
(vect-ea mat2
24))
67 (inst movss x3
(vect-ea mat1
))
68 (inst movss x4
(vect-ea mat1
4))
70 (inst movss x0
(vect-ea mat2
))
71 (inst movhps x0
(vect-ea mat2
4))
72 (inst shufps x2 x2
#X36
)
75 (inst movss x1
(vect-ea mat2
12))
76 (inst movhps x1
(vect-ea mat2
16))
80 (inst movss x5
(vect-ea mat1
8))
81 (inst movss x6
(vect-ea mat1
12))
89 (inst movss x7
(vect-ea mat1
16))
90 (inst movss x4
(vect-ea mat1
28))
98 (inst movss x5
(vect-ea mat1
20))
105 (inst movss x1
(vect-ea mat1
24))
107 (inst movss
(Vect-ea result
) x3
)
108 (inst movhps
(vect-ea result
4) x3
)
111 (inst shufps x1 x1
0)
113 (inst movss x5
(vect-ea mat1
32))
115 (inst shufps x5 x5
0)
117 (inst movss
(vect-ea result
12) x6
)
120 (inst movhps
(vect-ea result
16) x6
)
122 (inst shufps x1 x1
#x8F
)
124 (inst movhps
(vect-ea result
24) x1
)
125 (inst movss
(vect-ea result
32) x1
)