*** empty log message ***
[sb-simd.git] / sse-matrix.lisp
blobc0f5976efeacb68fc6d96632af55b94d2a0d06e7
1 #|
2 Copyright (c) 2005 Risto Laakso
3 All rights reserved.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 3. The name of the author may not be used to endorse or promote products
14 derived from this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 http://developer.intel.com/design/pentiumiii/sml/24504501.pdf
32 (in-package :sb-vm)
;;; VECT-EA base &optional idx
;;;
;;; Build the MAKE-EA form used by the INST forms in this file to address
;;; memory relative to the vector register BASE.
;;;
;;; The dispatch on IDX happens at macroexpansion time, on the *syntax*
;;; of the argument:
;;;   - omitted / NIL  : address is BASE + data displacement
;;;   - literal number : the number is added to the data displacement
;;;   - symbol         : the symbol is spliced in as the :INDEX operand
;;;   - anything else  : rejected with an error (it used to be silently
;;;                      ignored, which produced an address without the
;;;                      requested offset)
(defmacro vect-ea (base &optional idx)
  "Expand into a (MAKE-EA :DWORD ...) form based on BASE.

The displacement is always
  (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)
-- presumably the distance from a tagged vector pointer to its first
data element (confirm against the SBCL object layout).

IDX may be omitted, a literal number (added to the displacement), or a
symbol (used as the :INDEX operand).  Any other form signals an error at
macroexpansion time."
  (let ((data-disp
          '(- (* vector-data-offset n-word-bytes) other-pointer-lowtag)))
    (cond ((null idx)
           `(make-ea :dword :base ,base :disp ,data-disp))
          ((numberp idx)
           ;; Constant offset: fold it into the displacement expression.
           `(make-ea :dword :base ,base :disp (+ ,data-disp ,idx)))
          ((symbolp idx)
           ;; Symbol: pass it through as the index operand.
           `(make-ea :dword :base ,base :index ,idx :disp ,data-disp))
          (t
           ;; A compound form cannot be classified as displacement or
           ;; index here; fail loudly instead of dropping it.
           (error "VECT-EA: index ~S is neither a literal number nor a symbol."
                  idx)))))
45 (DEFINE-VOP (%sse-matrix-mul-3x3/single-float)
46 (:POLICY :FAST-SAFE)
47 (:ARGS (RESULT :SCS (DESCRIPTOR-REG))
48 (MAT1 :SCS (DESCRIPTOR-REG))
49 (MAT2 :SCS (DESCRIPTOR-REG)))
50 (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT
51 SIMPLE-ARRAY-SINGLE-FLOAT
52 SIMPLE-ARRAY-SINGLE-FLOAT)
54 (:TEMPORARY (:SC XMM-REG) X0)
55 (:TEMPORARY (:SC XMM-REG) X1)
56 (:TEMPORARY (:SC XMM-REG) X2)
57 (:TEMPORARY (:SC XMM-REG) X3)
58 (:TEMPORARY (:SC XMM-REG) X4)
59 (:TEMPORARY (:SC XMM-REG) X5)
60 (:TEMPORARY (:SC XMM-REG) X6)
61 (:TEMPORARY (:SC XMM-REG) X7)
63 (:GENERATOR 10
64 (inst movss x2 (vect-ea mat2 32))
65 (inst movhps x2 (vect-ea mat2 24))
67 (inst movss x3 (vect-ea mat1))
68 (inst movss x4 (vect-ea mat1 4))
70 (inst movss x0 (vect-ea mat2))
71 (inst movhps x0 (vect-ea mat2 4))
72 (inst shufps x2 x2 #X36)
73 (inst shufps x3 x3 0)
75 (inst movss x1 (vect-ea mat2 12))
76 (inst movhps x1 (vect-ea mat2 16))
78 (inst shufps x4 x4 0)
79 (inst mulps x3 x0)
80 (inst movss x5 (vect-ea mat1 8))
81 (inst movss x6 (vect-ea mat1 12))
82 (inst mulps x4 x1)
83 (inst shufps x5 x5 0)
84 (inst mulps x5 x2)
85 (inst shufps x6 x6 0)
86 (inst mulps x6 x0)
87 (inst addps x3 x4)
89 (inst movss x7 (vect-ea mat1 16))
90 (inst movss x4 (vect-ea mat1 28))
92 (inst shufps x7 x7 0)
93 (inst addps x3 x5)
94 (inst mulps x7 x1)
96 (inst shufps x4 x4 0)
98 (inst movss x5 (vect-ea mat1 20))
99 (inst shufps x5 x5 0)
100 (inst mulps x4 x1)
102 (inst mulps x5 x2)
103 (inst addps x6 x7)
105 (inst movss x1 (vect-ea mat1 24))
107 (inst movss (Vect-ea result) x3)
108 (inst movhpd (vect-ea result 4) x3)
110 (inst addps x6 x5)
111 (inst shufps x1 x1 0)
113 (inst movss x5 (vect-ea mat1 32))
114 (inst mulps x1 x0)
115 (inst shufps x5 x5 0)
117 (inst movss (vect-ea result 12) x6)
118 (inst mulps x5 x2)
119 (inst addps x1 x4)
120 (inst movhps (vect-ea result 16) x6)
121 (inst addps x1 x5)
122 (inst shufps x1 x1 #x8F)
124 (inst movhps (vect-ea result 24) x1)
125 (inst movss (vect-ea result 32) x1)