1 static void vneg (float *res
, float *v
)
8 static void vcopy (float *res
, float *v
)
15 static void vadd (float *res
, float *v1
, float *v2
)
17 res
[0] = v1
[0] + v2
[0];
18 res
[1] = v1
[1] + v2
[1];
19 res
[2] = v1
[2] + v2
[2];
22 static void vsub (float *res
, float *v1
, float *v2
)
24 res
[0] = v1
[0] - v2
[0];
25 res
[1] = v1
[1] - v2
[1];
26 res
[2] = v1
[2] - v2
[2];
29 static void qconjugate (float *res
, float *q
)
35 static float qmagnitude (float *q
)
37 return sqrt (q
[0]*q
[0]+q
[1]*q
[1]+q
[2]*q
[2]+q
[3]*q
[3]);
40 static void qapply (float *res
, float *q
, float *v
)
49 float t2
, t3
, t4
, t5
, t6
, t7
, t8
, t9
, t10
;
59 res
[0] = 2*( (t8
+ t10
)*v1
+ (t6
- t4
)*v2
+ (t3
+ t7
)*v3
) + v1
;
60 res
[1] = 2*( (t4
+ t6
)*v1
+ (t5
+ t10
)*v2
+ (t9
- t2
)*v3
) + v2
;
61 res
[2] = 2*( (t7
- t3
)*v1
+ (t2
+ t9
)*v2
+ (t5
+ t8
)*v3
) + v3
;
64 static void qscale (float *res
, float *q
, float scale
)
66 *res
++ = *q
++ * scale
;
67 *res
++ = *q
++ * scale
;
68 *res
++ = *q
++ * scale
;
69 *res
++ = *q
++ * scale
;
72 static void qcompose (float *res
, float *q1
, float *q2
)
74 res
[0] = q1
[3]*q2
[0] + q1
[0]*q2
[3] + q1
[1]*q2
[2] - q1
[2]*q2
[1];
75 res
[1] = q1
[3]*q2
[1] + q1
[1]*q2
[3] + q1
[2]*q2
[0] - q1
[0]*q2
[2];
76 res
[2] = q1
[3]*q2
[2] + q1
[2]*q2
[3] + q1
[0]*q2
[1] - q1
[1]*q2
[0];
77 res
[3] = q1
[3]*q2
[3] - q1
[0]*q2
[0] - q1
[1]*q2
[1] - q1
[2]*q2
[2];
80 static void q2matrix (float *mat
, float *q
, float *v
)
99 mat
[0] = 1 - 2 * ( yy
+ zz
);
100 mat
[1] = 2 * ( xy
- zw
);
101 mat
[2] = 2 * ( xz
+ yw
);
103 mat
[4] = 2 * ( xy
+ zw
);
104 mat
[5] = 1 - 2 * ( xx
+ zz
);
105 mat
[6] = 2 * ( yz
- xw
);
107 mat
[8] = 2 * ( xz
- yw
);
108 mat
[9] = 2 * ( yz
+ xw
);
109 mat
[10] = 1 - 2 * ( xx
+ yy
);
126 #define simd_alloc memalign
127 #define A16 __attribute__ ((aligned (16)))
129 static void mscale (float *res
, float *m
, float s
)
131 vector
float vs
= {s
,s
,s
,s
};
132 vector
float r0
= vec_ld (0, m
) * vs
;
133 vector
float r1
= vec_ld (16, m
) * vs
;
134 vector
float r2
= vec_ld (32, m
) * vs
;
135 vector
float r3
= vec_ld (48, m
) * vs
;
137 vec_st (r1
, 16, res
);
138 vec_st (r2
, 32, res
);
139 vec_st (r3
, 48, res
);
142 /* http://www.freevec.org/category/simd/algorithms/algebra/matrix_operations */
143 static void mapply_to_point (float *res
, float *m
, float *v
)
145 vector
float vv
= vec_ld (0, v
);
146 vector
float r0
= vec_ld (0, m
);
147 vector
float r1
= vec_ld (16, m
);
148 vector
float r2
= vec_ld (32, m
);
149 vector
float r4
= vec_ld (48, m
);
150 vector
float x
= vec_splat (vv
, 0);
151 vector
float y
= vec_splat (vv
, 1);
152 vector
float z
= vec_splat (vv
, 2);
153 vector
float vr1
= vec_madd (r0
, x
, r4
);
154 vector
float vr2
= vec_madd (r1
, y
, vr1
);
155 vector
float vr3
= vec_madd (r2
, z
, vr2
);
156 vec_st (vr3
, 0, res
);
159 static void mapply_to_vector (float *res
, float *m
, float *v
)
161 vector
float vv
= vec_ld (0, v
);
162 vector
float r0
= vec_ld (0, m
);
163 vector
float r1
= vec_ld (16, m
);
164 vector
float r2
= vec_ld (32, m
);
165 vector
float vz
= (vector
float) vec_splat_u32 (0);
166 vector
float x
= vec_splat (vv
, 0);
167 vector
float y
= vec_splat (vv
, 1);
168 vector
float z
= vec_splat (vv
, 2);
169 vector
float vr1
= vec_madd (r0
, x
, vz
);
170 vector
float vr2
= vec_madd (r1
, y
, vr1
);
171 vector
float vr3
= vec_madd (r2
, z
, vr2
);
172 vec_st (vr3
, 0, res
);
175 static void vaddto (float *v1
, float *v2
)
177 vector
float a
= vec_ld (0, v1
);
178 vector
float b
= vec_ld (0, v2
);
179 vec_st (vec_add (a
, b
), 0, v1
);
184 #define simd_alloc(a, s) stat_alloc (s)
187 static void mscale (float *res
, float *m
, float s
)
190 for (i
= 0; i
< 12; ++i
) *res
++ = *m
++ * s
;
193 static void mapply_to_point (float *res
, float *m
, float *v
)
198 res
[0] = x
*m
[0] + y
*m
[4] + z
*m
[8] + m
[3];
199 res
[1] = x
*m
[1] + y
*m
[5] + z
*m
[9] + m
[7];
200 res
[2] = x
*m
[2] + y
*m
[6] + z
*m
[10] + m
[11];
203 static void mapply_to_vector (float *res
, float *m
, float *v
)
208 res
[0] = x
*m
[0] + y
*m
[4] + z
*m
[8];
209 res
[1] = x
*m
[1] + y
*m
[5] + z
*m
[9];
210 res
[2] = x
*m
[2] + y
*m
[6] + z
*m
[10];
213 static void vaddto (float *v1
, float *v2
)