fix issue with HBC stub + 64B L2 cache (tueidj)
[libogc.git] / libogc / gu.c
blob475e8e4b0763382a370166881a69e7db17125a3a
1 #include <gu.h>
2 #include <math.h>
4 extern void __ps_guMtxRotAxisRadInternal(register Mtx mt,const register guVector *axis,register f32 sT,register f32 cT);
6 void guFrustum(Mtx44 mt,f32 t,f32 b,f32 l,f32 r,f32 n,f32 f)
8 f32 tmp;
10 tmp = 1.0f/(r-l);
11 mt[0][0] = (2*n)*tmp;
12 mt[0][1] = 0.0f;
13 mt[0][2] = (r+l)*tmp;
14 mt[0][3] = 0.0f;
16 tmp = 1.0f/(t-b);
17 mt[1][0] = 0.0f;
18 mt[1][1] = (2*n)*tmp;
19 mt[1][2] = (t+b)*tmp;
20 mt[1][3] = 0.0f;
22 tmp = 1.0f/(f-n);
23 mt[2][0] = 0.0f;
24 mt[2][1] = 0.0f;
25 mt[2][2] = -(n)*tmp;
26 mt[2][3] = -(f*n)*tmp;
28 mt[3][0] = 0.0f;
29 mt[3][1] = 0.0f;
30 mt[3][2] = -1.0f;
31 mt[3][3] = 0.0f;
34 void guPerspective(Mtx44 mt,f32 fovy,f32 aspect,f32 n,f32 f)
36 f32 cot,angle,tmp;
38 angle = fovy*0.5f;
39 angle = DegToRad(angle);
41 cot = 1.0f/tanf(angle);
43 mt[0][0] = cot/aspect;
44 mt[0][1] = 0.0f;
45 mt[0][2] = 0.0f;
46 mt[0][3] = 0.0f;
48 mt[1][0] = 0.0f;
49 mt[1][1] = cot;
50 mt[1][2] = 0.0f;
51 mt[1][3] = 0.0f;
53 tmp = 1.0f/(f-n);
54 mt[2][0] = 0.0f;
55 mt[2][1] = 0.0f;
56 mt[2][2] = -(n)*tmp;
57 mt[2][3] = -(f*n)*tmp;
59 mt[3][0] = 0.0f;
60 mt[3][1] = 0.0f;
61 mt[3][2] = -1.0f;
62 mt[3][3] = 0.0f;
65 void guOrtho(Mtx44 mt,f32 t,f32 b,f32 l,f32 r,f32 n,f32 f)
67 f32 tmp;
69 tmp = 1.0f/(r-l);
70 mt[0][0] = 2.0f*tmp;
71 mt[0][1] = 0.0f;
72 mt[0][2] = 0.0f;
73 mt[0][3] = -(r+l)*tmp;
75 tmp = 1.0f/(t-b);
76 mt[1][0] = 0.0f;
77 mt[1][1] = 2.0f*tmp;
78 mt[1][2] = 0.0f;
79 mt[1][3] = -(t+b)*tmp;
81 tmp = 1.0f/(f-n);
82 mt[2][0] = 0.0f;
83 mt[2][1] = 0.0f;
84 mt[2][2] = -(1.0f)*tmp;
85 mt[2][3] = -(f)*tmp;
87 mt[3][0] = 0.0f;
88 mt[3][1] = 0.0f;
89 mt[3][2] = 0.0f;
90 mt[3][3] = 1.0f;
93 void guLightPerspective(Mtx mt,f32 fovY,f32 aspect,f32 scaleS,f32 scaleT,f32 transS,f32 transT)
95 f32 angle;
96 f32 cot;
98 angle = fovY*0.5f;
99 angle = DegToRad(angle);
101 cot = 1.0f/tanf(angle);
103 mt[0][0] = (cot / aspect) * scaleS;
104 mt[0][1] = 0.0f;
105 mt[0][2] = -transS;
106 mt[0][3] = 0.0f;
108 mt[1][0] = 0.0f;
109 mt[1][1] = cot * scaleT;
110 mt[1][2] = -transT;
111 mt[1][3] = 0.0f;
113 mt[2][0] = 0.0f;
114 mt[2][1] = 0.0f;
115 mt[2][2] = -1.0f;
116 mt[2][3] = 0.0f;
119 void guLightOrtho(Mtx mt,f32 t,f32 b,f32 l,f32 r,f32 scaleS,f32 scaleT,f32 transS,f32 transT)
121 f32 tmp;
123 tmp = 1.0f / (r - l);
124 mt[0][0] = (2.0f * tmp * scaleS);
125 mt[0][1] = 0.0f;
126 mt[0][2] = 0.0f;
127 mt[0][3] = ((-(r + l) * tmp) * scaleS) + transS;
129 tmp = 1.0f / (t - b);
130 mt[1][0] = 0.0f;
131 mt[1][1] = (2.0f * tmp) * scaleT;
132 mt[1][2] = 0.0f;
133 mt[1][3] = ((-(t + b) * tmp)* scaleT) + transT;
135 mt[2][0] = 0.0f;
136 mt[2][1] = 0.0f;
137 mt[2][2] = 0.0f;
138 mt[2][3] = 1.0f;
141 void guLightFrustum(Mtx mt,f32 t,f32 b,f32 l,f32 r,f32 n,f32 scaleS,f32 scaleT,f32 transS,f32 transT)
143 f32 tmp;
145 tmp = 1.0f / (r - l);
146 mt[0][0] = ((2*n) * tmp) * scaleS;
147 mt[0][1] = 0.0f;
148 mt[0][2] = (((r + l) * tmp) * scaleS) - transS;
149 mt[0][3] = 0.0f;
151 tmp = 1.0f / (t - b);
152 mt[1][0] = 0.0f;
153 mt[1][1] = ((2*n) * tmp) * scaleT;
154 mt[1][2] = (((t + b) * tmp) * scaleT) - transT;
155 mt[1][3] = 0.0f;
157 mt[2][0] = 0.0f;
158 mt[2][1] = 0.0f;
159 mt[2][2] = -1.0f;
160 mt[2][3] = 0.0f;
163 void guLookAt(Mtx mt,guVector *camPos,guVector *camUp,guVector *target)
165 guVector vLook,vRight,vUp;
167 vLook.x = camPos->x - target->x;
168 vLook.y = camPos->y - target->y;
169 vLook.z = camPos->z - target->z;
170 guVecNormalize(&vLook);
172 guVecCross(camUp,&vLook,&vRight);
173 guVecNormalize(&vRight);
175 guVecCross(&vLook,&vRight,&vUp);
177 mt[0][0] = vRight.x;
178 mt[0][1] = vRight.y;
179 mt[0][2] = vRight.z;
180 mt[0][3] = -( camPos->x * vRight.x + camPos->y * vRight.y + camPos->z * vRight.z );
182 mt[1][0] = vUp.x;
183 mt[1][1] = vUp.y;
184 mt[1][2] = vUp.z;
185 mt[1][3] = -( camPos->x * vUp.x + camPos->y * vUp.y + camPos->z * vUp.z );
187 mt[2][0] = vLook.x;
188 mt[2][1] = vLook.y;
189 mt[2][2] = vLook.z;
190 mt[2][3] = -( camPos->x * vLook.x + camPos->y * vLook.y + camPos->z * vLook.z );
193 void c_guMtxIdentity(Mtx mt)
195 s32 i,j;
197 for(i=0;i<3;i++) {
198 for(j=0;j<4;j++) {
199 if(i==j) mt[i][j] = 1.0;
200 else mt[i][j] = 0.0;
205 void c_guMtxRotRad(Mtx mt,const char axis,f32 rad)
207 f32 sinA,cosA;
209 sinA = sinf(rad);
210 cosA = cosf(rad);
212 c_guMtxRotTrig(mt,axis,sinA,cosA);
#ifdef GEKKO
/*
 * GEKKO-only counterpart of c_guMtxRotRad: evaluates sinf/cosf of rad and
 * hands the result to ps_guMtxRotTrig.
 */
void ps_guMtxRotRad(register Mtx mt,const register char axis,register f32 rad)
{
    register f32 sine,cosine;

    sine = sinf(rad);
    cosine = cosf(rad);

    ps_guMtxRotTrig(mt,axis,sine,cosine);
}

/*
 * GEKKO-only rotation about an arbitrary axis: evaluates sinf/cosf of rad
 * and hands them, with mt and axis, to __ps_guMtxRotAxisRadInternal.
 */
void ps_guMtxRotAxisRad(Mtx mt,guVector *axis,f32 rad)
{
    f32 sine,cosine;

    sine = sinf(rad);
    cosine = cosf(rad);

    __ps_guMtxRotAxisRadInternal(mt,axis,sine,cosine);
}

#endif
238 void c_guMtxRotTrig(Mtx mt,const char axis,f32 sinA,f32 cosA)
240 switch(axis) {
241 case 'x':
242 case 'X':
243 mt[0][0] = 1.0f; mt[0][1] = 0.0f; mt[0][2] = 0.0f; mt[0][3] = 0.0f;
244 mt[1][0] = 0.0f; mt[1][1] = cosA; mt[1][2] = -sinA; mt[1][3] = 0.0f;
245 mt[2][0] = 0.0f; mt[2][1] = sinA; mt[2][2] = cosA; mt[2][3] = 0.0f;
246 break;
247 case 'y':
248 case 'Y':
249 mt[0][0] = cosA; mt[0][1] = 0.0f; mt[0][2] = sinA; mt[0][3] = 0.0f;
250 mt[1][0] = 0.0f; mt[1][1] = 1.0f; mt[1][2] = 0.0f; mt[1][3] = 0.0f;
251 mt[2][0] = -sinA; mt[2][1] = 0.0f; mt[2][2] = cosA; mt[2][3] = 0.0f;
252 break;
253 case 'z':
254 case 'Z':
255 mt[0][0] = cosA; mt[0][1] = -sinA; mt[0][2] = 0.0f; mt[0][3] = 0.0f;
256 mt[1][0] = sinA; mt[1][1] = cosA; mt[1][2] = 0.0f; mt[1][3] = 0.0f;
257 mt[2][0] = 0.0f; mt[2][1] = 0.0f; mt[2][2] = 1.0f; mt[2][3] = 0.0f;
258 break;
259 default:
260 break;
264 void c_guMtxRotAxisRad(Mtx mt,guVector *axis,f32 rad)
266 f32 s,c;
267 f32 t;
268 f32 x,y,z;
269 f32 xSq,ySq,zSq;
271 s = sinf(rad);
272 c = cosf(rad);
273 t = 1.0f-c;
275 c_guVecNormalize(axis);
277 x = axis->x;
278 y = axis->y;
279 z = axis->z;
281 xSq = x*x;
282 ySq = y*y;
283 zSq = z*z;
285 mt[0][0] = ( t * xSq ) + ( c );
286 mt[0][1] = ( t * x * y ) - ( s * z );
287 mt[0][2] = ( t * x * z ) + ( s * y );
288 mt[0][3] = 0.0f;
290 mt[1][0] = ( t * x * y ) + ( s * z );
291 mt[1][1] = ( t * ySq ) + ( c );
292 mt[1][2] = ( t * y * z ) - ( s * x );
293 mt[1][3] = 0.0f;
295 mt[2][0] = ( t * x * z ) - ( s * y );
296 mt[2][1] = ( t * y * z ) + ( s * x );
297 mt[2][2] = ( t * zSq ) + ( c );
298 mt[2][3] = 0.0f;
302 void c_guMtxCopy(Mtx src,Mtx dst)
304 if(src==dst) return;
306 dst[0][0] = src[0][0]; dst[0][1] = src[0][1]; dst[0][2] = src[0][2]; dst[0][3] = src[0][3];
307 dst[1][0] = src[1][0]; dst[1][1] = src[1][1]; dst[1][2] = src[1][2]; dst[1][3] = src[1][3];
308 dst[2][0] = src[2][0]; dst[2][1] = src[2][1]; dst[2][2] = src[2][2]; dst[2][3] = src[2][3];
311 void c_guMtxConcat(Mtx a,Mtx b,Mtx ab)
313 Mtx tmp;
314 MtxP m;
316 if(ab==b || ab==a)
317 m = tmp;
318 else
319 m = ab;
321 m[0][0] = a[0][0]*b[0][0] + a[0][1]*b[1][0] + a[0][2]*b[2][0];
322 m[0][1] = a[0][0]*b[0][1] + a[0][1]*b[1][1] + a[0][2]*b[2][1];
323 m[0][2] = a[0][0]*b[0][2] + a[0][1]*b[1][2] + a[0][2]*b[2][2];
324 m[0][3] = a[0][0]*b[0][3] + a[0][1]*b[1][3] + a[0][2]*b[2][3] + a[0][3];
326 m[1][0] = a[1][0]*b[0][0] + a[1][1]*b[1][0] + a[1][2]*b[2][0];
327 m[1][1] = a[1][0]*b[0][1] + a[1][1]*b[1][1] + a[1][2]*b[2][1];
328 m[1][2] = a[1][0]*b[0][2] + a[1][1]*b[1][2] + a[1][2]*b[2][2];
329 m[1][3] = a[1][0]*b[0][3] + a[1][1]*b[1][3] + a[1][2]*b[2][3] + a[1][3];
331 m[2][0] = a[2][0]*b[0][0] + a[2][1]*b[1][0] + a[2][2]*b[2][0];
332 m[2][1] = a[2][0]*b[0][1] + a[2][1]*b[1][1] + a[2][2]*b[2][1];
333 m[2][2] = a[2][0]*b[0][2] + a[2][1]*b[1][2] + a[2][2]*b[2][2];
334 m[2][3] = a[2][0]*b[0][3] + a[2][1]*b[1][3] + a[2][2]*b[2][3] + a[2][3];
336 if(m==tmp)
337 c_guMtxCopy(tmp,ab);
340 void c_guMtxScale(Mtx mt,f32 xS,f32 yS,f32 zS)
342 mt[0][0] = xS; mt[0][1] = 0.0f; mt[0][2] = 0.0f; mt[0][3] = 0.0f;
343 mt[1][0] = 0.0f; mt[1][1] = yS; mt[1][2] = 0.0f; mt[1][3] = 0.0f;
344 mt[2][0] = 0.0f; mt[2][1] = 0.0f; mt[2][2] = zS; mt[2][3] = 0.0f;
347 void c_guMtxScaleApply(Mtx src,Mtx dst,f32 xS,f32 yS,f32 zS)
349 dst[0][0] = src[0][0] * xS; dst[0][1] = src[0][1] * xS;
350 dst[0][2] = src[0][2] * xS; dst[0][3] = src[0][3] * xS;
352 dst[1][0] = src[1][0] * yS; dst[1][1] = src[1][1] * yS;
353 dst[1][2] = src[1][2] * yS; dst[1][3] = src[1][3] * yS;
355 dst[2][0] = src[2][0] * zS; dst[2][1] = src[2][1] * zS;
356 dst[2][2] = src[2][2] * zS; dst[2][3] = src[2][3] * zS;
359 void c_guMtxApplyScale(Mtx src,Mtx dst,f32 xS,f32 yS,f32 zS)
361 dst[0][0] = src[0][0] * xS; dst[0][1] = src[0][1] * yS;
362 dst[0][2] = src[0][2] * zS; dst[0][3] = src[0][3];
364 dst[1][0] = src[1][0] * xS; dst[1][1] = src[1][1] * yS;
365 dst[1][2] = src[1][2] * zS; dst[1][3] = src[1][3];
367 dst[2][0] = src[2][0] * xS; dst[2][1] = src[2][1] * yS;
368 dst[2][2] = src[2][2] * zS; dst[2][3] = src[2][3];
371 void c_guMtxTrans(Mtx mt,f32 xT,f32 yT,f32 zT)
373 mt[0][0] = 1.0f; mt[0][1] = 0.0f; mt[0][2] = 0.0f; mt[0][3] = xT;
374 mt[1][0] = 0.0f; mt[1][1] = 1.0f; mt[1][2] = 0.0f; mt[1][3] = yT;
375 mt[2][0] = 0.0f; mt[2][1] = 0.0f; mt[2][2] = 1.0f; mt[2][3] = zT;
378 void c_guMtxTransApply(Mtx src,Mtx dst,f32 xT,f32 yT,f32 zT)
380 if ( src != dst )
382 dst[0][0] = src[0][0]; dst[0][1] = src[0][1]; dst[0][2] = src[0][2];
383 dst[1][0] = src[1][0]; dst[1][1] = src[1][1]; dst[1][2] = src[1][2];
384 dst[2][0] = src[2][0]; dst[2][1] = src[2][1]; dst[2][2] = src[2][2];
387 dst[0][3] = src[0][3] + xT;
388 dst[1][3] = src[1][3] + yT;
389 dst[2][3] = src[2][3] + zT;
392 void c_guMtxApplyTrans(Mtx src,Mtx dst,f32 xT,f32 yT,f32 zT)
394 if ( src != dst )
396 dst[0][0] = src[0][0]; dst[0][1] = src[0][1]; dst[0][2] = src[0][2];
397 dst[1][0] = src[1][0]; dst[1][1] = src[1][1]; dst[1][2] = src[1][2];
398 dst[2][0] = src[2][0]; dst[2][1] = src[2][1]; dst[2][2] = src[2][2];
401 dst[0][3] = src[0][0]*xT + src[0][1]*yT + src[0][2]*zT + src[0][3];
402 dst[1][3] = src[1][0]*xT + src[1][1]*yT + src[1][2]*zT + src[1][3];
403 dst[2][3] = src[2][0]*xT + src[2][1]*yT + src[2][2]*zT + src[2][3];
406 u32 c_guMtxInverse(Mtx src,Mtx inv)
408 Mtx mTmp;
409 MtxP m;
410 f32 det;
412 if(src==inv)
413 m = mTmp;
414 else
415 m = inv;
418 // compute the determinant of the upper 3x3 submatrix
419 det = src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1]
420 - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2];
423 // check if matrix is singular
424 if(det==0.0f)return 0;
427 // compute the inverse of the upper submatrix:
429 // find the transposed matrix of cofactors of the upper submatrix
430 // and multiply by (1/det)
432 det = 1.0f / det;
435 m[0][0] = (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det;
436 m[0][1] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det;
437 m[0][2] = (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det;
439 m[1][0] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det;
440 m[1][1] = (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det;
441 m[1][2] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det;
443 m[2][0] = (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det;
444 m[2][1] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det;
445 m[2][2] = (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det;
448 // compute (invA)*(-C)
449 m[0][3] = -m[0][0]*src[0][3] - m[0][1]*src[1][3] - m[0][2]*src[2][3];
450 m[1][3] = -m[1][0]*src[0][3] - m[1][1]*src[1][3] - m[1][2]*src[2][3];
451 m[2][3] = -m[2][0]*src[0][3] - m[2][1]*src[1][3] - m[2][2]*src[2][3];
453 // copy back if needed
454 if( m == mTmp )
455 c_guMtxCopy(mTmp,inv);
457 return 1;
460 void c_guMtxTranspose(Mtx src,Mtx xPose)
462 Mtx mTmp;
463 MtxP m;
465 if(src==xPose)
466 m = mTmp;
467 else
468 m = xPose;
471 m[0][0] = src[0][0]; m[0][1] = src[1][0]; m[0][2] = src[2][0]; m[0][3] = 0.0f;
472 m[1][0] = src[0][1]; m[1][1] = src[1][1]; m[1][2] = src[2][1]; m[1][3] = 0.0f;
473 m[2][0] = src[0][2]; m[2][1] = src[1][2]; m[2][2] = src[2][2]; m[2][3] = 0.0f;
476 // copy back if needed
477 if(m==mTmp)
478 c_guMtxCopy(mTmp,xPose);
481 u32 c_guMtxInvXpose(Mtx src, Mtx xPose)
483 Mtx mTmp;
484 MtxP m;
485 f32 det;
487 if(src == xPose)
488 m = mTmp;
489 else
490 m = xPose;
492 // Compute the determinant of the upper 3x3 submatrix
493 det = src[0][0]*src[1][1]*src[2][2] + src[0][1]*src[1][2]*src[2][0] + src[0][2]*src[1][0]*src[2][1]
494 - src[2][0]*src[1][1]*src[0][2] - src[1][0]*src[0][1]*src[2][2] - src[0][0]*src[2][1]*src[1][2];
496 // Check if matrix is singular
497 if(det == 0.0f) return 0;
499 // Compute the inverse of the upper submatrix:
501 // Find the transposed matrix of cofactors of the upper submatrix
502 // and multiply by (1/det)
504 det = 1.0f / det;
506 m[0][0] = (src[1][1]*src[2][2] - src[2][1]*src[1][2]) * det;
507 m[0][1] = -(src[1][0]*src[2][2] - src[2][0]*src[1][2]) * det;
508 m[0][2] = (src[1][0]*src[2][1] - src[2][0]*src[1][1]) * det;
510 m[1][0] = -(src[0][1]*src[2][2] - src[2][1]*src[0][2]) * det;
511 m[1][1] = (src[0][0]*src[2][2] - src[2][0]*src[0][2]) * det;
512 m[1][2] = -(src[0][0]*src[2][1] - src[2][0]*src[0][1]) * det;
514 m[2][0] = (src[0][1]*src[1][2] - src[1][1]*src[0][2]) * det;
515 m[2][1] = -(src[0][0]*src[1][2] - src[1][0]*src[0][2]) * det;
516 m[2][2] = (src[0][0]*src[1][1] - src[1][0]*src[0][1]) * det;
519 // The 4th columns should be zero
520 m[0][3] = 0.0F;
521 m[1][3] = 0.0F;
522 m[2][3] = 0.0F;
524 // Copy back if needed
525 if(m == mTmp)
526 c_guMtxCopy(mTmp, xPose);
528 return 1;
531 void c_guMtxReflect(Mtx m,guVector *p,guVector *n)
533 f32 vxy, vxz, vyz, pdotn;
535 vxy = -2.0f * n->x * n->y;
536 vxz = -2.0f * n->x * n->z;
537 vyz = -2.0f * n->y * n->z;
538 pdotn = 2.0f * c_guVecDotProduct(p,n);
540 m[0][0] = 1.0f - 2.0f * n->x * n->x;
541 m[0][1] = vxy;
542 m[0][2] = vxz;
543 m[0][3] = pdotn * n->x;
545 m[1][0] = vxy;
546 m[1][1] = 1.0f - 2.0f * n->y * n->y;
547 m[1][2] = vyz;
548 m[1][3] = pdotn * n->y;
550 m[2][0] = vxz;
551 m[2][1] = vyz;
552 m[2][2] = 1.0f - 2.0f * n->z * n->z;
553 m[2][3] = pdotn * n->z;
557 void c_guVecAdd(guVector *a,guVector *b,guVector *ab)
559 ab->x = a->x + b->x;
560 ab->y = a->y + b->y;
561 ab->z = a->z + b->z;
564 void c_guVecSub(guVector *a,guVector *b,guVector *ab)
566 ab->x = a->x - b->x;
567 ab->y = a->y - b->y;
568 ab->z = a->z - b->z;
571 void c_guVecScale(guVector *src,guVector *dst,f32 scale)
573 dst->x = src->x * scale;
574 dst->y = src->y * scale;
575 dst->z = src->z * scale;
579 void c_guVecNormalize(guVector *v)
581 f32 m;
583 m = ((v->x)*(v->x)) + ((v->y)*(v->y)) + ((v->z)*(v->z));
584 m = 1/sqrtf(m);
585 v->x *= m;
586 v->y *= m;
587 v->z *= m;
590 void c_guVecCross(guVector *a,guVector *b,guVector *axb)
592 guVector vTmp;
594 vTmp.x = (a->y*b->z)-(a->z*b->y);
595 vTmp.y = (a->z*b->x)-(a->x*b->z);
596 vTmp.z = (a->x*b->y)-(a->y*b->x);
598 axb->x = vTmp.x;
599 axb->y = vTmp.y;
600 axb->z = vTmp.z;
603 void c_guVecMultiply(Mtx mt,guVector *src,guVector *dst)
605 guVector tmp;
607 tmp.x = mt[0][0]*src->x + mt[0][1]*src->y + mt[0][2]*src->z + mt[0][3];
608 tmp.y = mt[1][0]*src->x + mt[1][1]*src->y + mt[1][2]*src->z + mt[1][3];
609 tmp.z = mt[2][0]*src->x + mt[2][1]*src->y + mt[2][2]*src->z + mt[2][3];
611 dst->x = tmp.x;
612 dst->y = tmp.y;
613 dst->z = tmp.z;
616 void c_guVecMultiplySR(Mtx mt,guVector *src,guVector *dst)
618 guVector tmp;
620 tmp.x = mt[0][0]*src->x + mt[0][1]*src->y + mt[0][2]*src->z;
621 tmp.y = mt[1][0]*src->x + mt[1][1]*src->y + mt[1][2]*src->z;
622 tmp.z = mt[2][0]*src->x + mt[2][1]*src->y + mt[2][2]*src->z;
624 // copy back
625 dst->x = tmp.x;
626 dst->y = tmp.y;
627 dst->z = tmp.z;
630 f32 c_guVecDotProduct(guVector *a,guVector *b)
632 f32 dot;
634 dot = (a->x * b->x) + (a->y * b->y) + (a->z * b->z);
636 return dot;
639 void c_guQuatAdd(guQuaternion *a,guQuaternion *b,guQuaternion *ab)
641 ab->x = a->x + b->x;
642 ab->y = a->x + b->y;
643 ab->z = a->x + b->z;
644 ab->w = a->x + b->w;
#ifdef GEKKO
/*
 * Paired-single quaternion sum: ab = a + b, processed as two pairs of
 * floats. The pair at offset 0 is loaded, added and stored first, then the
 * pair at offset 8.
 */
void ps_guQuatAdd(register guQuaternion *a,register guQuaternion *b,register guQuaternion *ab)
{
    register f32 lhsPair,rhsPair;

    __asm__ __volatile__ (
        "psq_l      %0,0(%2),0,0\n"     // lhs = [ax][ay]
        "psq_l      %1,0(%3),0,0\n"     // rhs = [bx][by]
        "ps_add     %1,%0,%1\n"         // rhs = [ax+bx][ay+by]
        "psq_st     %1,0(%4),0,0\n"     // ab.x, ab.y
        "psq_l      %0,8(%2),0,0\n"     // lhs = [az][aw]
        "psq_l      %1,8(%3),0,0\n"     // rhs = [bz][bw]
        "ps_add     %1,%0,%1\n"         // rhs = [az+bz][aw+bw]
        "psq_st     %1,8(%4),0,0"       // ab.z, ab.w
        : "=&f"(lhsPair),"=&f"(rhsPair)
        : "b"(a),"b"(b),"b"(ab)
        : "memory"
    );
}
#endif
668 void c_guQuatSub(guQuaternion *a,guQuaternion *b,guQuaternion *ab)
670 ab->x = a->x - b->x;
671 ab->y = a->x - b->y;
672 ab->z = a->x - b->z;
673 ab->w = a->x - b->w;
#ifdef GEKKO
/*
 * Paired-single quaternion difference: ab = a - b, processed as two pairs
 * of floats. The pair at offset 0 is loaded, subtracted and stored first,
 * then the pair at offset 8.
 */
void ps_guQuatSub(register guQuaternion *a,register guQuaternion *b,register guQuaternion *ab)
{
    register f32 lhsPair,rhsPair;

    __asm__ __volatile__ (
        "psq_l      %0,0(%2),0,0\n"     // lhs = [ax][ay]
        "psq_l      %1,0(%3),0,0\n"     // rhs = [bx][by]
        "ps_sub     %1,%0,%1\n"         // rhs = [ax-bx][ay-by]
        "psq_st     %1,0(%4),0,0\n"     // ab.x, ab.y
        "psq_l      %0,8(%2),0,0\n"     // lhs = [az][aw]
        "psq_l      %1,8(%3),0,0\n"     // rhs = [bz][bw]
        "ps_sub     %1,%0,%1\n"         // rhs = [az-bz][aw-bw]
        "psq_st     %1,8(%4),0,0"       // ab.z, ab.w
        : "=&f"(lhsPair),"=&f"(rhsPair)
        : "b"(a),"b"(b),"b"(ab)
        : "memory"
    );
}
#endif
697 void c_guQuatMultiply(guQuaternion *a,guQuaternion *b,guQuaternion *ab)
699 guQuaternion *r;
700 guQuaternion ab_tmp;
702 if(a==ab || b==ab) r = &ab_tmp;
703 else r = ab;
705 r->w = a->w*b->w - a->x*b->x - a->y*b->y - a->z*b->z;
706 r->x = a->w*b->x + a->x*b->w + a->y*b->z - a->z*b->y;
707 r->y = a->w*b->y + a->y*b->w + a->z*b->x - a->x*b->z;
708 r->z = a->w*b->z + a->z*b->w + a->x*b->y - a->y*b->x;
710 if(r==&ab_tmp) *ab = ab_tmp;
#ifdef GEKKO
/*
 * Paired-single quaternion product: ab = a * b (a is "p" and b is "q" in
 * the per-instruction notes below).
 *
 * Both operands are loaded completely before the first store to ab.
 */
void ps_guQuatMultiply(register guQuaternion *a,register guQuaternion *b,register guQuaternion *ab)
{
    register f32 pXY,pZW,qXY,qZW;
    register f32 negPXY,negPZW,mixXY,mixZW,accA,accB,accC,accD;

    __asm__ __volatile__ (
        "psq_l      %0,0(%12),0,0\n"    // [px][py]
        "psq_l      %1,8(%12),0,0\n"    // [pz][pw]
        "psq_l      %2,0(%13),0,0\n"    // [qx][qy]
        "ps_neg     %4,%0\n"            // [-px][-py]
        "psq_l      %3,8(%13),0,0\n"    // [qz][qw]
        "ps_neg     %5,%1\n"            // [-pz][-pw]
        "ps_merge01 %6,%4,%0\n"         // [-px][py]
        "ps_muls0   %8,%1,%2\n"         // [pz*qx][pw*qx]
        "ps_muls0   %9,%4,%2\n"         // [-px*qx][-py*qx]
        "ps_merge01 %7,%5,%1\n"         // [-pz][pw]
        "ps_muls1   %11,%6,%2\n"        // [-px*qy][py*qy]
        "ps_madds0  %8,%6,%3,%8\n"      // [-px*qz+pz*qx][py*qz+pw*qx]
        "ps_muls1   %10,%7,%2\n"        // [-pz*qy][pw*qy]
        "ps_madds0  %9,%7,%3,%9\n"      // [-pz*qz+-px*qx][pw*qz+-py*qx]
        "ps_madds1  %11,%5,%3,%11\n"    // [-pz*qw+-px*qy][-pw*qw+py*qy]
        "ps_merge10 %8,%8,%8\n"         // swap halves: [py*qz+pw*qx][-px*qz+pz*qx]
        "ps_madds1  %10,%0,%3,%10\n"    // [px*qw+-pz*qy][py*qw+pw*qy]
        "ps_merge10 %9,%9,%9\n"         // swap halves: [pw*qz+-py*qx][-pz*qz+-px*qx]
        "ps_add     %8,%8,%10\n"        // final [x][y]
        "psq_st     %8,0(%14),0,0\n"    // ab.x, ab.y
        "ps_sub     %9,%9,%11\n"        // final [z][w]
        "psq_st     %9,8(%14),0,0"      // ab.z, ab.w
        : "=&f"(pXY),"=&f"(pZW),"=&f"(qXY),"=&f"(qZW),"=&f"(negPXY),"=&f"(negPZW),"=&f"(mixXY),"=&f"(mixZW),"=&f"(accA),"=&f"(accB),"=&f"(accC),"=&f"(accD)
        : "b"(a),"b"(b),"b"(ab)
        : "memory"
    );
}
#endif
749 void c_guQuatNormalize(guQuaternion *a,guQuaternion *d)
751 f32 dot,scale;
753 dot = (a->x*a->x) + (a->y*a->y) + (a->z*a->z) + (a->w*a->w);
754 if(dot==0.0f) d->x = d->y = d->z = d->w = 0.0f;
755 else {
756 scale = 1.0f/sqrtf(dot);
757 d->x = a->x*scale;
758 d->y = a->y*scale;
759 d->z = a->z*scale;
760 d->w = a->w*scale;
#ifdef GEKKO
/*
 * Paired-single quaternion normalization: d = a / |a|.
 *
 * The reciprocal square root of the squared magnitude comes from frsqrte
 * refined by one Newton-Raphson step. When the squared magnitude is zero
 * the scale factor is forced to 0, so d becomes all zeros like in
 * c_guQuatNormalize. The selection is done on the negated magnitude:
 * ps_sel picks its third operand when the second is >= 0, and the squared
 * magnitude itself is never negative, so testing it directly could not
 * detect the zero case.
 */
void ps_guQuatNormalize(register guQuaternion *a,register guQuaternion *d)
{
    register f32 c_zero = 0.0f;
    register f32 c_half = 0.5f;
    register f32 c_three = 3.0f;
    register f32 axy,azw,tmp0,tmp1,tmp2,tmp3;

    __asm__ __volatile__ (
        "psq_l      %0,0(%6),0,0\n"     // [ax][ay]
        "ps_mul     %2,%0,%0\n"         // [ax*ax][ay*ay]
        "psq_l      %1,8(%6),0,0\n"     // [az][aw]
        "ps_madd    %2,%1,%1,%2\n"      // [az*az+ax*ax][aw*aw+ay*ay]
        "ps_sum0    %2,%2,%2,%2\n"      // [az*az+ax*ax+aw*aw+ay*ay][aw*aw+ay*ay]
        "frsqrte    %3,%2\n"            // reciprocal sqrt estimate E
        // Newton-Raphson refinement, 1 step: (E/2)*(3 - x*E*E)
        "fmul       %4,%3,%3\n"         // E*E
        "fmul       %5,%3,%8\n"         // E*0.5 = E/2
        "fnmsub     %4,%4,%2,%9\n"      // -(E*E*x - 3) = (3 - x*E*E)
        "fmul       %3,%4,%5\n"         // (E/2)*(3 - x*E*E)
        "ps_neg     %4,%2\n"            // -mag, which is >= 0 only when mag == 0
        "ps_sel     %3,%4,%10,%3\n"     // mag == 0 ? 0.0f : refined rsqrt
        "ps_muls0   %0,%0,%3\n"         // [ax*rsqmag][ay*rsqmag]
        "ps_muls0   %1,%1,%3\n"         // [az*rsqmag][aw*rsqmag]
        "psq_st     %0,0(%7),0,0\n"     // X = [ax*rsqmag], Y = [ay*rsqmag]
        "psq_st     %1,8(%7),0,0\n"     // Z = [az*rsqmag], W = [aw*rsqmag]
        : "=&f"(axy),"=&f"(azw),"=&f"(tmp0),"=&f"(tmp1),"=&f"(tmp2),"=&f"(tmp3)
        : "b"(a),"b"(d),"f"(c_half),"f"(c_three),"f"(c_zero)
        : "memory"
    );
}
#endif
796 void c_guQuatInverse(guQuaternion *a,guQuaternion *d)
798 f32 mag,nrminv;
800 mag = (a->x*a->x) + (a->y*a->y) + (a->z*a->z) + (a->w*a->w);
801 if(mag==0.0f) mag = 1.0f;
803 nrminv = 1.0f/mag;
804 d->x = -a->x*nrminv;
805 d->y = -a->y*nrminv;
806 d->z = -a->z*nrminv;
807 d->w = a->w*nrminv;
#ifdef GEKKO
/*
 * Paired-single quaternion inverse: d = conjugate(a) / |a|^2.
 *
 * The reciprocal of the squared magnitude comes from fres refined by one
 * Newton-Raphson step, E' = E*(2 - mag*E). The constant 2 is built as
 * c_one + c_one. When the squared magnitude is zero the reciprocal is
 * replaced by 1, as in c_guQuatInverse.
 *
 * The asm compares with fcmpu on cr0 and stores through d, so cr0 and
 * memory are declared as clobbered.
 */
void ps_guQuatInverse(register guQuaternion *a,register guQuaternion *d)
{
    register f32 c_one = 1.0f;
    register f32 axy,azw,tmp0,tmp1,tmp2,tmp3,tmp4,tmp5;

    __asm__ __volatile__ (
        "psq_l      %0,0(%8),0,0\n"     // [ax][ay]
        "ps_mul     %2,%0,%0\n"         // [ax*ax][ay*ay]
        "ps_sub     %3,%10,%10\n"       // [1 - 1][1 - 1] = zero
        "psq_l      %1,8(%8),0,0\n"     // [az][aw]
        "ps_madd    %2,%1,%1,%2\n"      // [az*az+ax*ax][aw*aw+ay*ay]
        "ps_add     %7,%10,%10\n"       // [1 + 1][1 + 1] = two
        "ps_sum0    %2,%2,%2,%2\n"      // [az*az+ax*ax+aw*aw+ay*ay][aw*aw+ay*ay]
        "fcmpu      cr0,%2,%3\n"        // mag == 0.0f ?
        "beq-       1f\n"
        "fres       %4,%2\n"            // E = estimate of 1.0f/mag
        "ps_neg     %5,%2\n"            // -mag (overwritten below before any use)
        // Newton-Raphson refinement (x1) : E' = 2E-X*E*E
        "ps_nmsub   %6,%2,%4,%7\n"      // 2 - mag*E
        "ps_mul     %4,%4,%6\n"         // E*(2 - mag*E)
        "b          2f\n"
        "1:\n"
        "fmr        %4,%10\n"           // zero magnitude: use 1 as reciprocal
        "2:\n"
        "ps_neg     %7,%4\n"            // -nrminv
        "ps_muls1   %5,%4,%1\n"         // ps0 = nrminv*aw
        "ps_muls0   %0,%0,%7\n"         // [-ax*nrminv][-ay*nrminv]
        "psq_st     %5,12(%9),1,0\n"    // W = aw*nrminv (single store)
        "ps_muls0   %6,%1,%7\n"         // [-az*nrminv][-aw*nrminv]
        "psq_st     %0,0(%9),0,0\n"     // X, Y
        "psq_st     %6,8(%9),1,0\n"     // Z = -az*nrminv (single store)
        : "=&f"(axy),"=&f"(azw),"=&f"(tmp0),"=&f"(tmp1),"=&f"(tmp2),"=&f"(tmp3),"=&f"(tmp4),"=&f"(tmp5)
        : "b"(a),"b"(d),"f"(c_one)
        : "cr0","memory"
    );
}
#endif
848 void c_guQuatMtx(guQuaternion *a,Mtx m)
850 const f32 diag = guMtxRowCol(m,0,0) + guMtxRowCol(m,1,1) + guMtxRowCol(m,2,2) + 1;
852 if(diag>0.0f) {
853 const f32 scale = sqrtf(diag)*2.0f;
855 a->x = (guMtxRowCol(m,2,1) - guMtxRowCol(m,1,2))/scale;
856 a->y = (guMtxRowCol(m,0,2) - guMtxRowCol(m,2,0))/scale;
857 a->z = (guMtxRowCol(m,1,0) - guMtxRowCol(m,0,1))/scale;
858 a->w = 0.25f*scale;
859 } else {
860 if(guMtxRowCol(m,0,0)>guMtxRowCol(m,1,1) && guMtxRowCol(m,0,0)>guMtxRowCol(m,2,2)) {
861 const f32 scale = sqrtf(1.0f + guMtxRowCol(m,0,0) + guMtxRowCol(m,1,1) + guMtxRowCol(m,2,2))*2.0f;
863 a->x = 0.25f*scale;
864 a->y = (guMtxRowCol(m,0,1) + guMtxRowCol(m,1,0))/scale;
865 a->z = (guMtxRowCol(m,2,0) + guMtxRowCol(m,0,2))/scale;
866 a->w = (guMtxRowCol(m,2,1) - guMtxRowCol(m,1,2))/scale;
867 } else if(guMtxRowCol(m,1,1)>guMtxRowCol(m,2,2)) {
868 const f32 scale = sqrtf(1.0f + guMtxRowCol(m,0,0) + guMtxRowCol(m,1,1) + guMtxRowCol(m,2,2))*2.0f;
870 a->x = (guMtxRowCol(m,0,1) + guMtxRowCol(m,1,0))/scale;
871 a->y = 0.25f*scale;
872 a->z = (guMtxRowCol(m,1,2) + guMtxRowCol(m,2,1))/scale;
873 a->w = (guMtxRowCol(m,0,2) - guMtxRowCol(m,2,0))/scale;
874 } else {
875 const f32 scale = sqrtf(1.0f + guMtxRowCol(m,0,0) + guMtxRowCol(m,1,1) + guMtxRowCol(m,2,2))*2.0f;
877 a->x = (guMtxRowCol(m,0,2) + guMtxRowCol(m,2,0))/scale;
878 a->y = (guMtxRowCol(m,1,2) + guMtxRowCol(m,2,1))/scale;
879 a->z = 0.25f*scale;
880 a->w = (guMtxRowCol(m,1,0) - guMtxRowCol(m,0,1))/scale;
883 c_guQuatNormalize(a,a);
886 void c_guMtxQuat(Mtx m,guQuaternion *a)
888 guMtxRowCol(m,0,0) = 1.0f - 2.0f*a->y*a->y - 2.0f*a->z*a->z;
889 guMtxRowCol(m,1,0) = 2.0f*a->x*a->y - 2.0f*a->z*a->w;
890 guMtxRowCol(m,2,0) = 2.0f*a->x*a->z + 2.0f*a->y*a->w;
892 guMtxRowCol(m,0,1) = 2.0f*a->x*a->y + 2.0f*a->z*a->w;
893 guMtxRowCol(m,1,1) = 1.0f - 2.0f*a->x*a->x - 2.0f*a->z*a->z;
894 guMtxRowCol(m,2,1) = 2.0f*a->z*a->y - 2.0f*a->x*a->w;
896 guMtxRowCol(m,0,2) = 2.0f*a->x*a->z - 2.0f*a->y*a->w;
897 guMtxRowCol(m,1,2) = 2.0f*a->z*a->y + 2.0f*a->x*a->w;
898 guMtxRowCol(m,2,2) = 1.0f - 2.0f*a->x*a->x - 2.0f*a->y*a->y;
901 void guVecHalfAngle(guVector *a,guVector *b,guVector *half)
903 guVector tmp1,tmp2,tmp3;
905 tmp1.x = -a->x;
906 tmp1.y = -a->y;
907 tmp1.z = -a->z;
909 tmp2.x = -b->x;
910 tmp2.y = -b->y;
911 tmp2.z = -b->z;
913 guVecNormalize(&tmp1);
914 guVecNormalize(&tmp2);
916 guVecAdd(&tmp1,&tmp2,&tmp3);
917 if(guVecDotProduct(&tmp3,&tmp3)>0.0f) guVecNormalize(&tmp3);
919 *half = tmp3;