From 8b5c397a96b4bfb760892f7ca5ba8534ac836b93 Mon Sep 17 00:00:00 2001 From: malc Date: Mon, 10 Nov 2008 00:32:52 +0300 Subject: [PATCH] Altivec support Too bad it's slower than plain code... --- skin.c | 32 ++++++++++++++++--------- vec.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 99 insertions(+), 18 deletions(-) diff --git a/skin.c b/skin.c index bd3891f..77afd13 100644 --- a/skin.c +++ b/skin.c @@ -20,18 +20,19 @@ struct skin { struct bone { int parent; - float v[3]; - float q[4]; - float mv[3]; + float v[4] A16; + float q[4] A16; + + float mv[4] A16; float mq[4]; float aq[4]; float amq[4]; - float amv[3]; + float amv[4]; - float am[12]; - float im[12]; + float am[16] A16; + float im[16] A16; }; typedef struct { @@ -173,7 +174,16 @@ static void translate (State *s, float *vdst, float *ndst) vsrc += 3, nsrc += 3, vdst += 3, ndst += 3, ++skin) { int z = 0; - float v[3] = {0,0,0}, n[3] = {0,0,0}, v0[3], v1[3], v2[3], w, m[12]; +#ifdef USE_ALTIVEC + float v[4] A16 = {0,0,0,0}, n[4] A16= {0,0,0,0}; + float v0[4] A16, v1[4] A16, m[16] A16, n1[4]; + float w; + + vcopy (n1, nsrc); +#else + float v[3] = {0,0,0}, n[3] = {0,0,0}, v0[3], v1[3], w, m[12]; + float *n1 = nsrc; +#endif for (j = 0; j < skin->num_bones; ++j) { w = skin->weights[j]; @@ -184,10 +194,10 @@ static void translate (State *s, float *vdst, float *ndst) mapply_to_vector (v1, b->im, v0); mscale (m, b->am, w); - mapply_to_point (v2, m, v1); - vaddto (v, v2); + mapply_to_point (v0, m, v1); + vaddto (v, v0); - mapply_to_vector (v0, b->im, nsrc); + mapply_to_vector (v0, b->im, n1); mapply_to_vector (v1, m, v0); vaddto (n, v1); } @@ -211,7 +221,7 @@ CAMLprim value ml_skin_set_skel (value skel_v) s->num_bones = Wosize_val (skel_v); size = (s->num_bones + 1) * sizeof (struct bone); - s->bones = b = stat_alloc (size); + s->bones = b = simd_alloc (16, size); memset (b, 0, size); b->parent = -1; diff --git a/vec.c b/vec.c index 1a7fe9d..9b80c2f 100644 --- a/vec.c +++ 
b/vec.c @@ -12,13 +12,6 @@ static void vcopy (float *res, float *v) *res++ = *v++; } -static void vaddto (float *v1, float *v2) -{ - v1[0] += v2[0]; - v1[1] += v2[1]; - v1[2] += v2[2]; -} - static void vadd (float *res, float *v1, float *v2) { res[0] = v1[0] + v2[0]; @@ -115,11 +108,81 @@ static void q2matrix (float *mat, float *q, float *v) mat[9] = 2 * ( yz + xw ); mat[10] = 1 - 2 * ( xx + yy ); +#ifdef USE_ALTIVEC + mat[12] = v[0]; + mat[13] = v[1]; + mat[14] = v[2]; +#else mat[3] = v[0]; mat[7] = v[1]; mat[11] = v[2]; +#endif +} + +#ifdef USE_ALTIVEC +#include <altivec.h> +#include <malloc.h> + +#define simd_alloc memalign +#define A16 __attribute__ ((aligned (16))) + +static void mscale (float *res, float *m, float s) +{ + vector float vs = {s,s,s,s}; + vector float r0 = vec_ld (0, m) * vs; + vector float r1 = vec_ld (16, m) * vs; + vector float r2 = vec_ld (32, m) * vs; + vector float r3 = vec_ld (48, m) * vs; + vec_st (r0, 0, res); + vec_st (r1, 16, res); + vec_st (r2, 32, res); + vec_st (r3, 48, res); +} + +static void mapply_to_point (float *res, float *m, float *v) +{ + vector float vv = vec_ld (0, v); + vector float r0 = vec_ld (0, m); + vector float r1 = vec_ld (16, m); + vector float r2 = vec_ld (32, m); + vector float r4 = vec_ld (48, m); + vector float x = vec_splat (vv, 0); + vector float y = vec_splat (vv, 1); + vector float z = vec_splat (vv, 2); + vector float vr1 = vec_madd (r0, x, r4); + vector float vr2 = vec_madd (r1, y, vr1); + vector float vr3 = vec_madd (r2, z, vr2); + vec_st (vr3, 0, res); } +static void mapply_to_vector (float *res, float *m, float *v) +{ + vector float vv = vec_ld (0, v); + vector float r0 = vec_ld (0, m); + vector float r1 = vec_ld (16, m); + vector float r2 = vec_ld (32, m); + vector float vz = (vector float) vec_splat_u32 (0); + vector float x = vec_splat (vv, 0); + vector float y = vec_splat (vv, 1); + vector float z = vec_splat (vv, 2); + vector float vr1 = vec_madd (r0, x, vz); + vector float vr2 = vec_madd (r1, y, vr1); + vector float 
vr3 = vec_madd (r2, z, vr2); + vec_st (vr3, 0, res); +} + +static void vaddto (float *v1, float *v2) +{ + vector float a = vec_ld (0, v1); + vector float b = vec_ld (0, v2); + vec_st (vec_add (a, b), 0, v1); +} + +#else + +#define simd_alloc(a, s) stat_alloc (s) +#define A16 + static void mscale (float *res, float *m, float s) { int i; @@ -145,3 +208,11 @@ static void mapply_to_vector (float *res, float *m, float *v) res[1] = x*m[1] + y*m[5] + z*m[9]; res[2] = x*m[2] + y*m[6] + z*m[10]; } + +static void vaddto (float *v1, float *v2) +{ + v1[0] += v2[0]; + v1[1] += v2[1]; + v1[2] += v2[2]; +} +#endif -- 2.11.4.GIT