Make FPU version run fast
[dormin.git] / skin.c
blob8209e7ec00be9611172f3b2e242f1c0c74417266
1 #include <math.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
6 #include <caml/fail.h>
7 #include <caml/alloc.h>
8 #include <caml/memory.h>
10 #include "vec.c"
11 #include "pgl.h"
/* Slots of the per-attribute pointer tables: vertices, normals, texture
   coordinates and colours.  COUNT is the number of slots. */
enum {
    V_IDX,
    N_IDX,
    UV_IDX,
    C_IDX,
    COUNT
};
/* Build-time layout configuration.
   - simd_alloc(s): allocator used for vertex/normal/bone storage.
   - A16:           alignment attribute appended to struct definitions.
   - STRIDE:        16 with USE_ALTIVEC, 0 otherwise.
   - V_ELEMS:       floats stored per vertex/normal (4 with USE_ALTIVEC, else 3).
   - AL16(i):       rounds i up to a multiple of 16 with USE_ALTIVEC, identity
                    otherwise. */
#ifdef USE_ALTIVEC
/* Altivec code derived from: */
/* http://www.freevec.org/category/simd/algorithms/algebra/matrix_operations */

#include <altivec.h>
#ifndef __APPLE__
#include <malloc.h>
#define simd_alloc(s) memalign (16, (s))
#else
/* NOTE(review): presumably relies on the platform malloc returning 16-byte
   aligned blocks - confirm. */
#define simd_alloc(s) malloc (s)
#endif
#define A16 __attribute__ ((aligned (16)))
#define STRIDE 16
#define V_ELEMS 4
#define AL16(i) (((i)+15)&~15)

#else

#define STRIDE 0
#define V_ELEMS 3
#define simd_alloc(s) malloc (s)
#define A16
#define AL16(i) (i)

#endif
/* Per-vertex skinning record holding up to three bone influences.
   Without USE_ALTIVEC there is one float weight per bone; with USE_ALTIVEC
   each weight is stored replicated over four floats (one 16-byte vector per
   bone) so it can be loaded directly with vec_ld. */
struct skin {
#ifndef USE_ALTIVEC
    float weights[3];
#else
    float weights[3 * 4];
#endif
    int boneindices[3];     /* indices into the bone array */
    int num_bones;          /* number of entries in use above */
} A16;
/* One skeleton bone.  Field roles as used by this file:
   v, q      - loaded from the skeleton description in ml_skin_set_skel
               (three floats into v, four into q; presumably an offset and
               a rotation quaternion - confirm against vec.c helpers).
   mv, mq    - derived in ml_skin_set_skel from v/q and the parent's mv/mq.
   aq        - loaded from animation data in ml_skin_set_anim.
   amq, amv  - derived in ml_skin_set_anim from aq/v and the parent's amq/amv.
   cm        - 16-float matrix written by q2matrixt and read by translate.
   parent    - index of the parent bone in the bone array (-1 for entry 0). */
struct bone {
    float v[4], q[4];

    float mv[4], mq[4];

    float aq[4], amq[4], amv[4];

    float cm[16];
    int parent;
} A16;
66 typedef struct {
67 int num_bones;
68 int num_vertices;
69 GLuint bufid[2];
70 void *ptrs[2];
71 void *bufs[COUNT];
72 struct skin *skin;
73 struct bone *bones;
74 } State;
76 static State glob_state;
77 static int use_vbo;
79 static void copy_vertices (float *p, int num_vertices, value a_v)
81 int i, k;
83 for (i = 0, k = 0; i < num_vertices; ++i, p += V_ELEMS) {
84 p[0] = Double_field (a_v, k++);
85 p[1] = Double_field (a_v, k++);
86 p[2] = Double_field (a_v, k++);
87 #ifdef USE_ALTIVEC
88 p[3] = 1.0;
89 #endif
93 static void set_geom (State *s, void **ptrs, value vertexa_v, value normala_v,
94 value uva_v, value skin_v, value colors_v)
96 int i;
97 float *p;
98 int num_vertices;
99 struct skin *skin;
101 num_vertices = Wosize_val (vertexa_v) / (Double_wosize * 3);
103 copy_vertices (ptrs[V_IDX], num_vertices, vertexa_v);
104 copy_vertices (ptrs[N_IDX], num_vertices, normala_v);
106 for (i = 0, p = ptrs[UV_IDX]; i < num_vertices * 2; ++i) {
107 p[i] = Double_field (uva_v, i);
109 memcpy (ptrs[C_IDX], String_val (colors_v), num_vertices * 4);
111 skin = s->skin;
112 for (i = 0; i < num_vertices; ++i) {
113 int j;
114 value v;
116 v = Field (skin_v, i);
117 skin[i].num_bones = Int_val (Field (v, 3));
119 for (j = 0; j < skin[i].num_bones; ++j) {
120 double val, w;
122 val = Double_val (Bp_val (Field (v, j)));
124 skin[i].boneindices[j] = (int) val;
125 w = val - skin[i].boneindices[j];
126 #ifdef USE_ALTIVEC
127 vector float vw = {w,w,w,w};
129 vec_st (vw, j*16, skin[i].weights);
130 #else
131 skin[i].weights[j] = w;
132 #endif
133 skin[i].boneindices[j] += 1;
138 static void skin_init (State *s, value vertexa_v, value normala_v,
139 value uva_v, value skin_v, value colors_v)
141 char *p;
142 GLsizei sizevn, sizev, sizeu, sizec;
143 void *ptrs[COUNT];
145 s->num_vertices = Wosize_val (vertexa_v) / (Double_wosize * 3);
147 sizev = V_ELEMS * sizeof (GLfloat) * s->num_vertices;
148 sizeu = 2 * sizeof (GLfloat) * s->num_vertices;
149 sizec = 4 * s->num_vertices;
151 sizevn = sizev * 2;
153 p = simd_alloc (AL16 (sizevn) + s->num_vertices * sizeof (struct skin));
154 s->skin = (struct skin *) (p + AL16 (sizevn));
155 s->ptrs[0] = ptrs[V_IDX] = p;
156 ptrs[N_IDX] = p + sizev;
158 p = stat_alloc (sizec + sizeu);
159 s->ptrs[1] = ptrs[UV_IDX] = p;
160 ptrs[C_IDX] = p + sizeu;
162 set_geom (s, ptrs, vertexa_v, normala_v, uva_v, skin_v, colors_v);
164 if (use_vbo) {
165 glGenBuffers (2, s->bufid);
167 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[0]);
168 glBufferData (GL_ARRAY_BUFFER, sizevn, s->ptrs[0], GL_DYNAMIC_DRAW);
170 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[1]);
171 glBufferData (GL_ARRAY_BUFFER, sizeu+sizec, s->ptrs[1], GL_STATIC_DRAW);
173 glBindBuffer (GL_ARRAY_BUFFER, 0);
174 stat_free (s->ptrs[1]);
176 p = NULL;
177 s->bufs[V_IDX] = p;
178 s->bufs[N_IDX] = p + sizev;
179 s->bufs[UV_IDX] = p;
180 s->bufs[C_IDX] = p + sizeu;
182 else {
183 p = simd_alloc (sizevn);
184 s->bufs[V_IDX] = p;
185 s->bufs[N_IDX] = p + sizev;
186 s->bufs[UV_IDX] = ptrs[UV_IDX];
187 s->bufs[C_IDX] = ptrs[C_IDX];
189 memcpy (p, s->ptrs[0], sizevn);
193 CAMLprim value ml_skin_draw_begin (value unit_v)
195 State *s = &glob_state;
197 (void) unit_v;
199 glEnableClientState (GL_VERTEX_ARRAY);
200 glEnableClientState (GL_NORMAL_ARRAY);
201 glEnableClientState (GL_TEXTURE_COORD_ARRAY);
202 glEnableClientState (GL_COLOR_ARRAY);
204 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[0]);
205 glVertexPointer (3, GL_FLOAT, V_ELEMS * sizeof (GLfloat), s->bufs[V_IDX]);
206 glNormalPointer (GL_FLOAT, V_ELEMS * sizeof (GLfloat), s->bufs[N_IDX]);
208 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[1]);
209 glTexCoordPointer (2, GL_FLOAT, 0, s->bufs[UV_IDX]);
210 glColorPointer (4, GL_UNSIGNED_BYTE, 0, s->bufs[C_IDX]);
212 return Val_unit;
215 CAMLprim value ml_skin_draw_end (value unit_v)
217 (void) unit_v;
218 glDisableClientState (GL_VERTEX_ARRAY);
219 glDisableClientState (GL_NORMAL_ARRAY);
220 glDisableClientState (GL_TEXTURE_COORD_ARRAY);
221 glDisableClientState (GL_COLOR_ARRAY);
222 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, 0);
223 return Val_unit;
226 CAMLprim value ml_skin_init (value use_vbo_v, value geom_v)
228 CAMLparam2 (use_vbo_v, geom_v);
229 CAMLlocal5 (vertexa_v, normala_v, uva_v, skin_v, colors_v);
230 State *s = &glob_state;
232 use_vbo = Bool_val (use_vbo_v);
233 #ifdef _WIN32
234 if (use_vbo) {
235 GETPA (BindBuffer);
236 GETPA (GenBuffers);
237 GETPA (BufferData);
238 GETPA (BufferSubData);
239 GETPA (MapBuffer);
240 GETPA (UnmapBuffer);
242 #endif
243 vertexa_v = Field (geom_v, 0);
244 normala_v = Field (geom_v, 1);
245 uva_v = Field (geom_v, 2);
246 skin_v = Field (geom_v, 3);
247 colors_v = Field (geom_v, 4);
249 skin_init (s, vertexa_v, normala_v, uva_v, skin_v, colors_v);
250 CAMLreturn (Val_unit);
253 #ifdef TIMING
254 #include <err.h>
255 #include <sys/time.h>
256 static double now (void)
258 struct timeval tv;
260 if (gettimeofday (&tv, NULL)) err (1, "gettimeofday");
261 return tv.tv_sec + tv.tv_usec * 1e-6;
263 #endif
265 static void translate (State *s, float *vdst, float *ndst)
267 int i, j;
268 struct bone *b;
269 float *vsrc = s->ptrs[0];
270 float *nsrc = vsrc + s->num_vertices * V_ELEMS;
271 struct skin *skin = s->skin;
273 #ifdef TIMING
274 double S = now (), E;
275 #endif
277 #ifdef USE_ALTIVEC
278 for (i = 0; i < s->num_vertices; ++i, ++skin) {
279 vector float v, n, vs, ns, vz;
280 vector float r0, r1, r2, r3, nx, ny, nz;
282 v = n = vz = (vector float) vec_splat_u32 (0);
284 vs = vec_ld (i<<4, vsrc);
285 ns = vec_ld (i<<4, nsrc);
287 nx = vec_splat (ns, 0);
288 ny = vec_splat (ns, 1);
289 nz = vec_splat (ns, 2);
291 for (j = 0; j < skin->num_bones; ++j) {
292 vector float vw, x, y, z, t0, t1, t2;
294 b = &s->bones[skin->boneindices[j]];
296 vw = vec_ld (j<<4, skin->weights);
298 r0 = vec_ld ( 0, b->cm);
299 r1 = vec_ld (16, b->cm);
300 r2 = vec_ld (32, b->cm);
301 r3 = vec_ld (48, b->cm);
303 x = vec_splat (vs, 0);
304 y = vec_splat (vs, 1);
305 z = vec_splat (vs, 2);
307 t0 = vec_madd (r0, x, r3);
308 t1 = vec_madd (r1, y, t0);
309 t2 = vec_madd (r2, z, t1);
310 v = vec_madd (t2, vw, v);
312 t0 = vec_madd (r0, nx, vz);
313 t1 = vec_madd (r1, ny, t0);
314 t2 = vec_madd (r2, nz, t1);
315 n = vec_madd (t2, vw, n);
317 vec_st (v, i<<4, vdst);
318 vec_st (n, i<<4, ndst);
320 #else
321 for (i = 0; i < s->num_vertices; ++i,
322 vsrc += 3, nsrc += 3, vdst += 3, ndst += 3, ++skin)
324 float v[3] = {0,0,0}, n[3] = {0,0,0}, v0[4], v1[4], w;
326 for (j = 0; j < skin->num_bones; ++j) {
327 w = skin->weights[j];
328 b = &s->bones[skin->boneindices[j]];
330 mapply_to_point (v1, b->cm, vsrc);
331 v1[0] *= w;
332 v1[1] *= w;
333 v1[2] *= w;
335 mapply_to_vector (v0, b->cm, nsrc);
336 v0[0] *= w;
337 v0[1] *= w;
338 v0[2] *= w;
340 vaddto (v, v1);
341 vaddto (n, v0);
344 vcopy (vdst, v);
345 vcopy (ndst, n);
347 #endif
349 #ifdef TIMING
350 E = now ();
351 printf ("took %f sec\n", E - S);
352 #endif
355 CAMLprim value ml_skin_set_skel (value skel_v)
357 int i;
358 size_t size;
359 struct bone *b;
360 CAMLparam1 (skel_v);
361 CAMLlocal2 (v, floats_v);
362 State *s = &glob_state;
364 s->num_bones = Wosize_val (skel_v);
365 size = (s->num_bones + 1) * sizeof (struct bone);
366 s->bones = b = simd_alloc (size);
368 memset (b, 0, size);
369 b->parent = -1;
370 b->q[3] = 1.0;
371 b->mq[3] = 1.0;
372 b->aq[3] = 1.0;
373 b->amq[3] = 1.0;
374 b++;
376 for (i = 0; i < s->num_bones; ++i, ++b) {
377 v = Field (skel_v, i);
378 floats_v = Field (v, 1);
380 b->parent = Int_val (Field (v, 0)) + 1;
382 b->v[0] = Double_field (floats_v, 1);
383 b->v[1] = Double_field (floats_v, 2);
384 b->v[2] = Double_field (floats_v, 3);
386 b->q[0] = Double_field (floats_v, 5);
387 b->q[1] = Double_field (floats_v, 6);
388 b->q[2] = Double_field (floats_v, 7);
389 b->q[3] = Double_field (floats_v, 8);
392 b = s->bones + 1;
393 for (i = 0; i < s->num_bones; ++i, ++b) {
394 float v[3];
395 struct bone *parent = &s->bones[b->parent];
397 qapply (v, parent->mq, b->v);
398 qcompose (b->mq, b->q, parent->mq);
399 vadd (b->mv, v, parent->mv);
400 #ifdef USE_ALTIVEC
401 b->cm[3] = b->mv[0];
402 b->cm[7] = b->mv[1];
403 b->cm[11] = b->mv[2];
404 #endif
407 CAMLreturn (Val_unit);
410 CAMLprim value ml_skin_set_anim (value anim_v)
412 int i;
413 CAMLparam1 (anim_v);
414 CAMLlocal1 (floats_v);
415 State *s = &glob_state;
416 struct bone *b = s->bones + 1;
418 for (i = 0; i < s->num_bones; ++i, ++b) {
419 floats_v = Field (anim_v, i);
420 b->aq[0] = Double_field (floats_v, 0);
421 b->aq[1] = Double_field (floats_v, 1);
422 b->aq[2] = Double_field (floats_v, 2);
423 b->aq[3] = Double_field (floats_v, 3);
426 b = s->bones + 1;
427 for (i = 0; i < s->num_bones; ++i, ++b) {
428 float v[4], v1[4], q[4], q1[4];
429 struct bone *parent = &s->bones[b->parent];
431 qapply (v, parent->amq, b->v);
432 qcompose (b->amq, b->aq, parent->amq);
433 vadd (b->amv, v, parent->amv);
435 qconjugate (q1, b->mq);
436 qcompose (q, q1, b->amq);
438 qapply (v, q, b->mv);
439 vsub (v1, b->amv, v);
440 q2matrixt (b->cm, q, v1);
443 CAMLreturn (Val_unit);
446 CAMLprim value ml_skin_anim (value unit_v)
448 GLboolean ret;
449 CAMLparam1 (unit_v);
450 float *vdst, *ndst;
451 State *s = &glob_state;
453 if (use_vbo) {
454 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[0]);
455 vdst = ndst = glMapBuffer (GL_ARRAY_BUFFER, GL_WRITE_ONLY);
456 if (!vdst) caml_failwith ("glMapBuffer failed");
458 ndst += (float *) s->bufs[N_IDX] - (float *) s->bufs[V_IDX];
460 else {
461 vdst = s->bufs[V_IDX];
462 ndst = s->bufs[N_IDX];
465 translate (s, vdst, ndst);
467 if (use_vbo) {
468 ret = glUnmapBuffer (GL_ARRAY_BUFFER);
469 if (ret == GL_FALSE) caml_failwith ("glUnmapBuffer failed");
472 CAMLreturn (Val_unit);
475 #ifndef GL_GENERATE_MIPMAP
476 #define GL_GENERATE_MIPMAP 0x8191
477 #endif
479 CAMLprim value ml_set_generate_mipmaps (value unit_v)
481 (void) unit_v;
482 glTexParameteri (GL_TEXTURE_2D, GL_GENERATE_MIPMAP, GL_TRUE);
483 return Val_unit;