Hardware skinning
[dormin.git] / skin.c
blob309956e07642f8cced42dd0cfad30be353ab8c8e
1 #include <math.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
6 #include <caml/fail.h>
7 #include <caml/alloc.h>
8 #include <caml/memory.h>
10 #include "vec.c"
11 #include "pgl.h"
/* Slots used to index the per-attribute arrays of State (bufid, ptrs,
   bufs): vertex positions, normals, texture coordinates and colors.
   COUNT is the number of slots. */
enum {
    V_IDX,
    N_IDX,
    UV_IDX,
    C_IDX,
    COUNT
};
/* SIMD configuration.

   simd_alloc - allocator used for every vertex/skin/bone buffer
   A16        - 16-byte alignment attribute (empty without AltiVec)
   STRIDE     - byte stride handed to glVertexPointer/glNormalPointer;
                16 because the AltiVec path stores 4 floats per element,
                0 (tightly packed) otherwise */
#if defined (USE_ALTIVEC)
/* Altivec code derived from: */
/* http://www.freevec.org/category/simd/algorithms/algebra/matrix_operations */

#include <altivec.h>
#ifdef __APPLE__
/* NOTE(review): presumably plain malloc is already suitably aligned on
   this platform - confirm */
#define simd_alloc malloc
#else
#include <malloc.h>
#define simd_alloc(s) memalign (16, s)
#endif
#define A16 __attribute__ ((aligned (16)))
#define STRIDE 16

#else  /* !USE_ALTIVEC */

#define STRIDE 0
#define simd_alloc(s) malloc (s)
#define A16

#endif /* USE_ALTIVEC */
/* Per-vertex skinning record filled in by skin_init: the bones that
   influence one vertex and the weight of each influence. */
struct skin {
    /* Index into State.bones for each influence.  skin_init stores the
       OCaml-side index plus one, slot 0 being the placeholder bone. */
    int boneindices[3];
#ifndef USE_ALTIVEC
    /* One weight per influence. */
    float weights[3];
#else
    /* Each weight replicated into all four lanes of one vector. */
    float weights[12] A16;
#endif
    /* Number of entries of boneindices/weights in use. */
    int num_bones;
} A16;
47 struct bone {
48 int parent;
50 float v[4] A16;
51 float q[4] A16;
53 float mv[4] A16;
54 float mq[4];
56 float aq[4];
57 float amq[4];
58 float amv[4];
60 float cm[16] A16;
61 } A16;
63 typedef struct {
64 int num_bones;
65 int num_vertices;
66 GLuint bufid[COUNT];
67 float *ptrs[COUNT];
68 void *bufs[COUNT];
69 struct skin *skin;
70 struct bone *bones;
71 } State;
73 static State glob_state;
74 static int use_vbo;
76 static void skin_init (State *s, value vertexa_v, value normala_v,
77 value uva_v, value skin_v, value colors_v)
79 int i;
80 GLsizei size;
81 float *p;
82 struct skin *skin;
83 s->num_vertices = Wosize_val (vertexa_v) / (Double_wosize * 3);
85 if (use_vbo)
86 glGenBuffers (COUNT, s->bufid);
88 #ifdef USE_ALTIVEC
89 size = s->num_vertices * sizeof (GLfloat) * 4;
90 p = s->ptrs[V_IDX] = simd_alloc (size);
91 for (i = 0; i < s->num_vertices; ++i) {
92 p[i*4 + 0] = Double_field (vertexa_v, i*3 + 0);
93 p[i*4 + 1] = Double_field (vertexa_v, i*3 + 1);
94 p[i*4 + 2] = Double_field (vertexa_v, i*3 + 2);
95 p[i*4 + 3] = 1.0;
97 #else
98 size = s->num_vertices * sizeof (GLfloat) * 3;
99 p = s->ptrs[V_IDX] = simd_alloc (size);
100 for (i = 0; i < s->num_vertices * 3; ++i) {
101 p[i] = Double_field (vertexa_v, i);
103 #endif
104 if (use_vbo) {
105 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[V_IDX]);
106 glBufferData (GL_ARRAY_BUFFER, size, p, GL_DYNAMIC_DRAW);
108 else {
109 s->bufs[V_IDX] = simd_alloc (size);
110 memcpy (s->bufs[V_IDX], p, size);
113 #ifdef USE_ALTIVEC
114 p = s->ptrs[N_IDX] = simd_alloc (size);
115 for (i = 0; i < s->num_vertices; ++i) {
116 p[i*4 + 0] = Double_field (normala_v, i*3 + 0);
117 p[i*4 + 1] = Double_field (normala_v, i*3 + 1);
118 p[i*4 + 2] = Double_field (normala_v, i*3 + 2);
119 p[i*4 + 3] = 1.0;
121 #else
122 p = s->ptrs[N_IDX] = simd_alloc (size);
123 for (i = 0; i < s->num_vertices * 3; ++i) {
124 p[i] = Double_field (normala_v, i);
126 #endif
127 if (use_vbo) {
128 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[N_IDX]);
129 glBufferData (GL_ARRAY_BUFFER, size, p, GL_DYNAMIC_DRAW);
131 else {
132 s->bufs[N_IDX] = simd_alloc (size);
133 memcpy (s->bufs[N_IDX], p, size);
136 size = s->num_vertices * sizeof (GLfloat) * 2;
137 p = s->ptrs[UV_IDX] = simd_alloc (size);
138 for (i = 0; i < s->num_vertices * 2; ++i) {
139 p[i] = Double_field (uva_v, i);
141 if (use_vbo) {
142 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[UV_IDX]);
143 glBufferData (GL_ARRAY_BUFFER, size, p, GL_STATIC_DRAW);
145 else {
146 s->bufs[UV_IDX] = simd_alloc (size);
147 memcpy (s->bufs[UV_IDX], p, size);
149 free (p);
151 size = s->num_vertices * 4;
152 if (use_vbo) {
153 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[C_IDX]);
154 glBufferData (GL_ARRAY_BUFFER, size, String_val (colors_v), GL_STATIC_DRAW);
156 else {
157 s->bufs[C_IDX] = simd_alloc (size);
158 memcpy (s->bufs[C_IDX], String_val (colors_v), size);
161 s->skin = skin = simd_alloc (s->num_vertices * sizeof (struct skin));
162 for (i = 0; i < s->num_vertices; ++i) {
163 int j;
164 value v;
166 v = Field (skin_v, i);
167 skin[i].num_bones = Int_val (Field (v, 3));
169 for (j = 0; j < skin[i].num_bones; ++j) {
170 double val, w;
172 val = Double_val (Bp_val (Field (v, j)));
173 skin[i].boneindices[j] = (int) val;
174 w = val - skin[i].boneindices[j];
175 #ifdef USE_ALTIVEC
176 vector float vw = {w,w,w,w};
178 vec_st (vw, j*16, skin[i].weights);
179 #else
180 skin[i].weights[j] = w;
181 #endif
182 skin[i].boneindices[j] += 1;
187 CAMLprim value ml_skin_draw_begin (value unit_v)
189 State *s = &glob_state;
191 (void) unit_v;
192 glEnableClientState (GL_VERTEX_ARRAY);
193 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[V_IDX]);
194 glVertexPointer (3, GL_FLOAT, STRIDE, s->bufs[V_IDX]);
196 glEnableClientState (GL_NORMAL_ARRAY);
197 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[N_IDX]);
198 glNormalPointer (GL_FLOAT, STRIDE, s->bufs[N_IDX]);
200 glEnableClientState (GL_TEXTURE_COORD_ARRAY);
201 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[UV_IDX]);
202 glTexCoordPointer (2, GL_FLOAT, 0, s->bufs[UV_IDX]);
204 glEnableClientState (GL_COLOR_ARRAY);
205 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[C_IDX]);
206 glColorPointer (4, GL_UNSIGNED_BYTE, 0, s->bufs[C_IDX]);
208 return Val_unit;
211 CAMLprim value ml_skin_draw_end (value unit_v)
213 (void) unit_v;
214 glDisableClientState (GL_VERTEX_ARRAY);
215 glDisableClientState (GL_NORMAL_ARRAY);
216 glDisableClientState (GL_TEXTURE_COORD_ARRAY);
217 glDisableClientState (GL_COLOR_ARRAY);
218 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, 0);
219 return Val_unit;
222 CAMLprim value ml_skin_init (value use_vbo_v, value geom_v)
224 CAMLparam2 (use_vbo_v, geom_v);
225 CAMLlocal5 (vertexa_v, normala_v, uva_v, skin_v, colors_v);
226 State *s = &glob_state;
228 use_vbo = Bool_val (use_vbo_v);
229 #ifdef _WIN32
230 if (use_vbo) {
231 GETPA (BindBuffer);
232 GETPA (GenBuffers);
233 GETPA (BufferData);
234 GETPA (MapBuffer);
235 GETPA (UnmapBuffer);
237 #endif
238 vertexa_v = Field (geom_v, 0);
239 normala_v = Field (geom_v, 1);
240 uva_v = Field (geom_v, 2);
241 skin_v = Field (geom_v, 3);
242 colors_v = Field (geom_v, 4);
244 skin_init (s, vertexa_v, normala_v, uva_v, skin_v, colors_v);
245 CAMLreturn (Val_unit);
#ifdef TIMING
#include <err.h>
#include <sys/time.h>

/* Current time of day in seconds, with the microsecond part as the
   fraction.  Exits through err() when gettimeofday fails. */
static double now (void)
{
    struct timeval tv;
    int rc = gettimeofday (&tv, NULL);

    if (rc != 0) {
        err (1, "gettimeofday");
    }
    return tv.tv_sec + tv.tv_usec * 1e-6;
}
#endif
260 static void translate (State *s, float *vdst, float *ndst)
262 int i, j;
263 struct bone *b;
264 float *vsrc = s->ptrs[V_IDX];
265 float *nsrc = s->ptrs[N_IDX];
266 struct skin *skin = s->skin;
268 #ifdef TIMING
269 double S = now (), E;
270 #endif
272 #ifdef USE_ALTIVEC
273 for (i = 0; i < s->num_vertices; ++i, ++skin) {
274 vector float v, n, vs, ns, vz;
275 vector float r0, r1, r2, r3, nx, ny, nz;
277 v = n = vz = (vector float) vec_splat_u32 (0);
279 vs = vec_ld (i<<4, vsrc);
280 ns = vec_ld (i<<4, nsrc);
282 nx = vec_splat (ns, 0);
283 ny = vec_splat (ns, 1);
284 nz = vec_splat (ns, 2);
286 for (j = 0; j < skin->num_bones; ++j) {
287 vector float vw, v1, x, y, z, t0, t1, t2;
288 #if 1
289 vector unsigned char p0 =
290 {12,13,14,15,28,29,30,31};
291 vector unsigned char p1 =
292 {0,1,2,3,4,5,6,7,28,29,30,31};
293 #endif
295 b = &s->bones[skin->boneindices[j]];
297 vw = vec_ld (j<<4, skin->weights);
299 r0 = vec_ld ( 0, b->cm);
300 r1 = vec_ld (16, b->cm);
301 r2 = vec_ld (32, b->cm);
302 r3 = vec_ld (48, b->cm);
304 #if 0
305 v1 = vec_sub (vs, vec_ld (0, b->mv));
306 #else
307 t0 = vec_perm (r0, r1, p0);
308 t1 = vec_perm (t0, r2, p1);
309 v1 = vec_sub (vs, t1);
310 #endif
312 r0 = vec_madd (r0, vw, vz);
313 r1 = vec_madd (r1, vw, vz);
314 r2 = vec_madd (r2, vw, vz);
315 r3 = vec_madd (r3, vw, vz);
317 x = vec_splat (v1, 0);
318 y = vec_splat (v1, 1);
319 z = vec_splat (v1, 2);
321 t0 = vec_madd (r0, x, r3);
322 t1 = vec_madd (r1, y, t0);
323 t2 = vec_madd (r2, z, t1);
324 v = vec_add (v, t2);
326 t0 = vec_madd (r0, nx, n);
327 t1 = vec_madd (r1, ny, t0);
328 n = vec_madd (r2, nz, t1);
330 vec_st (v, i<<4, vdst);
331 vec_st (n, i<<4, ndst);
333 #else
334 for (i = 0; i < s->num_vertices; ++i,
335 vsrc += 3, nsrc += 3, vdst += 3, ndst += 3, ++skin)
337 if (skin->num_bones == 1) {
338 float v0[4];
340 b = &s->bones[skin->boneindices[0]];
342 vsub (v0, vsrc, b->mv);
343 mapply_to_point (vdst, b->cm, v0);
344 mapply_to_vector (ndst, b->cm, nsrc);
346 else
348 int z = 0;
349 float v[3] = {0,0,0}, n[3] = {0,0,0}, v0[4], v1[4], w, m[12];
351 for (j = 0; j < skin->num_bones; ++j) {
352 w = skin->weights[j];
353 b = &s->bones[skin->boneindices[j]];
355 if (w < 0.0) z = 1;
356 vsub (v0, vsrc, b->mv);
358 mscale (m, b->cm, w);
359 mapply_to_point (v0, m, v0);
360 mapply_to_vector (v1, m, nsrc);
361 vaddto (v, v0);
362 vaddto (n, v1);
365 /* hack hack */
366 if (z) vcopy (vdst, vsrc);
367 else vcopy (vdst, v);
368 vcopy (ndst, n);
371 #endif
373 #ifdef TIMING
374 E = now ();
375 printf ("took %f sec\n", E - S);
376 #endif
379 CAMLprim value ml_skin_set_skel (value skel_v)
381 int i;
382 size_t size;
383 struct bone *b;
384 CAMLparam1 (skel_v);
385 CAMLlocal2 (v, floats_v);
386 State *s = &glob_state;
388 s->num_bones = Wosize_val (skel_v);
389 size = (s->num_bones + 1) * sizeof (struct bone);
390 s->bones = b = simd_alloc (size);
392 memset (b, 0, size);
393 b->parent = -1;
394 b->q[3] = 1.0;
395 b->mq[3] = 1.0;
396 b->aq[3] = 1.0;
397 b->amq[3] = 1.0;
398 b++;
400 for (i = 0; i < s->num_bones; ++i, ++b) {
401 v = Field (skel_v, i);
402 floats_v = Field (v, 1);
404 b->parent = Int_val (Field (v, 0)) + 1;
406 b->v[0] = Double_field (floats_v, 1);
407 b->v[1] = Double_field (floats_v, 2);
408 b->v[2] = Double_field (floats_v, 3);
410 b->q[0] = Double_field (floats_v, 5);
411 b->q[1] = Double_field (floats_v, 6);
412 b->q[2] = Double_field (floats_v, 7);
413 b->q[3] = Double_field (floats_v, 8);
416 b = s->bones + 1;
417 for (i = 0; i < s->num_bones; ++i, ++b) {
418 float v[3];
419 struct bone *parent = &s->bones[b->parent];
421 qapply (v, parent->mq, b->v);
422 qcompose (b->mq, b->q, parent->mq);
423 vadd (b->mv, v, parent->mv);
424 #ifdef USE_ALTIVEC
425 b->cm[3] = b->mv[0];
426 b->cm[7] = b->mv[1];
427 b->cm[11] = b->mv[2];
428 #endif
431 CAMLreturn (Val_unit);
434 CAMLprim value ml_skin_set_anim (value anim_v)
436 int i;
437 CAMLparam1 (anim_v);
438 CAMLlocal1 (floats_v);
439 State *s = &glob_state;
440 struct bone *b = s->bones + 1;
442 for (i = 0; i < s->num_bones; ++i, ++b) {
443 floats_v = Field (anim_v, i);
444 b->aq[0] = Double_field (floats_v, 0);
445 b->aq[1] = Double_field (floats_v, 1);
446 b->aq[2] = Double_field (floats_v, 2);
447 b->aq[3] = Double_field (floats_v, 3);
450 b = s->bones + 1;
451 for (i = 0; i < s->num_bones; ++i, ++b) {
452 float v[3], q[4], q1[4];;
453 struct bone *parent = &s->bones[b->parent];
455 qapply (v, parent->amq, b->v);
456 qcompose (b->amq, b->aq, parent->amq);
457 vadd (b->amv, v, parent->amv);
459 qconjugate (q1, b->mq);
460 qcompose (q, q1, b->amq);
461 q2matrixt (b->cm, q, b->amv);
464 CAMLreturn (Val_unit);
467 CAMLprim value ml_skin_anim (value unit_v)
469 GLboolean ret;
470 CAMLparam1 (unit_v);
471 float *vdst, *vsrc, *ndst, *nsrc;
472 State *s = &glob_state;
474 if (use_vbo) {
475 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[V_IDX]);
476 vdst = glMapBuffer (GL_ARRAY_BUFFER, GL_WRITE_ONLY);
477 if (!vdst) {
478 fprintf (stderr, "glMapBuffer for vertices failed\n");
479 exit (EXIT_FAILURE);
482 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[N_IDX]);
483 ndst = glMapBuffer (GL_ARRAY_BUFFER, GL_WRITE_ONLY);
484 if (!ndst) {
485 fprintf (stderr, "glMapBuffer for normals failed\n");
486 exit (EXIT_FAILURE);
489 else {
490 vdst = s->bufs[V_IDX];
491 ndst = s->bufs[N_IDX];
494 vsrc = s->ptrs[V_IDX];
495 nsrc = s->ptrs[N_IDX];
497 translate (s, vdst, ndst);
499 if (use_vbo) {
500 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[V_IDX]);
501 ret = glUnmapBuffer (GL_ARRAY_BUFFER);
502 if (ret == GL_FALSE) {
503 fprintf (stderr, "glUnmapBuffer for vertices failed\n");
504 exit (EXIT_FAILURE);
507 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[N_IDX]);
508 ret = glUnmapBuffer (GL_ARRAY_BUFFER);
509 if (ret == GL_FALSE) {
510 fprintf (stderr, "glUnmapBuffer for normals failed\n");
511 exit (EXIT_FAILURE);
515 CAMLreturn (Val_unit);