Remove negative weight hack
[dormin.git] / skin.c
blob f3e1cf6f22dfc610f1bb7c83ead825c19213a2db
1 #include <math.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
6 #include <caml/fail.h>
7 #include <caml/alloc.h>
8 #include <caml/memory.h>
10 #include "vec.c"
11 #include "pgl.h"
/* Slots of the per-attribute pointer tables (State.bufs and the local
   ptrs[] arrays in skin_init/set_geom).  COUNT is the number of slots. */
enum {
    V_IDX,  /* vertex positions */
    N_IDX,  /* normals */
    UV_IDX, /* texture coordinates */
    C_IDX,  /* colors */
    COUNT
};
/*
 * Build configuration for the skinning kernels.
 *
 *   simd_alloc(s)  allocate s bytes for vertex/bone storage
 *   A16            attribute appended to structs that are loaded with
 *                  vec_ld/vec_st in the Altivec build (empty otherwise)
 *   STRIDE         16 in the Altivec build, 0 otherwise
 *   V_ELEMS        floats stored per vertex/normal (4 with Altivec, else 3)
 *   AL16(i)        i rounded up to a multiple of 16 (identity without Altivec)
 */
#ifdef USE_ALTIVEC
/* Altivec code derived from: */
/* http://www.freevec.org/category/simd/algorithms/algebra/matrix_operations */

#include <altivec.h>
#ifndef __APPLE__
#include <malloc.h>
#define simd_alloc(s) memalign (16, (s))
#else
/* NOTE(review): presumably relies on malloc being 16-byte aligned on
   Apple platforms -- confirm. */
#define simd_alloc malloc
#endif
#define A16 __attribute__ ((aligned (16)))
#define STRIDE 16
#define V_ELEMS 4
#define AL16(i) (((i)+15)&~15)

#else

#define STRIDE 0
#define V_ELEMS 3
#define simd_alloc(s) malloc (s)
#define A16
#define AL16(i) (i)

#endif
/* Per-vertex skinning record: at most three influencing bones. */
struct skin {
#ifdef USE_ALTIVEC
    /* Three weights, each replicated across a 4-float vector so it can be
       loaded directly with vec_ld (filled in by set_geom). */
    float weights[12];
#else
    float weights[3];
#endif
    /* Indices into State.bones; set_geom stores the source index plus one. */
    int boneindices[3];
    /* Number of valid entries in weights/boneindices. */
    int num_bones;
} A16;
/* One skeleton bone.  Entry 0 of State.bones is a synthetic root that
   ml_skin_set_skel creates; real bones start at index 1. */
struct bone {
    /* Values loaded from the skeleton description (ml_skin_set_skel):
       v is combined with vadd/qapply, q is passed to qcompose. */
    float v[4];
    float q[4];

    /* v/q accumulated along the parent chain by ml_skin_set_skel. */
    float mv[4];
    float mq[4];

    /* aq is loaded per bone by ml_skin_set_anim; amq/amv are the values
       accumulated along the parent chain for the current animation frame. */
    float aq[4];
    float amq[4];
    float amv[4];

    /* Matrix written by q2matrixt in ml_skin_set_anim and applied to
       vertices and normals in translate. */
    float cm[16];
    /* Index of the parent in State.bones (source parent + 1); -1 for the
       synthetic root. */
    int parent;
} A16;
66 typedef struct {
67 int num_bones;
68 int num_vertices;
69 GLuint bufid[2];
70 void *ptrs[2];
71 void *bufs[COUNT];
72 struct skin *skin;
73 struct bone *bones;
74 } State;
76 static State glob_state;
77 static int use_vbo;
79 static void copy_vertices (float *p, int num_vertices, value a_v)
81 int i, k;
83 for (i = 0, k = 0; i < num_vertices; ++i, p += V_ELEMS) {
84 p[0] = Double_field (a_v, k++);
85 p[1] = Double_field (a_v, k++);
86 p[2] = Double_field (a_v, k++);
87 #ifdef USE_ALTIVEC
88 p[3] = 1.0;
89 #endif
93 static void set_geom (State *s, void **ptrs, value vertexa_v, value normala_v,
94 value uva_v, value skin_v, value colors_v)
96 int i;
97 float *p;
98 int num_vertices;
99 struct skin *skin;
101 num_vertices = Wosize_val (vertexa_v) / (Double_wosize * 3);
103 copy_vertices (ptrs[V_IDX], num_vertices, vertexa_v);
104 copy_vertices (ptrs[N_IDX], num_vertices, normala_v);
106 for (i = 0, p = ptrs[UV_IDX]; i < num_vertices * 2; ++i) {
107 p[i] = Double_field (uva_v, i);
109 memcpy (ptrs[C_IDX], String_val (colors_v), num_vertices * 4);
111 skin = s->skin;
112 for (i = 0; i < num_vertices; ++i) {
113 int j;
114 value v;
116 v = Field (skin_v, i);
117 skin[i].num_bones = Int_val (Field (v, 3));
119 for (j = 0; j < skin[i].num_bones; ++j) {
120 double val, w;
122 val = Double_val (Bp_val (Field (v, j)));
124 skin[i].boneindices[j] = (int) val;
125 w = val - skin[i].boneindices[j];
126 #ifdef USE_ALTIVEC
127 vector float vw = {w,w,w,w};
129 vec_st (vw, j*16, skin[i].weights);
130 #else
131 skin[i].weights[j] = w;
132 #endif
133 skin[i].boneindices[j] += 1;
138 static void skin_init (State *s, value vertexa_v, value normala_v,
139 value uva_v, value skin_v, value colors_v)
141 char *p;
142 GLsizei sizevn, sizev, sizeu, sizec;
143 void *ptrs[COUNT];
145 s->num_vertices = Wosize_val (vertexa_v) / (Double_wosize * 3);
147 sizev = V_ELEMS * sizeof (GLfloat) * s->num_vertices;
148 sizeu = 2 * sizeof (GLfloat) * s->num_vertices;
149 sizec = 4 * s->num_vertices;
151 sizevn = sizev * 2;
153 p = simd_alloc (AL16 (sizevn) + s->num_vertices * sizeof (struct skin));
154 s->skin = (struct skin *) (p + AL16 (sizevn));
155 s->ptrs[0] = ptrs[V_IDX] = p;
156 ptrs[N_IDX] = p + sizev;
158 p = stat_alloc (sizec + sizeu);
159 s->ptrs[1] = ptrs[UV_IDX] = p;
160 ptrs[C_IDX] = p + sizeu;
162 set_geom (s, ptrs, vertexa_v, normala_v, uva_v, skin_v, colors_v);
164 if (use_vbo) {
165 glGenBuffers (2, s->bufid);
167 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[0]);
168 glBufferData (GL_ARRAY_BUFFER, sizevn, s->ptrs[0], GL_DYNAMIC_DRAW);
170 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[1]);
171 glBufferData (GL_ARRAY_BUFFER, sizeu+sizec, s->ptrs[1], GL_STATIC_DRAW);
173 glBindBuffer (GL_ARRAY_BUFFER, 0);
174 stat_free (s->ptrs[1]);
176 p = NULL;
177 s->bufs[V_IDX] = p;
178 s->bufs[N_IDX] = p + sizev;
179 s->bufs[UV_IDX] = p;
180 s->bufs[C_IDX] = p + sizeu;
182 else {
183 p = simd_alloc (sizevn);
184 s->bufs[V_IDX] = p;
185 s->bufs[N_IDX] = p + sizev;
186 s->bufs[UV_IDX] = ptrs[UV_IDX];
187 s->bufs[C_IDX] = ptrs[C_IDX];
189 memcpy (p, s->ptrs[0], sizevn);
193 CAMLprim value ml_skin_draw_begin (value unit_v)
195 State *s = &glob_state;
197 (void) unit_v;
199 glEnableClientState (GL_VERTEX_ARRAY);
200 glEnableClientState (GL_NORMAL_ARRAY);
201 glEnableClientState (GL_TEXTURE_COORD_ARRAY);
202 glEnableClientState (GL_COLOR_ARRAY);
204 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[0]);
205 glVertexPointer (3, GL_FLOAT, V_ELEMS * sizeof (GLfloat), s->bufs[V_IDX]);
206 glNormalPointer (GL_FLOAT, V_ELEMS * sizeof (GLfloat), s->bufs[N_IDX]);
208 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[1]);
209 glTexCoordPointer (2, GL_FLOAT, 0, s->bufs[UV_IDX]);
210 glColorPointer (4, GL_UNSIGNED_BYTE, 0, s->bufs[C_IDX]);
212 return Val_unit;
215 CAMLprim value ml_skin_draw_end (value unit_v)
217 (void) unit_v;
218 glDisableClientState (GL_VERTEX_ARRAY);
219 glDisableClientState (GL_NORMAL_ARRAY);
220 glDisableClientState (GL_TEXTURE_COORD_ARRAY);
221 glDisableClientState (GL_COLOR_ARRAY);
222 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, 0);
223 return Val_unit;
226 CAMLprim value ml_skin_init (value use_vbo_v, value geom_v)
228 CAMLparam2 (use_vbo_v, geom_v);
229 CAMLlocal5 (vertexa_v, normala_v, uva_v, skin_v, colors_v);
230 State *s = &glob_state;
232 use_vbo = Bool_val (use_vbo_v);
233 #ifdef _WIN32
234 if (use_vbo) {
235 GETPA (BindBuffer);
236 GETPA (GenBuffers);
237 GETPA (BufferData);
238 GETPA (BufferSubData);
239 GETPA (MapBuffer);
240 GETPA (UnmapBuffer);
242 #endif
243 vertexa_v = Field (geom_v, 0);
244 normala_v = Field (geom_v, 1);
245 uva_v = Field (geom_v, 2);
246 skin_v = Field (geom_v, 3);
247 colors_v = Field (geom_v, 4);
249 skin_init (s, vertexa_v, normala_v, uva_v, skin_v, colors_v);
250 CAMLreturn (Val_unit);
#ifdef TIMING
#include <err.h>
#include <sys/time.h>
/* Current time of day in seconds, with microsecond resolution.  Terminates
   the process via err() if gettimeofday fails. */
static double now (void)
{
    struct timeval tv;

    if (gettimeofday (&tv, NULL)) err (1, "gettimeofday");
    return tv.tv_sec + tv.tv_usec * 1e-6;
}
#endif
265 static void translate (State *s, float *vdst, float *ndst)
267 int i, j;
268 struct bone *b;
269 float *vsrc = s->ptrs[0];
270 float *nsrc = vsrc + s->num_vertices * V_ELEMS;
271 struct skin *skin = s->skin;
273 #ifdef TIMING
274 double S = now (), E;
275 #endif
277 #ifdef USE_ALTIVEC
278 for (i = 0; i < s->num_vertices; ++i, ++skin) {
279 vector float v, n, vs, ns, vz;
280 vector float r0, r1, r2, r3, nx, ny, nz;
282 v = n = vz = (vector float) vec_splat_u32 (0);
284 vs = vec_ld (i<<4, vsrc);
285 ns = vec_ld (i<<4, nsrc);
287 nx = vec_splat (ns, 0);
288 ny = vec_splat (ns, 1);
289 nz = vec_splat (ns, 2);
291 for (j = 0; j < skin->num_bones; ++j) {
292 vector float vw, x, y, z, t0, t1, t2;
294 b = &s->bones[skin->boneindices[j]];
296 vw = vec_ld (j<<4, skin->weights);
298 r0 = vec_ld ( 0, b->cm);
299 r1 = vec_ld (16, b->cm);
300 r2 = vec_ld (32, b->cm);
301 r3 = vec_ld (48, b->cm);
303 x = vec_splat (vs, 0);
304 y = vec_splat (vs, 1);
305 z = vec_splat (vs, 2);
307 t0 = vec_madd (r0, x, r3);
308 t1 = vec_madd (r1, y, t0);
309 t2 = vec_madd (r2, z, t1);
310 v = vec_madd (t2, vw, v);
312 t0 = vec_madd (r0, nx, vz);
313 t1 = vec_madd (r1, ny, t0);
314 t2 = vec_madd (r2, nz, t1);
315 n = vec_madd (t2, vw, n);
317 vec_st (v, i<<4, vdst);
318 vec_st (n, i<<4, ndst);
320 #else
321 for (i = 0; i < s->num_vertices; ++i,
322 vsrc += 3, nsrc += 3, vdst += 3, ndst += 3, ++skin)
324 if (skin->num_bones == 1) {
325 b = &s->bones[skin->boneindices[0]];
327 mapply_to_point (vdst, b->cm, vsrc);
328 mapply_to_vector (ndst, b->cm, nsrc);
330 else
332 float v[3] = {0,0,0}, n[3] = {0,0,0}, v0[4], v1[4], w;
334 for (j = 0; j < skin->num_bones; ++j) {
335 w = skin->weights[j];
336 b = &s->bones[skin->boneindices[j]];
338 mapply_to_point (v1, b->cm, vsrc);
339 v1[0] *= w;
340 v1[1] *= w;
341 v1[2] *= w;
343 mapply_to_vector (v0, b->cm, nsrc);
344 v0[0] *= w;
345 v0[1] *= w;
346 v0[2] *= w;
348 vaddto (v, v1);
349 vaddto (n, v0);
352 vcopy (vdst, v);
353 vcopy (ndst, n);
356 #endif
358 #ifdef TIMING
359 E = now ();
360 printf ("took %f sec\n", E - S);
361 #endif
364 CAMLprim value ml_skin_set_skel (value skel_v)
366 int i;
367 size_t size;
368 struct bone *b;
369 CAMLparam1 (skel_v);
370 CAMLlocal2 (v, floats_v);
371 State *s = &glob_state;
373 s->num_bones = Wosize_val (skel_v);
374 size = (s->num_bones + 1) * sizeof (struct bone);
375 s->bones = b = simd_alloc (size);
377 memset (b, 0, size);
378 b->parent = -1;
379 b->q[3] = 1.0;
380 b->mq[3] = 1.0;
381 b->aq[3] = 1.0;
382 b->amq[3] = 1.0;
383 b++;
385 for (i = 0; i < s->num_bones; ++i, ++b) {
386 v = Field (skel_v, i);
387 floats_v = Field (v, 1);
389 b->parent = Int_val (Field (v, 0)) + 1;
391 b->v[0] = Double_field (floats_v, 1);
392 b->v[1] = Double_field (floats_v, 2);
393 b->v[2] = Double_field (floats_v, 3);
395 b->q[0] = Double_field (floats_v, 5);
396 b->q[1] = Double_field (floats_v, 6);
397 b->q[2] = Double_field (floats_v, 7);
398 b->q[3] = Double_field (floats_v, 8);
401 b = s->bones + 1;
402 for (i = 0; i < s->num_bones; ++i, ++b) {
403 float v[3];
404 struct bone *parent = &s->bones[b->parent];
406 qapply (v, parent->mq, b->v);
407 qcompose (b->mq, b->q, parent->mq);
408 vadd (b->mv, v, parent->mv);
409 #ifdef USE_ALTIVEC
410 b->cm[3] = b->mv[0];
411 b->cm[7] = b->mv[1];
412 b->cm[11] = b->mv[2];
413 #endif
416 CAMLreturn (Val_unit);
419 CAMLprim value ml_skin_set_anim (value anim_v)
421 int i;
422 CAMLparam1 (anim_v);
423 CAMLlocal1 (floats_v);
424 State *s = &glob_state;
425 struct bone *b = s->bones + 1;
427 for (i = 0; i < s->num_bones; ++i, ++b) {
428 floats_v = Field (anim_v, i);
429 b->aq[0] = Double_field (floats_v, 0);
430 b->aq[1] = Double_field (floats_v, 1);
431 b->aq[2] = Double_field (floats_v, 2);
432 b->aq[3] = Double_field (floats_v, 3);
435 b = s->bones + 1;
436 for (i = 0; i < s->num_bones; ++i, ++b) {
437 float v[4], v1[4], q[4], q1[4];
438 struct bone *parent = &s->bones[b->parent];
440 qapply (v, parent->amq, b->v);
441 qcompose (b->amq, b->aq, parent->amq);
442 vadd (b->amv, v, parent->amv);
444 qconjugate (q1, b->mq);
445 qcompose (q, q1, b->amq);
447 qapply (v, q, b->mv);
448 vsub (v1, b->amv, v);
449 q2matrixt (b->cm, q, v1);
452 CAMLreturn (Val_unit);
455 CAMLprim value ml_skin_anim (value unit_v)
457 GLboolean ret;
458 CAMLparam1 (unit_v);
459 float *vdst, *ndst;
460 State *s = &glob_state;
462 if (use_vbo) {
463 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[0]);
464 vdst = ndst = glMapBuffer (GL_ARRAY_BUFFER, GL_WRITE_ONLY);
465 if (!vdst) caml_failwith ("glMapBuffer failed");
467 ndst += (float *) s->bufs[N_IDX] - (float *) s->bufs[V_IDX];
469 else {
470 vdst = s->bufs[V_IDX];
471 ndst = s->bufs[N_IDX];
474 translate (s, vdst, ndst);
476 if (use_vbo) {
477 ret = glUnmapBuffer (GL_ARRAY_BUFFER);
478 if (ret == GL_FALSE) caml_failwith ("glUnmapBuffer failed");
481 CAMLreturn (Val_unit);
484 #ifndef GL_GENERATE_MIPMAP
485 #define GL_GENERATE_MIPMAP 0x8191
486 #endif
488 CAMLprim value ml_set_generate_mipmaps (value unit_v)
490 (void) unit_v;
491 glTexParameteri (GL_TEXTURE_2D, GL_GENERATE_MIPMAP, GL_TRUE);
492 return Val_unit;