Cosmetics
[dormin.git] / skin.c
blobca540e36f9472ac09665ee153d8b7ab6ed7dd38d
1 #include <math.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
6 #include <caml/fail.h>
7 #include <caml/alloc.h>
8 #include <caml/memory.h>
10 #include "vec.c"
11 #include "pgl.h"
/* Slots of the per-attribute pointer tables (positions, normals, texture
   coordinates, colors); COUNT is the number of slots. */
enum {
    V_IDX,
    N_IDX,
    UV_IDX,
    C_IDX,
    COUNT
};

#ifndef USE_ALTIVEC

/* Scalar build: tightly packed xyz triples, no alignment requirements. */
#define STRIDE 0
#define V_ELEMS 3
#define simd_alloc(s) malloc (s)
#define A16
#define AL16(i) (i)

#else

/* Altivec code derived from: */
/* http://www.freevec.org/category/simd/algorithms/algebra/matrix_operations */

#include <altivec.h>
#ifndef __APPLE__
#include <malloc.h>
#define simd_alloc(s) memalign (16, s)
#else
#define simd_alloc malloc
#endif
/* Vectors are padded to four floats and everything loaded with vec_ld is
   kept on a 16-byte boundary. */
#define A16 __attribute__ ((aligned (16)))
#define STRIDE 16
#define V_ELEMS 4
/* Round a byte count up to the next multiple of 16. */
#define AL16(i) (((i)+15)&~15)

#endif
/* Per-vertex skinning record: up to three bone influences. */
struct skin {
#ifndef USE_ALTIVEC
    /* One blend weight per influence. */
    float weights[3];
#else
    /* Each weight is stored replicated across a 4-float vector (three
       influences * four lanes) so it can be fetched with vec_ld. */
    float weights[12];
#endif
    /* Indices into State.bones; set_geom stores them already shifted by
       one because slot 0 of the bone array is the implicit root. */
    int boneindices[3];
    /* Number of entries of weights/boneindices that are in use. */
    int num_bones;
} A16;
/* One skeleton joint.  Every vector/quaternion occupies four floats. */
struct bone {
    /* Offset and rotation as loaded by ml_skin_set_skel. */
    float v[4];
    float q[4];

    /* Offset and rotation accumulated down the parent chain by
       ml_skin_set_skel. */
    float mv[4];
    float mq[4];

    /* aq: rotation loaded by ml_skin_set_anim; amq/amv: the same
       accumulation as mq/mv, computed from aq. */
    float aq[4];
    float amq[4];
    float amv[4];

    /* 4x4 matrix filled by ml_skin_set_anim and applied to vertices and
       normals in translate. */
    float cm[16];
    /* Index of the parent in State.bones (0 is the implicit root, whose
       own parent is -1). */
    int parent;
} A16;
66 typedef struct {
67 int num_bones;
68 int num_vertices;
69 GLuint bufid[2];
70 void *ptrs[2];
71 void *bufs[COUNT];
72 struct skin *skin;
73 struct bone *bones;
74 } State;
76 static State glob_state;
77 static int use_vbo;
79 static void copy_vertices (float *p, int num_vertices, value a_v)
81 int i, k;
83 for (i = 0, k = 0; i < num_vertices; ++i, p += V_ELEMS) {
84 p[0] = Double_field (a_v, k++);
85 p[1] = Double_field (a_v, k++);
86 p[2] = Double_field (a_v, k++);
87 #ifdef USE_ALTIVEC
88 p[3] = 1.0;
89 #endif
93 static void set_geom (State *s, void **ptrs, value vertexa_v, value normala_v,
94 value uva_v, value skin_v, value colors_v)
96 int i;
97 float *p;
98 int num_vertices;
99 struct skin *skin;
101 num_vertices = Wosize_val (vertexa_v) / (Double_wosize * 3);
103 copy_vertices (ptrs[V_IDX], num_vertices, vertexa_v);
104 copy_vertices (ptrs[N_IDX], num_vertices, normala_v);
106 for (i = 0, p = ptrs[UV_IDX]; i < num_vertices * 2; ++i) {
107 p[i] = Double_field (uva_v, i);
109 memcpy (ptrs[C_IDX], String_val (colors_v), num_vertices * 4);
111 skin = s->skin;
112 for (i = 0; i < num_vertices; ++i) {
113 int j;
114 value v;
116 v = Field (skin_v, i);
117 skin[i].num_bones = Int_val (Field (v, 3));
119 for (j = 0; j < skin[i].num_bones; ++j) {
120 double val, w;
122 val = Double_val (Bp_val (Field (v, j)));
124 skin[i].boneindices[j] = (int) val;
125 w = val - skin[i].boneindices[j];
126 #ifdef USE_ALTIVEC
127 vector float vw = {w,w,w,w};
129 vec_st (vw, j*16, skin[i].weights);
130 #else
131 skin[i].weights[j] = w;
132 #endif
133 skin[i].boneindices[j] += 1;
138 static void skin_init (State *s, value vertexa_v, value normala_v,
139 value uva_v, value skin_v, value colors_v)
141 char *p;
142 GLsizei sizevn, sizev, sizeu, sizec;
143 void *ptrs[COUNT];
145 s->num_vertices = Wosize_val (vertexa_v) / (Double_wosize * 3);
147 sizev = V_ELEMS * sizeof (GLfloat) * s->num_vertices;
148 sizeu = 2 * sizeof (GLfloat) * s->num_vertices;
149 sizec = 4 * s->num_vertices;
151 sizevn = sizev * 2;
153 p = simd_alloc (AL16 (sizevn) + s->num_vertices * sizeof (struct skin));
154 s->skin = (struct skin *) (p + AL16 (sizevn));
155 s->ptrs[0] = ptrs[V_IDX] = p;
156 ptrs[N_IDX] = p + sizev;
158 p = stat_alloc (sizec + sizeu);
159 s->ptrs[1] = ptrs[UV_IDX] = p;
160 ptrs[C_IDX] = p + sizeu;
162 set_geom (s, ptrs, vertexa_v, normala_v, uva_v, skin_v, colors_v);
164 if (use_vbo) {
165 glGenBuffers (2, s->bufid);
167 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[0]);
168 glBufferData (GL_ARRAY_BUFFER, sizevn, s->ptrs[0], GL_DYNAMIC_DRAW);
170 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[1]);
171 glBufferData (GL_ARRAY_BUFFER, sizeu+sizec, s->ptrs[1], GL_STATIC_DRAW);
173 glBindBuffer (GL_ARRAY_BUFFER, 0);
174 stat_free (s->ptrs[1]);
176 p = NULL;
177 s->bufs[V_IDX] = p;
178 s->bufs[N_IDX] = p + sizev;
179 s->bufs[UV_IDX] = p;
180 s->bufs[C_IDX] = p + sizeu;
182 else {
183 p = simd_alloc (sizevn);
184 s->bufs[V_IDX] = p;
185 s->bufs[N_IDX] = p + sizev;
186 s->bufs[UV_IDX] = ptrs[UV_IDX];
187 s->bufs[C_IDX] = ptrs[C_IDX];
189 memcpy (p, s->ptrs[0], sizevn);
193 CAMLprim value ml_skin_draw_begin (value unit_v)
195 State *s = &glob_state;
197 (void) unit_v;
199 glEnableClientState (GL_VERTEX_ARRAY);
200 glEnableClientState (GL_NORMAL_ARRAY);
201 glEnableClientState (GL_TEXTURE_COORD_ARRAY);
202 glEnableClientState (GL_COLOR_ARRAY);
204 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[0]);
205 glVertexPointer (3, GL_FLOAT, V_ELEMS * sizeof (GLfloat), s->bufs[V_IDX]);
206 glNormalPointer (GL_FLOAT, V_ELEMS * sizeof (GLfloat), s->bufs[N_IDX]);
208 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[1]);
209 glTexCoordPointer (2, GL_FLOAT, 0, s->bufs[UV_IDX]);
210 glColorPointer (4, GL_UNSIGNED_BYTE, 0, s->bufs[C_IDX]);
212 return Val_unit;
215 CAMLprim value ml_skin_draw_end (value unit_v)
217 (void) unit_v;
218 glDisableClientState (GL_VERTEX_ARRAY);
219 glDisableClientState (GL_NORMAL_ARRAY);
220 glDisableClientState (GL_TEXTURE_COORD_ARRAY);
221 glDisableClientState (GL_COLOR_ARRAY);
222 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, 0);
223 return Val_unit;
226 CAMLprim value ml_skin_init (value use_vbo_v, value geom_v)
228 CAMLparam2 (use_vbo_v, geom_v);
229 CAMLlocal5 (vertexa_v, normala_v, uva_v, skin_v, colors_v);
230 State *s = &glob_state;
232 use_vbo = Bool_val (use_vbo_v);
233 #ifdef _WIN32
234 if (use_vbo) {
235 GETPA (BindBuffer);
236 GETPA (GenBuffers);
237 GETPA (BufferData);
238 GETPA (BufferSubData);
239 GETPA (MapBuffer);
240 GETPA (UnmapBuffer);
242 #endif
243 vertexa_v = Field (geom_v, 0);
244 normala_v = Field (geom_v, 1);
245 uva_v = Field (geom_v, 2);
246 skin_v = Field (geom_v, 3);
247 colors_v = Field (geom_v, 4);
249 skin_init (s, vertexa_v, normala_v, uva_v, skin_v, colors_v);
250 CAMLreturn (Val_unit);
#ifdef TIMING
#include <err.h>
#include <sys/time.h>
/* Current time of day in seconds, with the microsecond part as the
   fraction.  Terminates the process via err if gettimeofday fails. */
static double now (void)
{
    struct timeval stamp;

    if (gettimeofday (&stamp, NULL) != 0) {
        err (1, "gettimeofday");
    }
    return stamp.tv_sec + stamp.tv_usec * 1e-6;
}
#endif
265 static void translate (State *s, float *vdst, float *ndst)
267 int i, j;
268 struct bone *b;
269 float *vsrc = s->ptrs[0];
270 float *nsrc = vsrc + s->num_vertices * V_ELEMS;
271 struct skin *skin = s->skin;
273 #ifdef TIMING
274 double S = now (), E;
275 #endif
277 #ifdef USE_ALTIVEC
278 for (i = 0; i < s->num_vertices; ++i, ++skin) {
279 vector float v, n, vs, ns, vz;
280 vector float r0, r1, r2, r3, nx, ny, nz;
282 v = n = vz = (vector float) vec_splat_u32 (0);
284 vs = vec_ld (i<<4, vsrc);
285 ns = vec_ld (i<<4, nsrc);
287 nx = vec_splat (ns, 0);
288 ny = vec_splat (ns, 1);
289 nz = vec_splat (ns, 2);
291 for (j = 0; j < skin->num_bones; ++j) {
292 vector float vw, x, y, z, t0, t1, t2;
294 b = &s->bones[skin->boneindices[j]];
296 vw = vec_ld (j<<4, skin->weights);
298 r0 = vec_ld ( 0, b->cm);
299 r1 = vec_ld (16, b->cm);
300 r2 = vec_ld (32, b->cm);
301 r3 = vec_ld (48, b->cm);
303 x = vec_splat (vs, 0);
304 y = vec_splat (vs, 1);
305 z = vec_splat (vs, 2);
307 t0 = vec_madd (r0, x, r3);
308 t1 = vec_madd (r1, y, t0);
309 t2 = vec_madd (r2, z, t1);
310 v = vec_madd (t2, vw, v);
312 t0 = vec_madd (r0, nx, vz);
313 t1 = vec_madd (r1, ny, t0);
314 t2 = vec_madd (r2, nz, t1);
315 n = vec_madd (t2, vw, n);
317 vec_st (v, i<<4, vdst);
318 vec_st (n, i<<4, ndst);
320 #else
321 for (i = 0; i < s->num_vertices; ++i,
322 vsrc += 3, nsrc += 3, vdst += 3, ndst += 3, ++skin)
324 if (skin->num_bones == 1) {
325 b = &s->bones[skin->boneindices[0]];
327 mapply_to_point (vdst, b->cm, vsrc);
328 mapply_to_vector (ndst, b->cm, nsrc);
330 else
332 int z = 0;
333 float v[3] = {0,0,0}, n[3] = {0,0,0}, v0[4], v1[4], w;
335 for (j = 0; j < skin->num_bones; ++j) {
336 w = skin->weights[j];
337 b = &s->bones[skin->boneindices[j]];
339 if (w < 0.0) z = 1;
341 mapply_to_point (v1, b->cm, vsrc);
342 v1[0] *= w;
343 v1[1] *= w;
344 v1[2] *= w;
346 mapply_to_vector (v0, b->cm, nsrc);
347 v0[0] *= w;
348 v0[1] *= w;
349 v0[2] *= w;
351 vaddto (v, v1);
352 vaddto (n, v0);
355 /* hack hack */
356 if (z) vcopy (vdst, vsrc);
357 else vcopy (vdst, v);
358 vcopy (ndst, n);
361 #endif
363 #ifdef TIMING
364 E = now ();
365 printf ("took %f sec\n", E - S);
366 #endif
369 CAMLprim value ml_skin_set_skel (value skel_v)
371 int i;
372 size_t size;
373 struct bone *b;
374 CAMLparam1 (skel_v);
375 CAMLlocal2 (v, floats_v);
376 State *s = &glob_state;
378 s->num_bones = Wosize_val (skel_v);
379 size = (s->num_bones + 1) * sizeof (struct bone);
380 s->bones = b = simd_alloc (size);
382 memset (b, 0, size);
383 b->parent = -1;
384 b->q[3] = 1.0;
385 b->mq[3] = 1.0;
386 b->aq[3] = 1.0;
387 b->amq[3] = 1.0;
388 b++;
390 for (i = 0; i < s->num_bones; ++i, ++b) {
391 v = Field (skel_v, i);
392 floats_v = Field (v, 1);
394 b->parent = Int_val (Field (v, 0)) + 1;
396 b->v[0] = Double_field (floats_v, 1);
397 b->v[1] = Double_field (floats_v, 2);
398 b->v[2] = Double_field (floats_v, 3);
400 b->q[0] = Double_field (floats_v, 5);
401 b->q[1] = Double_field (floats_v, 6);
402 b->q[2] = Double_field (floats_v, 7);
403 b->q[3] = Double_field (floats_v, 8);
406 b = s->bones + 1;
407 for (i = 0; i < s->num_bones; ++i, ++b) {
408 float v[3];
409 struct bone *parent = &s->bones[b->parent];
411 qapply (v, parent->mq, b->v);
412 qcompose (b->mq, b->q, parent->mq);
413 vadd (b->mv, v, parent->mv);
414 #ifdef USE_ALTIVEC
415 b->cm[3] = b->mv[0];
416 b->cm[7] = b->mv[1];
417 b->cm[11] = b->mv[2];
418 #endif
421 CAMLreturn (Val_unit);
424 CAMLprim value ml_skin_set_anim (value anim_v)
426 int i;
427 CAMLparam1 (anim_v);
428 CAMLlocal1 (floats_v);
429 State *s = &glob_state;
430 struct bone *b = s->bones + 1;
432 for (i = 0; i < s->num_bones; ++i, ++b) {
433 floats_v = Field (anim_v, i);
434 b->aq[0] = Double_field (floats_v, 0);
435 b->aq[1] = Double_field (floats_v, 1);
436 b->aq[2] = Double_field (floats_v, 2);
437 b->aq[3] = Double_field (floats_v, 3);
440 b = s->bones + 1;
441 for (i = 0; i < s->num_bones; ++i, ++b) {
442 float v[4], v1[4], q[4], q1[4];
443 struct bone *parent = &s->bones[b->parent];
445 qapply (v, parent->amq, b->v);
446 qcompose (b->amq, b->aq, parent->amq);
447 vadd (b->amv, v, parent->amv);
449 qconjugate (q1, b->mq);
450 qcompose (q, q1, b->amq);
452 qapply (v, q, b->mv);
453 vsub (v1, b->amv, v);
454 q2matrixt (b->cm, q, v1);
457 CAMLreturn (Val_unit);
460 CAMLprim value ml_skin_anim (value unit_v)
462 GLboolean ret;
463 CAMLparam1 (unit_v);
464 float *vdst, *ndst;
465 State *s = &glob_state;
467 if (use_vbo) {
468 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[0]);
469 vdst = ndst = glMapBuffer (GL_ARRAY_BUFFER, GL_WRITE_ONLY);
470 if (!vdst) caml_failwith ("glMapBuffer failed");
472 ndst += (float *) s->bufs[N_IDX] - (float *) s->bufs[V_IDX];
474 else {
475 vdst = s->bufs[V_IDX];
476 ndst = s->bufs[N_IDX];
479 translate (s, vdst, ndst);
481 if (use_vbo) {
482 ret = glUnmapBuffer (GL_ARRAY_BUFFER);
483 if (ret == GL_FALSE) caml_failwith ("glUnmapBuffer failed");
486 CAMLreturn (Val_unit);
489 #ifndef GL_GENERATE_MIPMAP
490 #define GL_GENERATE_MIPMAP 0x8191
491 #endif
493 CAMLprim value ml_set_generate_mipmaps (value unit_v)
495 (void) unit_v;
496 glTexParameteri (GL_TEXTURE_2D, GL_GENERATE_MIPMAP, GL_TRUE);
497 return Val_unit;