Reorganize build parameters a bit
[dormin.git] / skin.c
blob569ac698355c9ffc2324c53a34b0dccb29eba3af
1 #include <math.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
6 #include <caml/fail.h>
7 #include <caml/alloc.h>
8 #include <caml/memory.h>
10 #include "vec.c"
11 #include "pgl.h"
/* Indices of the four vertex attributes (position, normal, texture
   coordinate, color) in the per-attribute pointer arrays; COUNT is the
   number of attributes. */
enum {V_IDX, N_IDX, UV_IDX, C_IDX, COUNT};

/* Round i up to the next multiple of n.  n must be a power of two.
   Both arguments are fully parenthesized so that an expression such as
   `8 << 1' can be passed as n without being torn apart by precedence. */
#define ALNN(n, i) (((i) + ((n) - 1)) & ~((n) - 1))

/* Alignment requested for the destination vertex/normal buffer. */
#ifdef G4
#define DSTAL 32
#else
#define DSTAL 16
#endif

#ifdef USE_ALTIVEC
/* Altivec code derived from: */
/* http://www.freevec.org/category/simd/algorithms/algebra/matrix_operations */
#include <altivec.h>
#include <malloc.h>
/* Aligned allocation, 16 byte type alignment and size rounding helpers. */
#define simd_alloc(b, s) memalign (b, s)
#define A16 __attribute__ ((aligned (16)))
#define AL16(i) ALNN (16, i)
#define AL32(i) ALNN (32, i)
#else
/* Scalar build: plain malloc, no forced alignment, no size rounding. */
#define simd_alloc(b, s) malloc (s)
#define A16
#define AL16(i) (i)
#define AL32(i) (i)
#endif
/* Per-vertex skinning record.  Field order matters: the Altivec path
   loads a 16 byte vector starting at `weights'. */
struct skin {
    float weights[3];           /* blend weight of each referenced bone */
    int boneindices[3];         /* indices into the State bones array */
    int num_bones;              /* how many of the slots above are used */
} A16;
/* One skeleton bone.  Field order and sizes are left untouched because
   the Altivec path loads `cm' as four consecutive 16 byte vectors. */
struct bone {
    /* Values read from the skeleton description. */
    float v[4];
    float q[4];

    /* Derived in ml_skin_set_skel from the parent's mv/mq. */
    float mv[4];
    float mq[4];

    /* aq is read from the animation frame; amq/amv are derived in
       ml_skin_set_anim from the parent's amq/amv. */
    float aq[4];
    float amq[4];
    float amv[4];

    /* Matrix applied to vertices and normals when skinning. */
    float cm[16];
    /* Index of the parent in the bones array (slot 0 is the extra
       root entry created by ml_skin_set_skel). */
    int parent;
} A16;
60 typedef struct {
61 int num_bones;
62 int num_vertices;
63 GLuint bufid[2];
64 void *ptrs[2];
65 void *bufs[COUNT];
66 struct skin *skin;
67 struct bone *bones;
68 } State;
70 static State glob_state;
71 static int use_vbo;
73 static void copy_vertices (float *p, int num_vertices, value a_v)
75 int i, k;
77 for (i = 0, k = 0; i < num_vertices; ++i, p += 3) {
78 p[0] = Double_field (a_v, k++);
79 p[1] = Double_field (a_v, k++);
80 p[2] = Double_field (a_v, k++);
84 static void set_geom (State *s, void **ptrs, value vertexa_v, value normala_v,
85 value uva_v, value skin_v, value colors_v)
87 int i;
88 float *p;
89 int num_vertices;
90 struct skin *skin;
92 num_vertices = Wosize_val (vertexa_v) / (Double_wosize * 3);
94 copy_vertices (ptrs[V_IDX], num_vertices, vertexa_v);
95 copy_vertices (ptrs[N_IDX], num_vertices, normala_v);
97 for (i = 0, p = ptrs[UV_IDX]; i < num_vertices * 2; ++i) {
98 p[i] = Double_field (uva_v, i);
100 memcpy (ptrs[C_IDX], String_val (colors_v), num_vertices * 4);
102 skin = s->skin;
103 for (i = 0; i < num_vertices; ++i) {
104 int j;
105 value v;
107 v = Field (skin_v, i);
108 skin[i].num_bones = Int_val (Field (v, 3));
110 for (j = 0; j < skin[i].num_bones; ++j) {
111 double val, w;
113 val = Double_val (Bp_val (Field (v, j)));
115 skin[i].boneindices[j] = (int) val;
116 w = val - skin[i].boneindices[j];
117 skin[i].weights[j] = w;
118 skin[i].boneindices[j] += 1;
123 static void skin_init (State *s, value vertexa_v, value normala_v,
124 value uva_v, value skin_v, value colors_v)
126 char *p;
127 GLsizei sizevn, sizev, sizeu, sizec;
128 void *ptrs[COUNT];
130 s->num_vertices = Wosize_val (vertexa_v) / (Double_wosize * 3);
132 sizev = AL32 (3 * sizeof (GLfloat) * s->num_vertices);
133 sizeu = 2 * sizeof (GLfloat) * s->num_vertices;
134 sizec = 4 * s->num_vertices;
136 sizevn = sizev * 2;
138 p = simd_alloc (16, AL16 (sizevn) + s->num_vertices * sizeof (struct skin));
139 s->skin = (struct skin *) (p + AL16 (sizevn));
140 s->ptrs[0] = ptrs[V_IDX] = p;
141 ptrs[N_IDX] = p + sizev;
143 p = stat_alloc (sizec + sizeu);
144 s->ptrs[1] = ptrs[UV_IDX] = p;
145 ptrs[C_IDX] = p + sizeu;
147 set_geom (s, ptrs, vertexa_v, normala_v, uva_v, skin_v, colors_v);
149 if (use_vbo) {
150 glGenBuffers (2, s->bufid);
152 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[0]);
153 glBufferData (GL_ARRAY_BUFFER, sizevn, s->ptrs[0], GL_DYNAMIC_DRAW);
155 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[1]);
156 glBufferData (GL_ARRAY_BUFFER, sizeu+sizec, s->ptrs[1], GL_STATIC_DRAW);
158 glBindBuffer (GL_ARRAY_BUFFER, 0);
159 stat_free (s->ptrs[1]);
161 p = NULL;
162 s->bufs[V_IDX] = p;
163 s->bufs[N_IDX] = p + sizev;
164 s->bufs[UV_IDX] = p;
165 s->bufs[C_IDX] = p + sizeu;
167 else {
168 p = simd_alloc (DSTAL, sizevn);
169 s->bufs[V_IDX] = p;
170 s->bufs[N_IDX] = p + sizev;
171 s->bufs[UV_IDX] = ptrs[UV_IDX];
172 s->bufs[C_IDX] = ptrs[C_IDX];
174 memcpy (p, s->ptrs[0], sizevn);
178 CAMLprim value ml_skin_draw_begin (value unit_v)
180 State *s = &glob_state;
182 (void) unit_v;
184 glEnableClientState (GL_VERTEX_ARRAY);
185 glEnableClientState (GL_NORMAL_ARRAY);
186 glEnableClientState (GL_TEXTURE_COORD_ARRAY);
187 glEnableClientState (GL_COLOR_ARRAY);
189 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[0]);
190 glVertexPointer (3, GL_FLOAT, 3 * sizeof (GLfloat), s->bufs[V_IDX]);
191 glNormalPointer (GL_FLOAT, 3 * sizeof (GLfloat), s->bufs[N_IDX]);
193 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[1]);
194 glTexCoordPointer (2, GL_FLOAT, 0, s->bufs[UV_IDX]);
195 glColorPointer (4, GL_UNSIGNED_BYTE, 0, s->bufs[C_IDX]);
197 return Val_unit;
200 CAMLprim value ml_skin_draw_end (value unit_v)
202 (void) unit_v;
203 glDisableClientState (GL_VERTEX_ARRAY);
204 glDisableClientState (GL_NORMAL_ARRAY);
205 glDisableClientState (GL_TEXTURE_COORD_ARRAY);
206 glDisableClientState (GL_COLOR_ARRAY);
207 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, 0);
208 return Val_unit;
211 CAMLprim value ml_skin_init (value use_vbo_v, value geom_v)
213 CAMLparam2 (use_vbo_v, geom_v);
214 CAMLlocal5 (vertexa_v, normala_v, uva_v, skin_v, colors_v);
215 State *s = &glob_state;
217 use_vbo = Bool_val (use_vbo_v);
218 #ifdef _WIN32
219 if (use_vbo) {
220 GETPA (BindBuffer);
221 GETPA (GenBuffers);
222 GETPA (BufferData);
223 GETPA (BufferSubData);
224 GETPA (MapBuffer);
225 GETPA (UnmapBuffer);
227 #endif
228 vertexa_v = Field (geom_v, 0);
229 normala_v = Field (geom_v, 1);
230 uva_v = Field (geom_v, 2);
231 skin_v = Field (geom_v, 3);
232 colors_v = Field (geom_v, 4);
234 skin_init (s, vertexa_v, normala_v, uva_v, skin_v, colors_v);
235 CAMLreturn (Val_unit);
#ifdef TIMING
#include <err.h>
#include <sys/time.h>
/* Current time of day in seconds as a double; terminates the program
   through err if gettimeofday fails. */
static double now (void)
{
    struct timeval tv;
    const int rc = gettimeofday (&tv, NULL);

    if (rc != 0) {
        err (1, "gettimeofday");
    }

    return tv.tv_sec + tv.tv_usec * 1e-6;
}
#endif
#ifdef USE_ALTIVEC

/* Emit a PowerPC data cache block instruction `o' for address b + i. */
#define DCB(o, b, i) __asm__ __volatile__ (#o " %0, %1" ::"b"(b),"r"(i))

/* Blend one vertex and its normal through the bones listed in `skin'.

   x, y, z     vertex components, each splatted across a vector
   nx, ny, nz  normal components, each splatted across a vector
   np          receives the blended normal

   For every bone the four 16 byte rows of b->cm are combined as
     vertex: r0*x + r1*y + r2*z + r3
     normal: r0*nx + r1*ny + r2*nz        (no r3 term)
   and the result is accumulated scaled by the bone's weight.
   Returns the blended vertex. */
static vector float appbones (State *s,
                              struct skin *skin,
                              vector float x,
                              vector float y,
                              vector float z,
                              vector float nx,
                              vector float ny,
                              vector float nz,
                              vector float *np)
{
    int j;
    struct bone *b;
    vector float vz = (vector float) vec_splat_u32 (0);
    vector float v, w, n;
    /* Shift count for vec_slo: moves the next weight into element 0. */
    vector unsigned char S = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4<<3};

    v = n = vz;
    w = vec_ld (0, skin->weights);

    for (j = 0; j < skin->num_bones; ++j) {
        vector float t0, t1, t2, t3, t4, t5, r0, r1, r2, r3, vw;

        b = &s->bones[skin->boneindices[j]];
        vw = vec_splat (w, 0);
        w = vec_slo (w, S);

        r0 = vec_ld ( 0, b->cm);
        r1 = vec_ld (16, b->cm);
        r2 = vec_ld (32, b->cm);
        r3 = vec_ld (48, b->cm);

        t0 = vec_madd (r0, x, r3);
        t1 = vec_madd (r1, y, t0);
        t2 = vec_madd (r2, z, t1);
        v = vec_madd (t2, vw, v);

        /* The normal chain must continue from t3, not from the vertex
           partial t0 (which already contains r3). */
        t3 = vec_madd (r0, nx, vz);
        t4 = vec_madd (r1, ny, t3);
        t5 = vec_madd (r2, nz, t4);
        n = vec_madd (t5, vw, n);
    }

    *np = n;
    return v;
}
#endif
303 static void translate (State *s, float *vdst, float *ndst)
305 int i, j;
306 struct bone *b;
307 float *vsrc = s->ptrs[0];
308 float *nsrc = vsrc + ALNN (32, s->num_vertices * 3);
309 struct skin *skin = s->skin;
311 #ifdef TIMING
312 double S = now (), E;
313 #endif
315 #ifdef USE_ALTIVEC
316 vector unsigned char p0 =
317 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19 };
318 vector unsigned char p1 =
319 { 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23 };
320 vector unsigned char p2 =
321 { 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 };
323 for (i = 0, j = 0; i < s->num_vertices >> 2; ++i, j += 48) {
324 vector float v0, v1, v2, n0, n1, n2;
325 vector float vx, vy, vz, nx, ny, nz;
326 vector float vr0, vr1, vr2, vr3;
327 vector float nr0, nr1, nr2, nr3;
329 #ifdef G4
330 if (!(i & 3)) {
331 DCB (dcbz, vdst, j);
332 DCB (dcbz, ndst, j);
335 DCB (dcbz, vdst, j + 32);
336 DCB (dcbz, ndst, j + 32);
337 #endif
339 DCB (dcbt, skin, 0);
340 DCB (dcbt, skin + 1, 0);
341 DCB (dcbt, skin + 2, 0);
342 DCB (dcbt, skin + 3, 0);
344 DCB (dcbt, vsrc, j + 64);
345 DCB (dcbt, nsrc, j + 64);
346 DCB (dcbt, vsrc, j + 96);
347 DCB (dcbt, nsrc, j + 96);
349 /* Load */
350 v0 = vec_ld (j, vsrc);
351 v1 = vec_ld (j + 16, vsrc);
352 v2 = vec_ld (j + 32, vsrc);
353 n0 = vec_ld (j, nsrc);
354 n1 = vec_ld (j + 16, nsrc);
355 n2 = vec_ld (j + 32, nsrc);
357 /* First vertex/normal */
358 vx = vec_splat (v0, 0);
359 vy = vec_splat (v0, 1);
360 vz = vec_splat (v0, 2);
361 nx = vec_splat (n0, 0);
362 ny = vec_splat (n0, 1);
363 nz = vec_splat (n0, 2);
365 vr0 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr0);
366 skin++;
368 /* Second vertex/normal */
369 vx = vec_splat (v0, 3);
370 vy = vec_splat (v1, 0);
371 vz = vec_splat (v1, 1);
372 nx = vec_splat (n0, 3);
373 ny = vec_splat (n1, 0);
374 nz = vec_splat (n1, 1);
376 vr1 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr1);
377 skin++;
379 /* Third vertex/normal */
380 vx = vec_splat (v1, 2);
381 vy = vec_splat (v1, 3);
382 vz = vec_splat (v2, 0);
383 nx = vec_splat (n1, 2);
384 ny = vec_splat (n1, 3);
385 nz = vec_splat (n2, 0);
387 vr2 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr2);
388 skin++;
390 /* Fourth vertex/normal */
391 vx = vec_splat (v2, 1);
392 vy = vec_splat (v2, 2);
393 vz = vec_splat (v2, 3);
394 nx = vec_splat (n2, 1);
395 ny = vec_splat (n2, 2);
396 nz = vec_splat (n2, 3);
398 vr3 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr3);
399 skin++;
401 /* Assemble */
402 v0 = vec_perm (vr0, vr1, p0);
403 v1 = vec_perm (vr1, vr2, p1);
404 v2 = vec_perm (vr2, vr3, p2);
406 n0 = vec_perm (nr0, nr1, p0);
407 n1 = vec_perm (nr1, nr2, p1);
408 n2 = vec_perm (nr2, nr3, p2);
410 /* Store */
411 vec_st (v0, j, vdst);
412 vec_st (v1, j + 16, vdst);
413 vec_st (v2, j + 32, vdst);
415 vec_st (n0, j, ndst);
416 vec_st (n1, j + 16, ndst);
417 vec_st (n2, j + 32, ndst);
420 i <<= 2;
421 vsrc += i*3;
422 nsrc += i*3;
423 vdst += i*3;
424 ndst += i*3;
425 #else
426 i = 0;
427 #endif
429 for (; i < s->num_vertices; ++i, vsrc += 3, nsrc += 3, vdst += 3, ndst += 3,
430 ++skin)
432 float v[3] = {0,0,0}, n[3] = {0,0,0}, v0[4], v1[4], w;
434 for (j = 0; j < skin->num_bones; ++j) {
435 w = skin->weights[j];
436 b = &s->bones[skin->boneindices[j]];
438 mapply_to_point (v1, b->cm, vsrc);
439 v1[0] *= w;
440 v1[1] *= w;
441 v1[2] *= w;
443 mapply_to_vector (v0, b->cm, nsrc);
444 v0[0] *= w;
445 v0[1] *= w;
446 v0[2] *= w;
448 vaddto (v, v1);
449 vaddto (n, v0);
452 vcopy (vdst, v);
453 vcopy (ndst, n);
456 #ifdef TIMING
457 E = now ();
458 printf ("took %f sec\n", E - S);
459 #endif
462 CAMLprim value ml_skin_set_skel (value skel_v)
464 int i;
465 size_t size;
466 struct bone *b;
467 CAMLparam1 (skel_v);
468 CAMLlocal2 (v, floats_v);
469 State *s = &glob_state;
471 s->num_bones = Wosize_val (skel_v);
472 size = (s->num_bones + 1) * sizeof (struct bone);
473 s->bones = b = simd_alloc (16, size);
475 memset (b, 0, size);
476 b->parent = -1;
477 b->q[3] = 1.0;
478 b->mq[3] = 1.0;
479 b->aq[3] = 1.0;
480 b->amq[3] = 1.0;
481 b++;
483 for (i = 0; i < s->num_bones; ++i, ++b) {
484 v = Field (skel_v, i);
485 floats_v = Field (v, 1);
487 b->parent = Int_val (Field (v, 0)) + 1;
489 b->v[0] = Double_field (floats_v, 1);
490 b->v[1] = Double_field (floats_v, 2);
491 b->v[2] = Double_field (floats_v, 3);
493 b->q[0] = Double_field (floats_v, 5);
494 b->q[1] = Double_field (floats_v, 6);
495 b->q[2] = Double_field (floats_v, 7);
496 b->q[3] = Double_field (floats_v, 8);
499 b = s->bones + 1;
500 for (i = 0; i < s->num_bones; ++i, ++b) {
501 float v[3];
502 struct bone *parent = &s->bones[b->parent];
504 qapply (v, parent->mq, b->v);
505 qcompose (b->mq, b->q, parent->mq);
506 vadd (b->mv, v, parent->mv);
507 #ifdef USE_ALTIVEC
508 b->cm[3] = b->mv[0];
509 b->cm[7] = b->mv[1];
510 b->cm[11] = b->mv[2];
511 #endif
514 CAMLreturn (Val_unit);
517 CAMLprim value ml_skin_set_anim (value anim_v)
519 int i;
520 CAMLparam1 (anim_v);
521 CAMLlocal1 (floats_v);
522 State *s = &glob_state;
523 struct bone *b = s->bones + 1;
525 for (i = 0; i < s->num_bones; ++i, ++b) {
526 floats_v = Field (anim_v, i);
527 b->aq[0] = Double_field (floats_v, 0);
528 b->aq[1] = Double_field (floats_v, 1);
529 b->aq[2] = Double_field (floats_v, 2);
530 b->aq[3] = Double_field (floats_v, 3);
533 b = s->bones + 1;
534 for (i = 0; i < s->num_bones; ++i, ++b) {
535 float v[4], v1[4], q[4], q1[4];
536 struct bone *parent = &s->bones[b->parent];
538 qapply (v, parent->amq, b->v);
539 qcompose (b->amq, b->aq, parent->amq);
540 vadd (b->amv, v, parent->amv);
542 qconjugate (q1, b->mq);
543 qcompose (q, q1, b->amq);
545 qapply (v, q, b->mv);
546 vsub (v1, b->amv, v);
547 q2matrixt (b->cm, q, v1);
550 CAMLreturn (Val_unit);
553 CAMLprim value ml_skin_anim (value unit_v)
555 GLboolean ret;
556 CAMLparam1 (unit_v);
557 float *vdst, *ndst;
558 State *s = &glob_state;
560 if (use_vbo) {
561 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[0]);
562 vdst = ndst = glMapBuffer (GL_ARRAY_BUFFER, GL_WRITE_ONLY);
563 if (!vdst) caml_failwith ("glMapBuffer failed");
565 ndst += (float *) s->bufs[N_IDX] - (float *) s->bufs[V_IDX];
567 else {
568 vdst = s->bufs[V_IDX];
569 ndst = s->bufs[N_IDX];
572 translate (s, vdst, ndst);
574 if (use_vbo) {
575 ret = glUnmapBuffer (GL_ARRAY_BUFFER);
576 if (ret == GL_FALSE) caml_failwith ("glUnmapBuffer failed");
579 CAMLreturn (Val_unit);
582 #ifndef GL_GENERATE_MIPMAP
583 #define GL_GENERATE_MIPMAP 0x8191
584 #endif
586 CAMLprim value ml_set_generate_mipmaps (value unit_v)
588 (void) unit_v;
589 glTexParameteri (GL_TEXTURE_2D, GL_GENERATE_MIPMAP, GL_TRUE);
590 return Val_unit;