Deobfuscate and correct documentation
[dormin.git] / skin.c
blob1832ad0236672de1f077fdbde9a72f5203e9b9e1
1 #include <math.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
6 #include <caml/fail.h>
7 #include <caml/alloc.h>
8 #include <caml/memory.h>
10 #include "vec.c"
11 #include "pgl.h"
/* Indices of the four per-vertex attributes (position, normal, texture
   coordinate, colour) inside the pointer arrays used in this file; COUNT
   is the number of attributes. */
enum {V_IDX, N_IDX, UV_IDX, C_IDX, COUNT};

/* Round i up to the next multiple of n.  n must be a power of two.
   Both arguments are fully parenthesised so that an expression argument
   such as ALNN (1 << 4, x) expands as intended. */
#define ALNN(n, i) (((i) + ((n) - 1)) & ~((n) - 1))

/* Alignment requested for the destination vertex/normal buffer. */
#ifdef G4
#define DSTAL 32
#else
#define DSTAL 16
#endif

#ifdef USE_ALTIVEC
/* Altivec code derived from: */
/* http://www.freevec.org/category/simd/algorithms/algebra/matrix_operations */
#include <altivec.h>
#include <malloc.h>
/* Aligned allocation, 16-byte type alignment and real rounding helpers;
   bone matrices carry 16 floats. */
#define simd_alloc(b, s) memalign (b, s)
#define A16 __attribute__ ((aligned (16)))
#define AL16(i) ALNN (16, i)
#define AL32(i) ALNN (32, i)
#define CM_ELEMS 16
#else
/* Scalar build: plain malloc (the alignment argument is ignored), no
   alignment attribute, rounding helpers are the identity and bone
   matrices carry 12 floats. */
#define simd_alloc(b, s) malloc (s)
#define A16
#define AL16(i) (i)
#define AL32(i) (i)
#define CM_ELEMS 12
#endif
/* Per-vertex skinning record.

   weights  - blend weight for each of up to three influencing bones.
   boneinfo - packed integer: the low two bits hold the number of
              influencing bones, followed by three 10-bit fields (at bit
              offsets 2, 12 and 22) each holding a bone index plus one. */
struct skin {
    float weights[3];
    int boneinfo;
} A16;
/* Skeleton bone.  Slot 0 of the bones array is a synthetic root; real
   bones start at index 1 and refer to their parent by array index. */
struct bone {
    /* Offset and rotation read from the skeleton description
       (presumably relative to the parent bone -- confirm with caller). */
    float v[4];
    float q[4];

    /* Offset and rotation accumulated along the parent chain by
       ml_skin_set_skel. */
    float mv[4];
    float mq[4];

    /* aq: per-bone rotation supplied by ml_skin_set_anim;
       amq/amv: rotation and offset accumulated along the parent chain
       for the current animation pose. */
    float aq[4];
    float amq[4];
    float amv[4];

    /* NOTE(review): cm is not referenced by the code in this file. */
    float cm[16];
    /* Index of the parent in the bones array (0 is the synthetic root). */
    int parent;
};
61 struct abone {
62 float cm[CM_ELEMS];
63 } A16;
65 typedef struct {
66 int num_bones;
67 int num_vertices;
68 GLuint bufid[2];
69 void *ptrs[2];
70 void *bufs[COUNT];
71 struct skin *skin;
72 struct bone *bones;
73 struct abone *abones;
74 } State;
76 static State glob_state;
77 static int use_vbo;
79 static void copy_vertices (float *p, int num_vertices, value a_v)
81 int i, k;
83 for (i = 0, k = 0; i < num_vertices; ++i, p += 3) {
84 p[0] = Double_field (a_v, k++);
85 p[1] = Double_field (a_v, k++);
86 p[2] = Double_field (a_v, k++);
90 static void set_geom (State *s, void **ptrs, value vertexa_v, value normala_v,
91 value uva_v, value skin_v, value colors_v)
93 int i;
94 float *p;
95 int num_vertices;
96 struct skin *skin;
98 num_vertices = Wosize_val (vertexa_v) / (Double_wosize * 3);
100 copy_vertices (ptrs[V_IDX], num_vertices, vertexa_v);
101 copy_vertices (ptrs[N_IDX], num_vertices, normala_v);
103 for (i = 0, p = ptrs[UV_IDX]; i < num_vertices * 2; ++i) {
104 p[i] = Double_field (uva_v, i);
106 memcpy (ptrs[C_IDX], String_val (colors_v), num_vertices * 4);
108 skin = s->skin;
109 for (i = 0; i < num_vertices; ++i) {
110 int j;
111 value v;
113 v = Field (skin_v, i);
114 skin[i].boneinfo = Int_val (Field (v, 3));
116 for (j = 0; j < Int_val (Field (v, 3)); ++j) {
117 double val;
118 int boneindex;
119 const int shifts[] = {2,12,22};
121 val = Double_val (Bp_val (Field (v, j)));
123 boneindex = (int) val;
124 skin[i].weights[j] = val - boneindex;
125 skin[i].boneinfo |= (boneindex + 1) << shifts[j];
130 static void skin_init (State *s, value vertexa_v, value normala_v,
131 value uva_v, value skin_v, value colors_v)
133 char *p;
134 GLsizei sizevn, sizev, sizeu, sizec;
135 void *ptrs[COUNT];
137 s->num_vertices = Wosize_val (vertexa_v) / (Double_wosize * 3);
139 sizev = AL32 (3 * sizeof (GLfloat) * s->num_vertices);
140 sizeu = 2 * sizeof (GLfloat) * s->num_vertices;
141 sizec = 4 * s->num_vertices;
143 sizevn = sizev * 2;
145 p = simd_alloc (16, AL16 (sizevn) + s->num_vertices * sizeof (struct skin));
146 s->skin = (struct skin *) (p + AL16 (sizevn));
147 s->ptrs[0] = ptrs[V_IDX] = p;
148 ptrs[N_IDX] = p + sizev;
150 p = stat_alloc (sizec + sizeu);
151 s->ptrs[1] = ptrs[UV_IDX] = p;
152 ptrs[C_IDX] = p + sizeu;
154 set_geom (s, ptrs, vertexa_v, normala_v, uva_v, skin_v, colors_v);
156 if (use_vbo) {
157 glGenBuffers (2, s->bufid);
159 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[0]);
160 glBufferData (GL_ARRAY_BUFFER, sizevn, s->ptrs[0], GL_DYNAMIC_DRAW);
162 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[1]);
163 glBufferData (GL_ARRAY_BUFFER, sizeu+sizec, s->ptrs[1], GL_STATIC_DRAW);
165 glBindBuffer (GL_ARRAY_BUFFER, 0);
166 stat_free (s->ptrs[1]);
168 p = NULL;
169 s->bufs[V_IDX] = p;
170 s->bufs[N_IDX] = p + sizev;
171 s->bufs[UV_IDX] = p;
172 s->bufs[C_IDX] = p + sizeu;
174 else {
175 p = simd_alloc (DSTAL, sizevn);
176 s->bufs[V_IDX] = p;
177 s->bufs[N_IDX] = p + sizev;
178 s->bufs[UV_IDX] = ptrs[UV_IDX];
179 s->bufs[C_IDX] = ptrs[C_IDX];
181 memcpy (p, s->ptrs[0], sizevn);
183 s->num_vertices /= 1;
186 CAMLprim value ml_skin_draw_begin (value unit_v)
188 State *s = &glob_state;
190 (void) unit_v;
192 glEnableClientState (GL_VERTEX_ARRAY);
193 glEnableClientState (GL_NORMAL_ARRAY);
194 glEnableClientState (GL_TEXTURE_COORD_ARRAY);
195 glEnableClientState (GL_COLOR_ARRAY);
197 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[0]);
198 glVertexPointer (3, GL_FLOAT, 3 * sizeof (GLfloat), s->bufs[V_IDX]);
199 glNormalPointer (GL_FLOAT, 3 * sizeof (GLfloat), s->bufs[N_IDX]);
201 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[1]);
202 glTexCoordPointer (2, GL_FLOAT, 0, s->bufs[UV_IDX]);
203 glColorPointer (4, GL_UNSIGNED_BYTE, 0, s->bufs[C_IDX]);
205 return Val_unit;
208 CAMLprim value ml_skin_draw_end (value unit_v)
210 (void) unit_v;
211 glDisableClientState (GL_VERTEX_ARRAY);
212 glDisableClientState (GL_NORMAL_ARRAY);
213 glDisableClientState (GL_TEXTURE_COORD_ARRAY);
214 glDisableClientState (GL_COLOR_ARRAY);
215 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, 0);
216 return Val_unit;
219 CAMLprim value ml_skin_init (value use_vbo_v, value geom_v)
221 CAMLparam2 (use_vbo_v, geom_v);
222 CAMLlocal5 (vertexa_v, normala_v, uva_v, skin_v, colors_v);
223 State *s = &glob_state;
225 use_vbo = Bool_val (use_vbo_v);
226 #ifdef _WIN32
227 if (use_vbo) {
228 GETPA (BindBuffer);
229 GETPA (GenBuffers);
230 GETPA (BufferData);
231 GETPA (BufferSubData);
232 GETPA (MapBuffer);
233 GETPA (UnmapBuffer);
235 #endif
236 vertexa_v = Field (geom_v, 0);
237 normala_v = Field (geom_v, 1);
238 uva_v = Field (geom_v, 2);
239 skin_v = Field (geom_v, 3);
240 colors_v = Field (geom_v, 4);
242 skin_init (s, vertexa_v, normala_v, uva_v, skin_v, colors_v);
243 CAMLreturn (Val_unit);
#ifdef TIMING
#include <err.h>
#include <sys/time.h>
/* Return the current time of day in seconds (microsecond resolution).
   Terminates the process via err () if gettimeofday fails. */
static double now (void)
{
    struct timeval tv;
    int rc;

    rc = gettimeofday (&tv, NULL);
    if (rc != 0) {
        err (1, "gettimeofday");
    }

    return tv.tv_sec + tv.tv_usec * 1e-6;
}
#endif
#ifdef USE_ALTIVEC

/* Emit the data-cache instruction o (e.g. dcbz, dcbt) for the address
   formed by base register b and index register i. */
#define DCB(o, b, i) __asm__ __volatile__ (#o " %0, %1" ::"b"(b),"r"(i))

/* Blend one vertex and its normal through the bones listed in skin.

   x, y, z / nx, ny, nz hold the position / normal components, each
   splatted across a whole vector.  For every influencing bone the
   position is computed as r0*x + r1*y + r2*z + r3 and the normal as
   r0*nx + r1*ny + r2*nz (r0..r3 being the four vectors of the bone's
   matrix); both are scaled by the bone weight and accumulated.

   Returns the blended position; the blended normal is stored in *np. */
static vector float appbones (State *s,
                              struct skin *skin,
                              vector float x,
                              vector float y,
                              vector float z,
                              vector float nx,
                              vector float ny,
                              vector float nz,
                              vector float *np)
{
    int k;
    int count;
    int packed;
    struct abone *bone;
    vector float zero = (vector float) vec_splat_u32 (0);
    vector float pos, weights, nrm;
    /* Shift amount for vec_slo: 4 octets, i.e. one float. */
    vector unsigned char shift = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4<<3};

    pos = nrm = zero;
    weights = vec_ld (0, skin->weights);

    count = skin->boneinfo & 3;
    packed = skin->boneinfo >> 2;
    for (k = 0; k < count; ++k) {
        vector float pa, pb, pc, na, nb, nc, r0, r1, r2, r3, wk;

        /* Next 10-bit bone index. */
        bone = &s->abones[packed & 0x3ff];
        packed >>= 10;

        /* Broadcast the current weight, then move the next one to the
           front. */
        wk = vec_splat (weights, 0);
        weights = vec_slo (weights, shift);

        r0 = vec_ld ( 0, bone->cm);
        r1 = vec_ld (16, bone->cm);
        r2 = vec_ld (32, bone->cm);
        r3 = vec_ld (48, bone->cm);

        pa = vec_madd (r0, x, r3);
        pb = vec_madd (r1, y, pa);
        pc = vec_madd (r2, z, pb);
        pos = vec_madd (pc, wk, pos);

        na = vec_madd (r0, nx, zero);
        nb = vec_madd (r1, ny, na);
        nc = vec_madd (r2, nz, nb);
        nrm = vec_madd (nc, wk, nrm);
    }

    *np = nrm;
    return pos;
}
#endif
314 static void translate (State *s, float *vdst, float *ndst)
316 int i, j;
317 struct abone *b;
318 float *vsrc = s->ptrs[0];
319 float *nsrc =
320 (float *) ((char *) vsrc + AL32 (s->num_vertices * 3 * sizeof (GLfloat)));
321 struct skin *skin = s->skin;
323 #ifdef TIMING
324 double S = now (), E;
325 #endif
327 #ifdef USE_ALTIVEC
328 vector unsigned char p0 =
329 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19 };
330 vector unsigned char p1 =
331 { 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23 };
332 vector unsigned char p2 =
333 { 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 };
335 for (i = 0, j = 0; i < s->num_vertices >> 2; ++i, j += 48) {
336 vector float v0, v1, v2, n0, n1, n2;
337 vector float vx, vy, vz, nx, ny, nz;
338 vector float vr0, vr1, vr2, vr3;
339 vector float nr0, nr1, nr2, nr3;
341 #ifdef G4
342 if (!(i & 3)) {
343 DCB (dcbz, vdst, j);
344 DCB (dcbz, ndst, j);
347 DCB (dcbz, vdst, j + 32);
348 DCB (dcbz, ndst, j + 32);
349 #endif
351 DCB (dcbt, skin, 0);
352 DCB (dcbt, skin + 1, 0);
353 DCB (dcbt, skin + 2, 0);
354 DCB (dcbt, skin + 3, 0);
356 DCB (dcbt, vsrc, j + 64);
357 DCB (dcbt, nsrc, j + 64);
358 DCB (dcbt, vsrc, j + 96);
359 DCB (dcbt, nsrc, j + 96);
361 /* Load */
362 v0 = vec_ld (j, vsrc);
363 v1 = vec_ld (j + 16, vsrc);
364 v2 = vec_ld (j + 32, vsrc);
365 n0 = vec_ld (j, nsrc);
366 n1 = vec_ld (j + 16, nsrc);
367 n2 = vec_ld (j + 32, nsrc);
369 /* First vertex/normal */
370 vx = vec_splat (v0, 0);
371 vy = vec_splat (v0, 1);
372 vz = vec_splat (v0, 2);
373 nx = vec_splat (n0, 0);
374 ny = vec_splat (n0, 1);
375 nz = vec_splat (n0, 2);
377 vr0 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr0);
378 skin++;
380 /* Second vertex/normal */
381 vx = vec_splat (v0, 3);
382 vy = vec_splat (v1, 0);
383 vz = vec_splat (v1, 1);
384 nx = vec_splat (n0, 3);
385 ny = vec_splat (n1, 0);
386 nz = vec_splat (n1, 1);
388 vr1 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr1);
389 skin++;
391 /* Third vertex/normal */
392 vx = vec_splat (v1, 2);
393 vy = vec_splat (v1, 3);
394 vz = vec_splat (v2, 0);
395 nx = vec_splat (n1, 2);
396 ny = vec_splat (n1, 3);
397 nz = vec_splat (n2, 0);
399 vr2 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr2);
400 skin++;
402 /* Fourth vertex/normal */
403 vx = vec_splat (v2, 1);
404 vy = vec_splat (v2, 2);
405 vz = vec_splat (v2, 3);
406 nx = vec_splat (n2, 1);
407 ny = vec_splat (n2, 2);
408 nz = vec_splat (n2, 3);
410 vr3 = appbones (s, skin, vx, vy, vz, nx, ny, nz, &nr3);
411 skin++;
413 /* Assemble */
414 v0 = vec_perm (vr0, vr1, p0);
415 v1 = vec_perm (vr1, vr2, p1);
416 v2 = vec_perm (vr2, vr3, p2);
418 n0 = vec_perm (nr0, nr1, p0);
419 n1 = vec_perm (nr1, nr2, p1);
420 n2 = vec_perm (nr2, nr3, p2);
422 /* Store */
423 vec_st (v0, j, vdst);
424 vec_st (v1, j + 16, vdst);
425 vec_st (v2, j + 32, vdst);
427 vec_st (n0, j, ndst);
428 vec_st (n1, j + 16, ndst);
429 vec_st (n2, j + 32, ndst);
432 i <<= 2;
433 vsrc += i*3;
434 nsrc += i*3;
435 vdst += i*3;
436 ndst += i*3;
437 #else
438 i = 0;
439 #endif
441 for (; i < s->num_vertices; ++i, vsrc += 3, nsrc += 3, vdst += 3, ndst += 3,
442 ++skin)
444 int num_bones, bone_index;
445 float v[3] = {0,0,0}, n[3] = {0,0,0}, v0[4], v1[4], w;
447 num_bones = skin->boneinfo & 3;
448 bone_index = skin->boneinfo >> 2;
449 for (j = 0; j < num_bones; ++j) {
450 w = skin->weights[j];
451 b = &s->abones[bone_index & 0x3ff];
452 bone_index >>= 10;
454 mapply_to_point (v1, b->cm, vsrc);
455 v1[0] *= w;
456 v1[1] *= w;
457 v1[2] *= w;
459 mapply_to_vector (v0, b->cm, nsrc);
460 v0[0] *= w;
461 v0[1] *= w;
462 v0[2] *= w;
464 vaddto (v, v1);
465 vaddto (n, v0);
468 vcopy (vdst, v);
469 vcopy (ndst, n);
472 #ifdef TIMING
473 E = now ();
474 printf ("took %f sec\n", E - S);
475 #endif
478 CAMLprim value ml_skin_set_skel (value skel_v)
480 int i;
481 size_t size;
482 struct bone *b;
483 struct abone *ab;
484 CAMLparam1 (skel_v);
485 CAMLlocal2 (v, floats_v);
486 State *s = &glob_state;
488 s->num_bones = Wosize_val (skel_v);
489 size = (s->num_bones + 1) * sizeof (*b);
490 s->bones = b = simd_alloc (16, size);
491 s->abones = ab = simd_alloc (16, (s->num_bones + 1) * sizeof (*ab));
493 memset (b, 0, size);
494 b->parent = -1;
495 b->q[3] = 1.0;
496 b->mq[3] = 1.0;
497 b->aq[3] = 1.0;
498 b->amq[3] = 1.0;
499 b++;
501 for (i = 0; i < s->num_bones; ++i, ++b) {
502 v = Field (skel_v, i);
503 floats_v = Field (v, 1);
505 b->parent = Int_val (Field (v, 0)) + 1;
507 b->v[0] = Double_field (floats_v, 1);
508 b->v[1] = Double_field (floats_v, 2);
509 b->v[2] = Double_field (floats_v, 3);
511 b->q[0] = Double_field (floats_v, 5);
512 b->q[1] = Double_field (floats_v, 6);
513 b->q[2] = Double_field (floats_v, 7);
514 b->q[3] = Double_field (floats_v, 8);
517 b = s->bones + 1;
518 ab = s->abones + 1;
519 for (i = 0; i < s->num_bones; ++i, ++b, ++ab) {
520 float v[3];
521 struct bone *parent = &s->bones[b->parent];
523 qapply (v, parent->mq, b->v);
524 qcompose (b->mq, b->q, parent->mq);
525 vadd (b->mv, v, parent->mv);
528 CAMLreturn (Val_unit);
531 CAMLprim value ml_skin_set_anim (value anim_v)
533 int i;
534 CAMLparam1 (anim_v);
535 CAMLlocal1 (floats_v);
536 State *s = &glob_state;
537 struct bone *b = s->bones + 1;
538 struct abone *ab = s->abones + 1;
540 for (i = 0; i < s->num_bones; ++i, ++b) {
541 floats_v = Field (anim_v, i);
542 b->aq[0] = Double_field (floats_v, 0);
543 b->aq[1] = Double_field (floats_v, 1);
544 b->aq[2] = Double_field (floats_v, 2);
545 b->aq[3] = Double_field (floats_v, 3);
548 b = s->bones + 1;
549 for (i = 0; i < s->num_bones; ++i, ++b, ++ab) {
550 float v[4], v1[4], q[4], q1[4];
551 struct bone *parent = &s->bones[b->parent];
553 qapply (v, parent->amq, b->v);
554 qcompose (b->amq, b->aq, parent->amq);
555 vadd (b->amv, v, parent->amv);
557 qconjugate (q1, b->mq);
558 qcompose (q, q1, b->amq);
560 qapply (v, q, b->mv);
561 vsub (v1, b->amv, v);
562 q2matrixt (ab->cm, q, v1);
565 CAMLreturn (Val_unit);
568 CAMLprim value ml_skin_anim (value unit_v)
570 GLboolean ret;
571 CAMLparam1 (unit_v);
572 float *vdst, *ndst;
573 State *s = &glob_state;
575 if (use_vbo) {
576 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[0]);
577 vdst = ndst = glMapBuffer (GL_ARRAY_BUFFER, GL_WRITE_ONLY);
578 if (!vdst) caml_failwith ("glMapBuffer failed");
580 ndst += (float *) s->bufs[N_IDX] - (float *) s->bufs[V_IDX];
582 else {
583 vdst = s->bufs[V_IDX];
584 ndst = s->bufs[N_IDX];
587 translate (s, vdst, ndst);
589 if (use_vbo) {
590 ret = glUnmapBuffer (GL_ARRAY_BUFFER);
591 if (ret == GL_FALSE) caml_failwith ("glUnmapBuffer failed");
594 CAMLreturn (Val_unit);
597 #ifndef GL_GENERATE_MIPMAP
598 #define GL_GENERATE_MIPMAP 0x8191
599 #endif
601 CAMLprim value ml_set_generate_mipmaps (value unit_v)
603 (void) unit_v;
604 glTexParameteri (GL_TEXTURE_2D, GL_GENERATE_MIPMAP, GL_TRUE);
605 return Val_unit;