Remove vperm hackery
[dormin.git] / skin.c
blob935a190586559f0691f32b06f05e751985bfe1de
1 #include <math.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
6 #include <caml/fail.h>
7 #include <caml/alloc.h>
8 #include <caml/memory.h>
10 #include "vec.c"
11 #include "pgl.h"
/* Indices of the per-vertex attribute streams used throughout this file:
   positions, normals, texture coordinates and colours.  COUNT is the
   number of streams and sizes the per-stream arrays.  */
enum {V_IDX, N_IDX, UV_IDX, C_IDX, COUNT};
/* Build-time layout configuration.
   With USE_ALTIVEC, positions and normals are stored as 4 floats
   (16 bytes) per vertex and allocations are 16-byte aligned; otherwise
   they are tightly packed 3-float vectors and the alignment helpers are
   no-ops.  */
#ifdef USE_ALTIVEC
/* Altivec code derived from: */
/* http://www.freevec.org/category/simd/algorithms/algebra/matrix_operations */

#include <altivec.h>
#ifndef __APPLE__
#include <malloc.h>
#define simd_alloc(s) memalign (16, s)
#else
/* NOTE(review): relies on the platform malloc returning storage that is
   already 16-byte aligned - confirm for the targeted Apple systems.  */
#define simd_alloc(s) malloc (s)
#endif
#define A16 __attribute__ ((aligned (16)))
#define STRIDE 16
#define V_ELEMS 4
/* Round i up to the next multiple of 16.  */
#define AL16(i) (((i)+15)&~15)

#else

#define STRIDE 0
#define V_ELEMS 3
#define simd_alloc(s) malloc (s)
#define A16
#define AL16(i) (i)

#endif
41 const int usage[COUNT] = {GL_DYNAMIC_DRAW, GL_DYNAMIC_DRAW,
42 GL_STATIC_DRAW, GL_STATIC_DRAW};
/* Per-vertex skinning record, filled in by set_geom: the bones that
   influence the vertex and the weight of each.  */
struct skin {
    /* Indices into State.bones.  set_geom stores the incoming index
       plus one, because slot 0 of the bone array is the synthetic root
       created by ml_skin_set_skel.  */
    int boneindices[3];
#ifdef USE_ALTIVEC
    /* One 4-float vector per bone, each holding the weight replicated
       in all four lanes (3 bones x 4 lanes).  */
    float weights[12] A16;
#else
    float weights[3];
#endif
    /* Number of entries of boneindices/weights that are in use.  */
    int num_bones;
} A16;
54 struct bone {
55 int parent;
57 float v[4] A16;
58 float q[4] A16;
60 float mv[4] A16;
61 float mq[4];
63 float aq[4];
64 float amq[4];
65 float amv[4];
67 float cm[16] A16;
68 } A16;
70 typedef struct {
71 int num_bones;
72 int num_vertices;
73 GLuint bufid[COUNT];
74 void *ptrs[COUNT];
75 void *bufs[COUNT];
76 struct skin *skin;
77 struct bone *bones;
78 } State;
80 static State glob_state;
81 static int use_vbo;
83 static void copy_vertices (float *p, int num_vertices, value a_v)
85 int i, j, k;
87 for (i = 0, j = 0, k = 0; i < num_vertices; ++i) {
88 p[j++] = Double_field (a_v, k++);
89 p[j++] = Double_field (a_v, k++);
90 p[j++] = Double_field (a_v, k++);
91 #ifdef USE_ALTIVEC
92 p[j++] = 1.0;
93 #endif
97 static void set_geom (State *s, value vertexa_v, value normala_v,
98 value uva_v, value skin_v, value colors_v)
100 int i;
101 float *p;
102 int offset;
103 int num_vertices;
104 struct skin *skin;
106 num_vertices = Wosize_val (vertexa_v) / (Double_wosize * 3);
108 copy_vertices (s->ptrs[V_IDX], num_vertices, vertexa_v);
109 copy_vertices (s->ptrs[N_IDX], num_vertices, normala_v);
111 for (i = 0, p = s->ptrs[UV_IDX]; i < num_vertices * 2; ++i) {
112 p[i] = Double_field (uva_v, i);
114 memcpy (s->ptrs[C_IDX], String_val (colors_v), num_vertices * 4);
116 skin = s->skin;
117 for (i = 0; i < num_vertices; ++i) {
118 int j;
119 value v;
121 v = Field (skin_v, i);
122 skin[i].num_bones = Int_val (Field (v, 3));
124 for (j = 0; j < skin[i].num_bones; ++j) {
125 double val, w, r;
127 val = Double_val (Bp_val (Field (v, j)));
129 skin[i].boneindices[j] = (int) val;
130 w = val - skin[i].boneindices[j];
131 #ifdef USE_ALTIVEC
132 vector float vw = {w,w,w,w};
134 vec_st (vw, j*16, skin[i].weights);
135 #else
136 skin[i].weights[j] = w;
137 #endif
138 skin[i].boneindices[j] += 1;
143 static void skin_init (State *s, value vertexa_v, value normala_v,
144 value uva_v, value skin_v, value colors_v)
146 int i;
147 char *p;
148 GLsizei size;
149 int sizes[COUNT], offsets[COUNT];
151 s->num_vertices = Wosize_val (vertexa_v) / (Double_wosize * 3);
153 sizes[V_IDX] = V_ELEMS * sizeof (GLfloat);
154 sizes[N_IDX] = V_ELEMS * sizeof (GLfloat);
155 sizes[UV_IDX] = 2 * sizeof (GLfloat);
156 sizes[C_IDX] = 4;
158 for (i = 0, size = 0; i < COUNT; ++i) {
159 offsets[i] = size;
160 sizes[i] *= s->num_vertices;
161 size += sizes[i];
164 p = simd_alloc (AL16 (size) + s->num_vertices * sizeof (struct skin));
165 s->skin = (struct skin *) (p + AL16 (size));
167 for (i = 0; i < COUNT; ++i) s->ptrs[i] = p + offsets[i];
169 set_geom (s, vertexa_v, normala_v, uva_v, skin_v, colors_v);
171 if (use_vbo) {
172 glGenBuffers (COUNT, s->bufid);
174 for (i = 0; i < COUNT; ++i) {
175 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[i]);
176 glBufferData (GL_ARRAY_BUFFER, sizes[i], NULL, usage[i]);
177 glBufferSubData (GL_ARRAY_BUFFER, 0, sizes[i], s->ptrs[i]);
178 s->bufs[i] = NULL;
180 glBindBuffer (GL_ARRAY_BUFFER, 0);
182 else {
183 for (i = 0; i < COUNT; ++i) {
184 if (usage[i] == GL_STATIC_DRAW) {
185 s->bufs[i] = s->ptrs[i];
187 else {
188 s->bufs[i] = simd_alloc (sizes[i]);
189 memcpy (s->bufs[i], s->ptrs[i], sizes[i]);
195 CAMLprim value ml_skin_draw_begin (value unit_v)
197 State *s = &glob_state;
199 (void) unit_v;
200 glEnableClientState (GL_VERTEX_ARRAY);
201 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[V_IDX]);
202 glVertexPointer (3, GL_FLOAT, STRIDE, s->bufs[V_IDX]);
204 glEnableClientState (GL_NORMAL_ARRAY);
205 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[N_IDX]);
206 glNormalPointer (GL_FLOAT, STRIDE, s->bufs[N_IDX]);
208 glEnableClientState (GL_TEXTURE_COORD_ARRAY);
209 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[UV_IDX]);
210 glTexCoordPointer (2, GL_FLOAT, 0, s->bufs[UV_IDX]);
212 glEnableClientState (GL_COLOR_ARRAY);
213 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, s->bufid[C_IDX]);
214 glColorPointer (4, GL_UNSIGNED_BYTE, 0, s->bufs[C_IDX]);
216 return Val_unit;
219 CAMLprim value ml_skin_draw_end (value unit_v)
221 (void) unit_v;
222 glDisableClientState (GL_VERTEX_ARRAY);
223 glDisableClientState (GL_NORMAL_ARRAY);
224 glDisableClientState (GL_TEXTURE_COORD_ARRAY);
225 glDisableClientState (GL_COLOR_ARRAY);
226 if (use_vbo) glBindBuffer (GL_ARRAY_BUFFER, 0);
227 return Val_unit;
230 CAMLprim value ml_skin_init (value use_vbo_v, value geom_v)
232 CAMLparam2 (use_vbo_v, geom_v);
233 CAMLlocal5 (vertexa_v, normala_v, uva_v, skin_v, colors_v);
234 State *s = &glob_state;
236 use_vbo = Bool_val (use_vbo_v);
237 #ifdef _WIN32
238 if (use_vbo) {
239 GETPA (BindBuffer);
240 GETPA (GenBuffers);
241 GETPA (BufferData);
242 GETPA (MapBuffer);
243 GETPA (UnmapBuffer);
245 #endif
246 vertexa_v = Field (geom_v, 0);
247 normala_v = Field (geom_v, 1);
248 uva_v = Field (geom_v, 2);
249 skin_v = Field (geom_v, 3);
250 colors_v = Field (geom_v, 4);
252 skin_init (s, vertexa_v, normala_v, uva_v, skin_v, colors_v);
253 CAMLreturn (Val_unit);
#ifdef TIMING
#include <err.h>
#include <sys/time.h>
/* Wall-clock time in seconds, as reported by gettimeofday.  Terminates
   the process through err() if the clock cannot be read.  */
static double now (void)
{
    struct timeval tv;

    if (gettimeofday (&tv, NULL)) err (1, "gettimeofday");
    return tv.tv_sec + tv.tv_usec * 1e-6;
}
#endif
268 static void translate (State *s, float *vdst, float *ndst)
270 int i, j;
271 struct bone *b;
272 float *vsrc = s->ptrs[V_IDX];
273 float *nsrc = s->ptrs[N_IDX];
274 struct skin *skin = s->skin;
276 #ifdef TIMING
277 double S = now (), E;
278 #endif
280 #ifdef USE_ALTIVEC
281 for (i = 0; i < s->num_vertices; ++i, ++skin) {
282 vector float v, n, vs, ns, vz;
283 vector float r0, r1, r2, r3, nx, ny, nz;
285 v = n = vz = (vector float) vec_splat_u32 (0);
287 vs = vec_ld (i<<4, vsrc);
288 ns = vec_ld (i<<4, nsrc);
290 nx = vec_splat (ns, 0);
291 ny = vec_splat (ns, 1);
292 nz = vec_splat (ns, 2);
294 for (j = 0; j < skin->num_bones; ++j) {
295 vector float vw, v1, x, y, z, t0, t1, t2;
297 b = &s->bones[skin->boneindices[j]];
299 vw = vec_ld (j<<4, skin->weights);
301 r0 = vec_ld ( 0, b->cm);
302 r1 = vec_ld (16, b->cm);
303 r2 = vec_ld (32, b->cm);
304 r3 = vec_ld (48, b->cm);
306 v1 = vec_sub (vs, vec_ld (0, b->mv));
308 r0 = vec_madd (r0, vw, vz);
309 r1 = vec_madd (r1, vw, vz);
310 r2 = vec_madd (r2, vw, vz);
311 r3 = vec_madd (r3, vw, vz);
313 x = vec_splat (v1, 0);
314 y = vec_splat (v1, 1);
315 z = vec_splat (v1, 2);
317 t0 = vec_madd (r0, x, r3);
318 t1 = vec_madd (r1, y, t0);
319 t2 = vec_madd (r2, z, t1);
320 v = vec_add (v, t2);
322 t0 = vec_madd (r0, nx, n);
323 t1 = vec_madd (r1, ny, t0);
324 n = vec_madd (r2, nz, t1);
326 vec_st (v, i<<4, vdst);
327 vec_st (n, i<<4, ndst);
329 #else
330 for (i = 0; i < s->num_vertices; ++i,
331 vsrc += 3, nsrc += 3, vdst += 3, ndst += 3, ++skin)
333 if (skin->num_bones == 1) {
334 float v0[4];
336 b = &s->bones[skin->boneindices[0]];
338 vsub (v0, vsrc, b->mv);
339 mapply_to_point (vdst, b->cm, v0);
340 mapply_to_vector (ndst, b->cm, nsrc);
342 else
344 int z = 0;
345 float v[3] = {0,0,0}, n[3] = {0,0,0}, v0[4], v1[4], w, m[12];
347 for (j = 0; j < skin->num_bones; ++j) {
348 w = skin->weights[j];
349 b = &s->bones[skin->boneindices[j]];
351 if (w < 0.0) z = 1;
352 vsub (v0, vsrc, b->mv);
354 mscale (m, b->cm, w);
355 mapply_to_point (v0, m, v0);
356 mapply_to_vector (v1, m, nsrc);
357 vaddto (v, v0);
358 vaddto (n, v1);
361 /* hack hack */
362 if (z) vcopy (vdst, vsrc);
363 else vcopy (vdst, v);
364 vcopy (ndst, n);
367 #endif
369 #ifdef TIMING
370 E = now ();
371 printf ("took %f sec\n", E - S);
372 #endif
375 CAMLprim value ml_skin_set_skel (value skel_v)
377 int i;
378 size_t size;
379 struct bone *b;
380 CAMLparam1 (skel_v);
381 CAMLlocal2 (v, floats_v);
382 State *s = &glob_state;
384 s->num_bones = Wosize_val (skel_v);
385 size = (s->num_bones + 1) * sizeof (struct bone);
386 s->bones = b = simd_alloc (size);
388 memset (b, 0, size);
389 b->parent = -1;
390 b->q[3] = 1.0;
391 b->mq[3] = 1.0;
392 b->aq[3] = 1.0;
393 b->amq[3] = 1.0;
394 b++;
396 for (i = 0; i < s->num_bones; ++i, ++b) {
397 v = Field (skel_v, i);
398 floats_v = Field (v, 1);
400 b->parent = Int_val (Field (v, 0)) + 1;
402 b->v[0] = Double_field (floats_v, 1);
403 b->v[1] = Double_field (floats_v, 2);
404 b->v[2] = Double_field (floats_v, 3);
406 b->q[0] = Double_field (floats_v, 5);
407 b->q[1] = Double_field (floats_v, 6);
408 b->q[2] = Double_field (floats_v, 7);
409 b->q[3] = Double_field (floats_v, 8);
412 b = s->bones + 1;
413 for (i = 0; i < s->num_bones; ++i, ++b) {
414 float v[3];
415 struct bone *parent = &s->bones[b->parent];
417 qapply (v, parent->mq, b->v);
418 qcompose (b->mq, b->q, parent->mq);
419 vadd (b->mv, v, parent->mv);
420 #ifdef USE_ALTIVEC
421 b->cm[3] = b->mv[0];
422 b->cm[7] = b->mv[1];
423 b->cm[11] = b->mv[2];
424 #endif
427 CAMLreturn (Val_unit);
430 CAMLprim value ml_skin_set_anim (value anim_v)
432 int i;
433 CAMLparam1 (anim_v);
434 CAMLlocal1 (floats_v);
435 State *s = &glob_state;
436 struct bone *b = s->bones + 1;
438 for (i = 0; i < s->num_bones; ++i, ++b) {
439 floats_v = Field (anim_v, i);
440 b->aq[0] = Double_field (floats_v, 0);
441 b->aq[1] = Double_field (floats_v, 1);
442 b->aq[2] = Double_field (floats_v, 2);
443 b->aq[3] = Double_field (floats_v, 3);
446 b = s->bones + 1;
447 for (i = 0; i < s->num_bones; ++i, ++b) {
448 float v[3], q[4], q1[4];
449 struct bone *parent = &s->bones[b->parent];
451 qapply (v, parent->amq, b->v);
452 qcompose (b->amq, b->aq, parent->amq);
453 vadd (b->amv, v, parent->amv);
455 qconjugate (q1, b->mq);
456 qcompose (q, q1, b->amq);
457 q2matrixt (b->cm, q, b->amv);
460 CAMLreturn (Val_unit);
463 CAMLprim value ml_skin_anim (value unit_v)
465 GLboolean ret;
466 CAMLparam1 (unit_v);
467 float *vdst, *vsrc, *ndst, *nsrc;
468 State *s = &glob_state;
470 if (use_vbo) {
471 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[V_IDX]);
472 vdst = glMapBuffer (GL_ARRAY_BUFFER, GL_WRITE_ONLY);
473 if (!vdst) {
474 fprintf (stderr, "glMapBuffer for vertices failed\n");
475 exit (EXIT_FAILURE);
478 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[N_IDX]);
479 ndst = glMapBuffer (GL_ARRAY_BUFFER, GL_WRITE_ONLY);
480 if (!ndst) {
481 fprintf (stderr, "glMapBuffer for normals failed\n");
482 exit (EXIT_FAILURE);
485 else {
486 vdst = s->bufs[V_IDX];
487 ndst = s->bufs[N_IDX];
490 vsrc = s->ptrs[V_IDX];
491 nsrc = s->ptrs[N_IDX];
493 translate (s, vdst, ndst);
495 if (use_vbo) {
496 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[V_IDX]);
497 ret = glUnmapBuffer (GL_ARRAY_BUFFER);
498 if (ret == GL_FALSE) {
499 fprintf (stderr, "glUnmapBuffer for vertices failed\n");
500 exit (EXIT_FAILURE);
503 glBindBuffer (GL_ARRAY_BUFFER, s->bufid[N_IDX]);
504 ret = glUnmapBuffer (GL_ARRAY_BUFFER);
505 if (ret == GL_FALSE) {
506 fprintf (stderr, "glUnmapBuffer for normals failed\n");
507 exit (EXIT_FAILURE);
511 CAMLreturn (Val_unit);
514 #ifndef GL_GENERATE_MIPMAP
515 #define GL_GENERATE_MIPMAP 0x8191
516 #endif
518 CAMLprim value ml_set_generate_mipmaps (value unit_v)
520 (void) unit_v;
521 glTexParameteri (GL_TEXTURE_2D, GL_GENERATE_MIPMAP, GL_TRUE);
522 return Val_unit;