7 #include <caml/alloc.h>
8 #include <caml/memory.h>
/* Slot indices for the per-attribute pointer arrays (ptrs[] and s->bufs[]):
   V_IDX is used for vertex positions, N_IDX for normals, UV_IDX for texture
   coordinates and C_IDX for colors; COUNT is the number of slots. */
13 enum {V_IDX
, N_IDX
, UV_IDX
, C_IDX
, COUNT
};
/* Round i up to the next multiple of n.  The mask trick requires n to be a
   power of two.  Both parameters are fully parenthesized so that an argument
   such as `1 << 4` is not re-associated with the `- 1` inside the expansion
   (the previous form expanded it to `1 << 4-1`, i.e. 8 instead of 15). */
#define ALNN(n, i) (((i)+((n)-1))&~((n)-1))
24 /* Altivec code derived from: */
25 /* http://www.freevec.org/category/simd/algorithms/algebra/matrix_operations */
/* Allocation and alignment helpers.
   simd_alloc (b, s) allocates s bytes; this first definition forwards to
   memalign with boundary b.  A16 marks a declaration as 16-byte aligned.
   AL16 / AL32 round a size up to a multiple of 16 / 32 through ALNN.
   NOTE(review): simd_alloc is defined a second time below; presumably the two
   definitions live in alternative preprocessor branches that are not visible
   here -- confirm. */
28 #define simd_alloc(b, s) memalign (b, s)
29 #define A16 __attribute__ ((aligned (16)))
30 #define AL16(i) ALNN (16, i)
31 #define AL32(i) ALNN (32, i)
/* Variant that ignores the boundary argument b and uses plain malloc. */
34 #define simd_alloc(b, s) malloc (s)
/* The single file-scope State instance; the ml_skin_* entry points in this
   file take its address and operate on it. */
76 static State glob_state
;
/* Copy num_vertices 3-component entries from the OCaml float array a_v into
   the float buffer p.  k walks a_v one double at a time while p advances by
   three floats per vertex, so each double is narrowed to float on store.
   (Braces and the declarations of i and k are not visible in this excerpt.) */
79 static void copy_vertices (float *p
, int num_vertices
, value a_v
)
83 for (i
= 0, k
= 0; i
< num_vertices
; ++i
, p
+= 3) {
84 p
[0] = Double_field (a_v
, k
++);
85 p
[1] = Double_field (a_v
, k
++);
86 p
[2] = Double_field (a_v
, k
++);
/* Fill the attribute buffers in ptrs[] from the OCaml geometry values.
   - The vertex count is derived from the size of vertexa_v, at three doubles
     per vertex.
   - Positions and normals are copied with copy_vertices into ptrs[V_IDX] and
     ptrs[N_IDX].
   - Two texture-coordinate values per vertex are read from uva_v and stored
     through ptrs[UV_IDX].
   - Four color bytes per vertex are memcpy'd from the colors_v string into
     ptrs[C_IDX].
   - Per-vertex skinning data is then unpacked from skin_v (see below).
   Local declarations (including where `skin` comes from) and the closing
   braces are not visible in this excerpt. */
90 static void set_geom (State
*s
, void **ptrs
, value vertexa_v
, value normala_v
,
91 value uva_v
, value skin_v
, value colors_v
)
98 num_vertices
= Wosize_val (vertexa_v
) / (Double_wosize
* 3);
100 copy_vertices (ptrs
[V_IDX
], num_vertices
, vertexa_v
);
101 copy_vertices (ptrs
[N_IDX
], num_vertices
, normala_v
);
103 for (i
= 0, p
= ptrs
[UV_IDX
]; i
< num_vertices
* 2; ++i
) {
104 p
[i
] = Double_field (uva_v
, i
);
106 memcpy (ptrs
[C_IDX
], String_val (colors_v
), num_vertices
* 4);
/* Skin records: field 3 of each record is an integer that both seeds boneinfo
   and bounds the inner loop.  Fields 0..count-1 are read as doubles whose
   integer part is a bone index and whose fractional part is the weight. */
109 for (i
= 0; i
< num_vertices
; ++i
) {
113 v
= Field (skin_v
, i
);
114 skin
[i
].boneinfo
= Int_val (Field (v
, 3));
116 for (j
= 0; j
< Int_val (Field (v
, 3)); ++j
) {
/* Bit offsets at which (bone index + 1) is packed into boneinfo.
   NOTE(review): only three offsets exist, so a count above 3 in field 3 would
   index past this array -- confirm the caller guarantees count <= 3. */
119 const int shifts
[] = {2,12,22};
121 val
= Double_val (Bp_val (Field (v
, j
)));
123 boneindex
= (int) val
;
124 skin
[i
].weights
[j
] = val
- boneindex
;
125 skin
[i
].boneinfo
|= (boneindex
+ 1) << shifts
[j
];
/* Allocate geometry storage for the mesh, fill it through set_geom and set up
   the buffers used for drawing.
   Sizes: sizev is three GLfloats per vertex rounded up to a multiple of 32,
   sizeu is two GLfloats per vertex, sizec is four bytes per vertex.
   NOTE(review): the assignment of sizevn is not visible in this excerpt;
   presumably it covers positions plus normals -- confirm.
   The conditional structure (for example the use_vbo branches) around the
   statements below is also not visible, so several assignments to s->bufs
   that look redundant here presumably belong to different branches. */
130 static void skin_init (State
*s
, value vertexa_v
, value normala_v
,
131 value uva_v
, value skin_v
, value colors_v
)
134 GLsizei sizevn
, sizev
, sizeu
, sizec
;
137 s
->num_vertices
= Wosize_val (vertexa_v
) / (Double_wosize
* 3);
139 sizev
= AL32 (3 * sizeof (GLfloat
) * s
->num_vertices
);
140 sizeu
= 2 * sizeof (GLfloat
) * s
->num_vertices
;
141 sizec
= 4 * s
->num_vertices
;
/* One allocation holds AL16 (sizevn) bytes of vertex/normal data followed by
   the per-vertex struct skin array; normals start sizev bytes in. */
145 p
= simd_alloc (16, AL16 (sizevn
) + s
->num_vertices
* sizeof (struct skin
));
146 s
->skin
= (struct skin
*) (p
+ AL16 (sizevn
));
147 s
->ptrs
[0] = ptrs
[V_IDX
] = p
;
148 ptrs
[N_IDX
] = p
+ sizev
;
/* A second allocation holds the texture coordinates followed by the colors. */
150 p
= stat_alloc (sizec
+ sizeu
);
151 s
->ptrs
[1] = ptrs
[UV_IDX
] = p
;
152 ptrs
[C_IDX
] = p
+ sizeu
;
154 set_geom (s
, ptrs
, vertexa_v
, normala_v
, uva_v
, skin_v
, colors_v
);
/* Upload: buffer 0 receives the vertex/normal block (dynamic), buffer 1 the
   UV/color block (static); the UV/color staging memory is freed afterwards. */
157 glGenBuffers (2, s
->bufid
);
159 glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[0]);
160 glBufferData (GL_ARRAY_BUFFER
, sizevn
, s
->ptrs
[0], GL_DYNAMIC_DRAW
);
162 glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[1]);
163 glBufferData (GL_ARRAY_BUFFER
, sizeu
+sizec
, s
->ptrs
[1], GL_STATIC_DRAW
);
165 glBindBuffer (GL_ARRAY_BUFFER
, 0);
166 stat_free (s
->ptrs
[1]);
170 s
->bufs
[N_IDX
] = p
+ sizev
;
172 s
->bufs
[C_IDX
] = p
+ sizeu
;
/* Separate destination buffer of sizevn bytes, initialised with a copy of the
   source vertex/normal block; UV and color pointers reuse the staging area.
   NOTE(review): this only makes sense if the stat_free above is not executed
   on the same path -- presumably a different branch; confirm. */
175 p
= simd_alloc (DSTAL
, sizevn
);
177 s
->bufs
[N_IDX
] = p
+ sizev
;
178 s
->bufs
[UV_IDX
] = ptrs
[UV_IDX
];
179 s
->bufs
[C_IDX
] = ptrs
[C_IDX
];
181 memcpy (p
, s
->ptrs
[0], sizevn
);
/* Dividing by 1 leaves num_vertices unchanged. */
183 s
->num_vertices
/= 1;
/* OCaml primitive: prepare fixed-function vertex-array state for drawing the
   skinned mesh.  Enables the vertex, normal, texture-coordinate and color
   client arrays, then points them at s->bufs[].  When use_vbo is set, buffer 0
   is bound before the position/normal pointers and buffer 1 before the UV and
   color pointers.  Positions and normals use a stride of three GLfloats; UVs
   are two floats and colors four unsigned bytes, tightly packed.
   (The return statement is not visible in this excerpt.) */
186 CAMLprim value
ml_skin_draw_begin (value unit_v
)
188 State
*s
= &glob_state
;
192 glEnableClientState (GL_VERTEX_ARRAY
);
193 glEnableClientState (GL_NORMAL_ARRAY
);
194 glEnableClientState (GL_TEXTURE_COORD_ARRAY
);
195 glEnableClientState (GL_COLOR_ARRAY
);
197 if (use_vbo
) glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[0]);
198 glVertexPointer (3, GL_FLOAT
, 3 * sizeof (GLfloat
), s
->bufs
[V_IDX
]);
199 glNormalPointer (GL_FLOAT
, 3 * sizeof (GLfloat
), s
->bufs
[N_IDX
]);
201 if (use_vbo
) glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[1]);
202 glTexCoordPointer (2, GL_FLOAT
, 0, s
->bufs
[UV_IDX
]);
203 glColorPointer (4, GL_UNSIGNED_BYTE
, 0, s
->bufs
[C_IDX
]);
/* OCaml primitive: undo ml_skin_draw_begin.  Disables the four client arrays
   and, when use_vbo is set, unbinds the array buffer.
   (The return statement is not visible in this excerpt.) */
208 CAMLprim value
ml_skin_draw_end (value unit_v
)
211 glDisableClientState (GL_VERTEX_ARRAY
);
212 glDisableClientState (GL_NORMAL_ARRAY
);
213 glDisableClientState (GL_TEXTURE_COORD_ARRAY
);
214 glDisableClientState (GL_COLOR_ARRAY
);
215 if (use_vbo
) glBindBuffer (GL_ARRAY_BUFFER
, 0);
/* OCaml primitive: initialise the skinning state from a geometry record.
   use_vbo_v is stored in the use_vbo flag; geom_v is a block whose fields
   0..4 are, in order, the vertex array, normal array, UV array, skin data and
   colors, all handed to skin_init.  Parameters and locals are registered with
   the OCaml runtime via CAMLparam2 / CAMLlocal5.  Returns unit.
   NOTE(review): GETPA is defined outside this excerpt; presumably it resolves
   the named GL entry point -- confirm. */
219 CAMLprim value
ml_skin_init (value use_vbo_v
, value geom_v
)
221 CAMLparam2 (use_vbo_v
, geom_v
);
222 CAMLlocal5 (vertexa_v
, normala_v
, uva_v
, skin_v
, colors_v
);
223 State
*s
= &glob_state
;
225 use_vbo
= Bool_val (use_vbo_v
);
231 GETPA (BufferSubData
);
236 vertexa_v
= Field (geom_v
, 0);
237 normala_v
= Field (geom_v
, 1);
238 uva_v
= Field (geom_v
, 2);
239 skin_v
= Field (geom_v
, 3);
240 colors_v
= Field (geom_v
, 4);
242 skin_init (s
, vertexa_v
, normala_v
, uva_v
, skin_v
, colors_v
);
243 CAMLreturn (Val_unit
);
248 #include <sys/time.h>
/* Return the current time of day in seconds as a double (seconds plus
   microseconds scaled by 1e-6).  If gettimeofday reports failure, err (1, ...)
   is called.  (The declaration of tv is not visible in this excerpt.) */
249 static double now (void)
253 if (gettimeofday (&tv
, NULL
)) err (1, "gettimeofday");
254 return tv
.tv_sec
+ tv
.tv_usec
* 1e-6;
/* Emit one inline-assembly instruction whose mnemonic is o, with operands b
   (constraint "b") and i (constraint "r").  In this file it is invoked with
   the dcbz and dcbt mnemonics on the source and destination buffers. */
260 #define DCB(o, b, i) __asm__ __volatile__ (#o " %0, %1" ::"b"(b),"r"(i))
/* AltiVec helper: apply the bones referenced by one skin entry to a single
   position and normal.
   The bone count is the low two bits of skin->boneinfo; the remaining bits
   hold bone indices, of which the low ten bits are used per iteration.
   For each bone the four 16-byte rows r0..r3 of b->cm are loaded and
     position: r0*x + r1*y + r2*z + r3, scaled by the weight, added to v
     normal:   r0*nx + r1*ny + r2*nz (no r3 term), scaled likewise, added to n
   Most of the parameter list, the initialisation of v and n, the
   per-iteration advance of w and bone_index, and the return are not visible
   in this excerpt.
   NOTE(review): S presumably drives a vector shift of w between bones --
   confirm against the full source. */
262 static vector
float appbones (State
*s
,
276 vector
float vz
= (vector
float) vec_splat_u32 (0);
277 vector
float v
, w
, n
;
278 vector
unsigned char S
= {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4<<3};
281 w
= vec_ld (0, skin
->weights
);
283 num_bones
= skin
->boneinfo
& 3;
284 bone_index
= skin
->boneinfo
>> 2;
285 for (j
= 0; j
< num_bones
; ++j
) {
286 vector
float t0
, t1
, t2
, t3
, t4
, t5
, r0
, r1
, r2
, r3
, vw
;
288 b
= &s
->abones
[bone_index
& 0x3ff];
/* Broadcast element 0 of w so it can scale whole vectors. */
290 vw
= vec_splat (w
, 0);
293 r0
= vec_ld ( 0, b
->cm
);
294 r1
= vec_ld (16, b
->cm
);
295 r2
= vec_ld (32, b
->cm
);
296 r3
= vec_ld (48, b
->cm
);
298 t0
= vec_madd (r0
, x
, r3
);
299 t1
= vec_madd (r1
, y
, t0
);
300 t2
= vec_madd (r2
, z
, t1
);
301 v
= vec_madd (t2
, vw
, v
);
303 t3
= vec_madd (r0
, nx
, vz
);
304 t4
= vec_madd (r1
, ny
, t3
);
305 t5
= vec_madd (r2
, nz
, t4
);
306 n
= vec_madd (t5
, vw
, n
);
/* Skin every vertex and normal of the mesh into vdst / ndst.
   Source positions come from s->ptrs[0]; the expression below that adds
   AL32 (num_vertices * 3 * sizeof (GLfloat)) to vsrc is presumably the
   initialiser of nsrc (its declaration line is not visible).
   The vector loop handles four vertices (48 bytes of xyz floats) per
   iteration: it loads three vectors of positions and three of normals,
   splats the x/y/z components of each of the four vertices, calls appbones
   for each, repacks the four results into three vectors with the permute
   masks p0/p1/p2 and stores them.  A scalar loop then walks vertices one at
   a time using mapply_to_point / mapply_to_vector.  The elapsed time between
   S and E is printed at the end.
   Braces, several declarations, the per-vertex advance of `skin` and any
   preprocessor conditionals are not visible in this excerpt. */
314 static void translate (State
*s
, float *vdst
, float *ndst
)
318 float *vsrc
= s
->ptrs
[0];
320 (float *) ((char *) vsrc
+ AL32 (s
->num_vertices
* 3 * sizeof (GLfloat
)));
321 struct skin
*skin
= s
->skin
;
324 double S
= now (), E
;
/* Permute masks: indices 0-15 select bytes of the first operand, 16-31 bytes
   of the second; together they pack four 3-float results into three vectors. */
328 vector
unsigned char p0
=
329 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19 };
330 vector
unsigned char p1
=
331 { 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23 };
332 vector
unsigned char p2
=
333 { 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 };
335 for (i
= 0, j
= 0; i
< s
->num_vertices
>> 2; ++i
, j
+= 48) {
336 vector
float v0
, v1
, v2
, n0
, n1
, n2
;
337 vector
float vx
, vy
, vz
, nx
, ny
, nz
;
338 vector
float vr0
, vr1
, vr2
, vr3
;
339 vector
float nr0
, nr1
, nr2
, nr3
;
/* Cache-control instructions issued through DCB: dcbz on the destination
   buffers, dcbt on upcoming skin entries and source data. */
347 DCB (dcbz
, vdst
, j
+ 32);
348 DCB (dcbz
, ndst
, j
+ 32);
352 DCB (dcbt
, skin
+ 1, 0);
353 DCB (dcbt
, skin
+ 2, 0);
354 DCB (dcbt
, skin
+ 3, 0);
356 DCB (dcbt
, vsrc
, j
+ 64);
357 DCB (dcbt
, nsrc
, j
+ 64);
358 DCB (dcbt
, vsrc
, j
+ 96);
359 DCB (dcbt
, nsrc
, j
+ 96);
362 v0
= vec_ld (j
, vsrc
);
363 v1
= vec_ld (j
+ 16, vsrc
);
364 v2
= vec_ld (j
+ 32, vsrc
);
365 n0
= vec_ld (j
, nsrc
);
366 n1
= vec_ld (j
+ 16, nsrc
);
367 n2
= vec_ld (j
+ 32, nsrc
);
369 /* First vertex/normal */
370 vx
= vec_splat (v0
, 0);
371 vy
= vec_splat (v0
, 1);
372 vz
= vec_splat (v0
, 2);
373 nx
= vec_splat (n0
, 0);
374 ny
= vec_splat (n0
, 1);
375 nz
= vec_splat (n0
, 2);
377 vr0
= appbones (s
, skin
, vx
, vy
, vz
, nx
, ny
, nz
, &nr0
);
380 /* Second vertex/normal */
381 vx
= vec_splat (v0
, 3);
382 vy
= vec_splat (v1
, 0);
383 vz
= vec_splat (v1
, 1);
384 nx
= vec_splat (n0
, 3);
385 ny
= vec_splat (n1
, 0);
386 nz
= vec_splat (n1
, 1);
388 vr1
= appbones (s
, skin
, vx
, vy
, vz
, nx
, ny
, nz
, &nr1
);
391 /* Third vertex/normal */
392 vx
= vec_splat (v1
, 2);
393 vy
= vec_splat (v1
, 3);
394 vz
= vec_splat (v2
, 0);
395 nx
= vec_splat (n1
, 2);
396 ny
= vec_splat (n1
, 3);
397 nz
= vec_splat (n2
, 0);
399 vr2
= appbones (s
, skin
, vx
, vy
, vz
, nx
, ny
, nz
, &nr2
);
402 /* Fourth vertex/normal */
403 vx
= vec_splat (v2
, 1);
404 vy
= vec_splat (v2
, 2);
405 vz
= vec_splat (v2
, 3);
406 nx
= vec_splat (n2
, 1);
407 ny
= vec_splat (n2
, 2);
408 nz
= vec_splat (n2
, 3);
410 vr3
= appbones (s
, skin
, vx
, vy
, vz
, nx
, ny
, nz
, &nr3
);
/* Repack the four per-vertex results into three contiguous vectors. */
414 v0
= vec_perm (vr0
, vr1
, p0
);
415 v1
= vec_perm (vr1
, vr2
, p1
);
416 v2
= vec_perm (vr2
, vr3
, p2
);
418 n0
= vec_perm (nr0
, nr1
, p0
);
419 n1
= vec_perm (nr1
, nr2
, p1
);
420 n2
= vec_perm (nr2
, nr3
, p2
);
423 vec_st (v0
, j
, vdst
);
424 vec_st (v1
, j
+ 16, vdst
);
425 vec_st (v2
, j
+ 32, vdst
);
427 vec_st (n0
, j
, ndst
);
428 vec_st (n1
, j
+ 16, ndst
);
429 vec_st (n2
, j
+ 32, ndst
);
/* Scalar path: one vertex per iteration, all four pointers advancing by three
   floats.  NOTE(review): i is not re-initialised in this loop header; how it
   relates to the vector loop's i depends on lines not visible here. */
441 for (; i
< s
->num_vertices
; ++i
, vsrc
+= 3, nsrc
+= 3, vdst
+= 3, ndst
+= 3,
444 int num_bones
, bone_index
;
445 float v
[3] = {0,0,0}, n
[3] = {0,0,0}, v0
[4], v1
[4], w
;
447 num_bones
= skin
->boneinfo
& 3;
448 bone_index
= skin
->boneinfo
>> 2;
449 for (j
= 0; j
< num_bones
; ++j
) {
450 w
= skin
->weights
[j
];
451 b
= &s
->abones
[bone_index
& 0x3ff];
454 mapply_to_point (v1
, b
->cm
, vsrc
);
459 mapply_to_vector (v0
, b
->cm
, nsrc
);
474 printf ("took %f sec\n", E
- S
);
/* OCaml primitive: load the rest-pose skeleton from skel_v.
   num_bones is the size of skel_v; the bones and abones arrays are allocated
   with num_bones + 1 entries each.
   First loop: for each skeleton entry, field 0 is the parent index (stored
   plus one) and field 1 is a float array from which v is read at offsets
   1..3 and q at offsets 5..8.
   Second loop: each bone's mq / mv are derived from its parent's through
   qapply, qcompose and vadd.
   Returns unit.
   NOTE(review): the "+ 1" on the array size and parent index suggests entry 0
   is a synthetic root; its initialisation and the reset of b / ab between the
   loops are not visible in this excerpt -- confirm.
   NOTE(review): no check of the simd_alloc results and no release of earlier
   allocations is visible here. */
478 CAMLprim value
ml_skin_set_skel (value skel_v
)
485 CAMLlocal2 (v
, floats_v
);
486 State
*s
= &glob_state
;
488 s
->num_bones
= Wosize_val (skel_v
);
489 size
= (s
->num_bones
+ 1) * sizeof (*b
);
490 s
->bones
= b
= simd_alloc (16, size
);
491 s
->abones
= ab
= simd_alloc (16, (s
->num_bones
+ 1) * sizeof (*ab
));
501 for (i
= 0; i
< s
->num_bones
; ++i
, ++b
) {
502 v
= Field (skel_v
, i
);
503 floats_v
= Field (v
, 1);
505 b
->parent
= Int_val (Field (v
, 0)) + 1;
507 b
->v
[0] = Double_field (floats_v
, 1);
508 b
->v
[1] = Double_field (floats_v
, 2);
509 b
->v
[2] = Double_field (floats_v
, 3);
511 b
->q
[0] = Double_field (floats_v
, 5);
512 b
->q
[1] = Double_field (floats_v
, 6);
513 b
->q
[2] = Double_field (floats_v
, 7);
514 b
->q
[3] = Double_field (floats_v
, 8);
519 for (i
= 0; i
< s
->num_bones
; ++i
, ++b
, ++ab
) {
521 struct bone
*parent
= &s
->bones
[b
->parent
];
523 qapply (v
, parent
->mq
, b
->v
);
524 qcompose (b
->mq
, b
->q
, parent
->mq
);
525 vadd (b
->mv
, v
, parent
->mv
);
528 CAMLreturn (Val_unit
);
/* OCaml primitive: apply one animation frame to the skeleton.
   Iteration starts at the second entry of s->bones / s->abones.
   First loop: each element of anim_v is a float array whose first four values
   are stored in the bone's aq.
   Second loop, per bone:
     - amq / amv are derived from the parent's through qapply, qcompose, vadd;
     - q is the composition of the conjugate of mq with amq;
     - v1 is amv minus mv transformed by q;
     - q and v1 are turned into the bone's ab->cm matrix by q2matrixt.
   Returns unit.
   (The reset of b between the two loops and the declaration of i are not
   visible in this excerpt.) */
531 CAMLprim value
ml_skin_set_anim (value anim_v
)
535 CAMLlocal1 (floats_v
);
536 State
*s
= &glob_state
;
537 struct bone
*b
= s
->bones
+ 1;
538 struct abone
*ab
= s
->abones
+ 1;
540 for (i
= 0; i
< s
->num_bones
; ++i
, ++b
) {
541 floats_v
= Field (anim_v
, i
);
542 b
->aq
[0] = Double_field (floats_v
, 0);
543 b
->aq
[1] = Double_field (floats_v
, 1);
544 b
->aq
[2] = Double_field (floats_v
, 2);
545 b
->aq
[3] = Double_field (floats_v
, 3);
549 for (i
= 0; i
< s
->num_bones
; ++i
, ++b
, ++ab
) {
550 float v
[4], v1
[4], q
[4], q1
[4];
551 struct bone
*parent
= &s
->bones
[b
->parent
];
553 qapply (v
, parent
->amq
, b
->v
);
554 qcompose (b
->amq
, b
->aq
, parent
->amq
);
555 vadd (b
->amv
, v
, parent
->amv
);
557 qconjugate (q1
, b
->mq
);
558 qcompose (q
, q1
, b
->amq
);
560 qapply (v
, q
, b
->mv
);
561 vsub (v1
, b
->amv
, v
);
562 q2matrixt (ab
->cm
, q
, v1
);
565 CAMLreturn (Val_unit
);
/* OCaml primitive: re-skin the mesh into the drawing buffers.
   Two ways of obtaining the destinations are visible:
     - bind buffer 0, map it write-only, raise an OCaml Failure if the mapping
       is null, and offset ndst from vdst by the float distance between
       s->bufs[N_IDX] and s->bufs[V_IDX];
     - use s->bufs[V_IDX] / s->bufs[N_IDX] directly.
   translate then writes the skinned data; afterwards the buffer is unmapped
   and a GL_FALSE result raises an OCaml Failure.  Returns unit.
   NOTE(review): the conditionals selecting between the two paths (presumably
   on use_vbo) are not visible in this excerpt -- confirm. */
568 CAMLprim value
ml_skin_anim (value unit_v
)
573 State
*s
= &glob_state
;
576 glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[0]);
577 vdst
= ndst
= glMapBuffer (GL_ARRAY_BUFFER
, GL_WRITE_ONLY
);
578 if (!vdst
) caml_failwith ("glMapBuffer failed");
580 ndst
+= (float *) s
->bufs
[N_IDX
] - (float *) s
->bufs
[V_IDX
];
583 vdst
= s
->bufs
[V_IDX
];
584 ndst
= s
->bufs
[N_IDX
];
587 translate (s
, vdst
, ndst
);
590 ret
= glUnmapBuffer (GL_ARRAY_BUFFER
);
591 if (ret
== GL_FALSE
) caml_failwith ("glUnmapBuffer failed");
594 CAMLreturn (Val_unit
);
/* Fallback definition of GL_GENERATE_MIPMAP for GL headers that do not
   provide it.  (The matching #endif is not visible in this excerpt.) */
597 #ifndef GL_GENERATE_MIPMAP
598 #define GL_GENERATE_MIPMAP 0x8191
/* OCaml primitive: set the GL_GENERATE_MIPMAP texture parameter to GL_TRUE on
   the GL_TEXTURE_2D target.  (The return statement is not visible in this
   excerpt.) */
601 CAMLprim value
ml_set_generate_mipmaps (value unit_v
)
604 glTexParameteri (GL_TEXTURE_2D
, GL_GENERATE_MIPMAP
, GL_TRUE
);