7 #include <caml/alloc.h>
8 #include <caml/memory.h>
/* Indices of the per-vertex attribute arrays kept in the State
   (ptrs[], bufs[], bufid[]) and in usage[] / sizes[]:
   V_IDX  - vertex positions (fed to glVertexPointer),
   N_IDX  - normals (glNormalPointer),
   UV_IDX - texture coordinates (glTexCoordPointer),
   C_IDX  - colors (glColorPointer).
   COUNT is the number of arrays and is used as the array dimension. */
13 enum {V_IDX
, N_IDX
, UV_IDX
, C_IDX
, COUNT
};
16 /* Altivec code derived from: */
17 /* http://www.freevec.org/category/simd/algorithms/algebra/matrix_operations */
/* simd_alloc: allocator used below for the geometry block, the shadow
   buffers and the bone array.  Three alternative definitions appear
   here; presumably each belongs to a different preprocessor branch
   (the conditionals are not visible in this view) - TODO confirm.
   This variant requests 16-byte aligned storage via memalign. */
22 #define simd_alloc(s) memalign (16, s)
/* Plain malloc variant (object-like macro form). */
24 #define simd_alloc malloc
/* A16: declaration suffix requesting 16-byte alignment. */
26 #define A16 __attribute__ ((aligned (16)))
/* AL16(i): round i up to the next multiple of 16. */
29 #define AL16(i) (((i)+15)&~15)
/* Plain malloc variant (function-like macro form). */
35 #define simd_alloc(s) malloc (s)
/* GL buffer usage hint for each attribute array, indexed by
   V_IDX..C_IDX: positions and normals are GL_DYNAMIC_DRAW, texture
   coordinates and colors are GL_STATIC_DRAW.  skin_init passes these
   to glBufferData and also uses them to decide whether bufs[i] may
   simply alias ptrs[i] (static) or needs its own copy. */
41 const int usage
[COUNT
] = {GL_DYNAMIC_DRAW
, GL_DYNAMIC_DRAW
,
42 GL_STATIC_DRAW
, GL_STATIC_DRAW
};
/* 12 floats, 16-byte aligned via A16.  Elsewhere in this file the
   name is accessed as skin[i].weights, so this looks like a member of
   struct skin whose enclosing declaration is not visible here - TODO
   confirm.  The vector path stores one 4-float splat per bone at byte
   offset j*16, which fits 3 bones in 12 floats. */
47 float weights
[12] A16
;
/* Single file-local State instance; every ml_* entry point below
   operates on &glob_state. */
80 static State glob_state
;
/* copy_vertices: for each of num_vertices vertices, read three
   consecutive doubles from the OCaml float array a_v (index k) and
   store them, converted to float, into successive slots of p
   (index j).
   p            - destination float array
   num_vertices - number of vertices to copy
   a_v          - OCaml float array holding 3 doubles per vertex */
83 static void copy_vertices (float *p
, int num_vertices
, value a_v
)
87 for (i
= 0, j
= 0, k
= 0; i
< num_vertices
; ++i
) {
88 p
[j
++] = Double_field (a_v
, k
++);
89 p
[j
++] = Double_field (a_v
, k
++);
90 p
[j
++] = Double_field (a_v
, k
++);
/* set_geom: fill the attribute arrays in s->ptrs[] and the per-vertex
   skin records from OCaml values.
   vertexa_v - float array, 3 doubles per vertex (positions)
   normala_v - float array, 3 doubles per vertex (normals)
   uva_v     - float array, 2 doubles per vertex (texture coords)
   skin_v    - per-vertex records: fields 0..n-1 hold boxed doubles
               encoding bone index and weight, field 3 holds the bone
               count
   colors_v  - OCaml string, 4 bytes per vertex */
97 static void set_geom (State
*s
, value vertexa_v
, value normala_v
,
98 value uva_v
, value skin_v
, value colors_v
)
/* Vertex count is derived from the size of the position array. */
106 num_vertices
= Wosize_val (vertexa_v
) / (Double_wosize
* 3);
108 copy_vertices (s
->ptrs
[V_IDX
], num_vertices
, vertexa_v
);
109 copy_vertices (s
->ptrs
[N_IDX
], num_vertices
, normala_v
);
/* Texture coordinates: two floats per vertex, copied element-wise. */
111 for (i
= 0, p
= s
->ptrs
[UV_IDX
]; i
< num_vertices
* 2; ++i
) {
112 p
[i
] = Double_field (uva_v
, i
);
/* Colors are copied as raw bytes, 4 per vertex. */
114 memcpy (s
->ptrs
[C_IDX
], String_val (colors_v
), num_vertices
* 4);
/* Per-vertex skinning data. */
117 for (i
= 0; i
< num_vertices
; ++i
) {
121 v
= Field (skin_v
, i
);
122 skin
[i
].num_bones
= Int_val (Field (v
, 3));
124 for (j
= 0; j
< skin
[i
].num_bones
; ++j
) {
/* Each entry packs a bone index (integer part) and a weight
   (fractional part) into one double. */
127 val
= Double_val (Bp_val (Field (v
, j
)));
129 skin
[i
].boneindices
[j
] = (int) val
;
130 w
= val
- skin
[i
].boneindices
[j
];
/* Vector form: weight replicated into 4 lanes and stored at byte
   offset j*16.  This and the scalar store below are presumably
   alternatives chosen by a preprocessor conditional that is not
   visible here - TODO confirm. */
132 vector
float vw
= {w
,w
,w
,w
};
134 vec_st (vw
, j
*16, skin
[i
].weights
);
/* Scalar form: one weight per bone. */
136 skin
[i
].weights
[j
] = w
;
/* Bone indices are shifted by one; ml_skin_set_skel allocates
   num_bones + 1 bones and shifts parent indices the same way. */
138 skin
[i
].boneindices
[j
] += 1;
/* skin_init: size and allocate the geometry storage for s, populate it
   through set_geom, then set up either GL buffer objects or plain
   client-side buffers for drawing.
   Arguments other than s are forwarded unchanged to set_geom. */
143 static void skin_init (State
*s
, value vertexa_v
, value normala_v
,
144 value uva_v
, value skin_v
, value colors_v
)
149 int sizes
[COUNT
], offsets
[COUNT
];
151 s
->num_vertices
= Wosize_val (vertexa_v
) / (Double_wosize
* 3);
/* Per-vertex byte size of each attribute array; scaled by the vertex
   count in the loop below. */
153 sizes
[V_IDX
] = V_ELEMS
* sizeof (GLfloat
);
154 sizes
[N_IDX
] = V_ELEMS
* sizeof (GLfloat
);
155 sizes
[UV_IDX
] = 2 * sizeof (GLfloat
);
158 for (i
= 0, size
= 0; i
< COUNT
; ++i
) {
160 sizes
[i
] *= s
->num_vertices
;
/* One allocation holds all attribute arrays followed by the array of
   struct skin, which starts at the 16-byte rounded end of the
   attribute area. */
164 p
= simd_alloc (AL16 (size
) + s
->num_vertices
* sizeof (struct skin
));
165 s
->skin
= (struct skin
*) (p
+ AL16 (size
));
167 for (i
= 0; i
< COUNT
; ++i
) s
->ptrs
[i
] = p
+ offsets
[i
];
169 set_geom (s
, vertexa_v
, normala_v
, uva_v
, skin_v
, colors_v
);
/* Buffer-object setup: create COUNT buffers, size each with its usage
   hint and upload the initial data.  Presumably guarded by use_vbo
   (guard not visible here) - TODO confirm. */
172 glGenBuffers (COUNT
, s
->bufid
);
174 for (i
= 0; i
< COUNT
; ++i
) {
175 glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[i
]);
176 glBufferData (GL_ARRAY_BUFFER
, sizes
[i
], NULL
, usage
[i
]);
177 glBufferSubData (GL_ARRAY_BUFFER
, 0, sizes
[i
], s
->ptrs
[i
]);
180 glBindBuffer (GL_ARRAY_BUFFER
, 0);
/* Client-side buffers: static arrays are drawn straight from ptrs[],
   the others get a separate copy in bufs[] so that ptrs[] keeps the
   untransformed source data. */
183 for (i
= 0; i
< COUNT
; ++i
) {
184 if (usage
[i
] == GL_STATIC_DRAW
) {
185 s
->bufs
[i
] = s
->ptrs
[i
];
188 s
->bufs
[i
] = simd_alloc (sizes
[i
]);
189 memcpy (s
->bufs
[i
], s
->ptrs
[i
], sizes
[i
]);
/* ml_skin_draw_begin: OCaml stub that enables the vertex, normal,
   texture-coordinate and color client arrays and points each one at
   s->bufs[...].  When use_vbo is set the matching buffer object is
   bound before each pointer call. */
195 CAMLprim value
ml_skin_draw_begin (value unit_v
)
197 State
*s
= &glob_state
;
/* Positions: 3 floats, STRIDE bytes apart. */
200 glEnableClientState (GL_VERTEX_ARRAY
);
201 if (use_vbo
) glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[V_IDX
]);
202 glVertexPointer (3, GL_FLOAT
, STRIDE
, s
->bufs
[V_IDX
]);
/* Normals: same stride as positions. */
204 glEnableClientState (GL_NORMAL_ARRAY
);
205 if (use_vbo
) glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[N_IDX
]);
206 glNormalPointer (GL_FLOAT
, STRIDE
, s
->bufs
[N_IDX
]);
/* Texture coordinates: 2 floats, tightly packed. */
208 glEnableClientState (GL_TEXTURE_COORD_ARRAY
);
209 if (use_vbo
) glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[UV_IDX
]);
210 glTexCoordPointer (2, GL_FLOAT
, 0, s
->bufs
[UV_IDX
]);
/* Colors: 4 unsigned bytes, tightly packed. */
212 glEnableClientState (GL_COLOR_ARRAY
);
213 if (use_vbo
) glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[C_IDX
]);
214 glColorPointer (4, GL_UNSIGNED_BYTE
, 0, s
->bufs
[C_IDX
]);
/* ml_skin_draw_end: OCaml stub that undoes ml_skin_draw_begin by
   disabling the four client arrays and, when use_vbo is set, unbinding
   GL_ARRAY_BUFFER. */
219 CAMLprim value
ml_skin_draw_end (value unit_v
)
222 glDisableClientState (GL_VERTEX_ARRAY
);
223 glDisableClientState (GL_NORMAL_ARRAY
);
224 glDisableClientState (GL_TEXTURE_COORD_ARRAY
);
225 glDisableClientState (GL_COLOR_ARRAY
);
226 if (use_vbo
) glBindBuffer (GL_ARRAY_BUFFER
, 0);
/* ml_skin_init: OCaml stub that records the use_vbo flag, unpacks the
   geometry tuple and hands its parts to skin_init.
   use_vbo_v - OCaml bool selecting buffer objects
   geom_v    - block whose fields 0..4 are positions, normals, UVs,
               skin data and colors, in that order
   Returns unit. */
230 CAMLprim value
ml_skin_init (value use_vbo_v
, value geom_v
)
/* Register arguments and locals with the OCaml GC. */
232 CAMLparam2 (use_vbo_v
, geom_v
);
233 CAMLlocal5 (vertexa_v
, normala_v
, uva_v
, skin_v
, colors_v
);
234 State
*s
= &glob_state
;
236 use_vbo
= Bool_val (use_vbo_v
);
246 vertexa_v
= Field (geom_v
, 0);
247 normala_v
= Field (geom_v
, 1);
248 uva_v
= Field (geom_v
, 2);
249 skin_v
= Field (geom_v
, 3);
250 colors_v
= Field (geom_v
, 4);
252 skin_init (s
, vertexa_v
, normala_v
, uva_v
, skin_v
, colors_v
);
253 CAMLreturn (Val_unit
);
258 #include <sys/time.h>
/* now: current time from gettimeofday as seconds (with microsecond
   fraction) in a double.  Terminates the process through err() if
   gettimeofday fails. */
259 static double now (void)
263 if (gettimeofday (&tv
, NULL
)) err (1, "gettimeofday");
264 return tv
.tv_sec
+ tv
.tv_usec
* 1e-6;
/* translate: apply the current bone matrices to every vertex of s and
   write the resulting positions to vdst and normals to ndst.
   Sources are s->ptrs[V_IDX] / s->ptrs[N_IDX]; per-vertex bone indices
   and weights come from s->skin.
   Two loop bodies appear below, one written with AltiVec intrinsics and
   one scalar; presumably a preprocessor conditional (not visible here)
   selects between them - TODO confirm.
   The elapsed time E - S is printed to stdout. */
268 static void translate (State
*s
, float *vdst
, float *ndst
)
272 float *vsrc
= s
->ptrs
[V_IDX
];
273 float *nsrc
= s
->ptrs
[N_IDX
];
274 struct skin
*skin
= s
->skin
;
277 double S
= now (), E
;
/* AltiVec variant: one 16-byte vector per vertex. */
281 for (i
= 0; i
< s
->num_vertices
; ++i
, ++skin
) {
282 vector
float v
, n
, vs
, ns
, vz
;
283 vector
float r0
, r1
, r2
, r3
, nx
, ny
, nz
;
/* Zero the accumulators; vz stays zero and is the addend for the
   plain multiplies below. */
285 v
= n
= vz
= (vector
float) vec_splat_u32 (0);
/* Load source position and normal at byte offset i*16. */
287 vs
= vec_ld (i
<<4, vsrc
);
288 ns
= vec_ld (i
<<4, nsrc
);
/* Broadcast each normal component across a vector. */
290 nx
= vec_splat (ns
, 0);
291 ny
= vec_splat (ns
, 1);
292 nz
= vec_splat (ns
, 2);
294 for (j
= 0; j
< skin
->num_bones
; ++j
) {
295 vector
float vw
, v1
, x
, y
, z
, t0
, t1
, t2
;
297 b
= &s
->bones
[skin
->boneindices
[j
]];
/* Weight of bone j, already replicated into 4 lanes by set_geom. */
299 vw
= vec_ld (j
<<4, skin
->weights
);
/* Four consecutive 16-byte vectors of the bone matrix b->cm. */
301 r0
= vec_ld ( 0, b
->cm
);
302 r1
= vec_ld (16, b
->cm
);
303 r2
= vec_ld (32, b
->cm
);
304 r3
= vec_ld (48, b
->cm
);
/* Position relative to b->mv. */
306 v1
= vec_sub (vs
, vec_ld (0, b
->mv
));
/* Scale the matrix vectors by the bone weight. */
308 r0
= vec_madd (r0
, vw
, vz
);
309 r1
= vec_madd (r1
, vw
, vz
);
310 r2
= vec_madd (r2
, vw
, vz
);
311 r3
= vec_madd (r3
, vw
, vz
);
313 x
= vec_splat (v1
, 0);
314 y
= vec_splat (v1
, 1);
315 z
= vec_splat (v1
, 2);
/* t2 = r0*x + r1*y + r2*z + r3 */
317 t0
= vec_madd (r0
, x
, r3
);
318 t1
= vec_madd (r1
, y
, t0
);
319 t2
= vec_madd (r2
, z
, t1
);
/* n += r0*nx + r1*ny + r2*nz (r3 is not applied to the normal) */
322 t0
= vec_madd (r0
, nx
, n
);
323 t1
= vec_madd (r1
, ny
, t0
);
324 n
= vec_madd (r2
, nz
, t1
);
/* Store the results for vertex i at byte offset i*16. */
326 vec_st (v
, i
<<4, vdst
);
327 vec_st (n
, i
<<4, ndst
);
/* Scalar variant: pointers advance 3 floats per vertex. */
330 for (i
= 0; i
< s
->num_vertices
; ++i
,
331 vsrc
+= 3, nsrc
+= 3, vdst
+= 3, ndst
+= 3, ++skin
)
/* Single-bone case: apply b->cm directly, no weighting. */
333 if (skin
->num_bones
== 1) {
336 b
= &s
->bones
[skin
->boneindices
[0]];
338 vsub (v0
, vsrc
, b
->mv
);
339 mapply_to_point (vdst
, b
->cm
, v0
);
340 mapply_to_vector (ndst
, b
->cm
, nsrc
);
/* General case: per-bone matrix scaled by the bone weight. */
345 float v
[3] = {0,0,0}, n
[3] = {0,0,0}, v0
[4], v1
[4], w
, m
[12];
347 for (j
= 0; j
< skin
->num_bones
; ++j
) {
348 w
= skin
->weights
[j
];
349 b
= &s
->bones
[skin
->boneindices
[j
]];
352 vsub (v0
, vsrc
, b
->mv
);
354 mscale (m
, b
->cm
, w
);
355 mapply_to_point (v0
, m
, v0
);
356 mapply_to_vector (v1
, m
, nsrc
);
/* NOTE(review): where z is set is not visible here; when it is
   non-zero the source position is copied through unchanged. */
362 if (z
) vcopy (vdst
, vsrc
);
363 else vcopy (vdst
, v
);
371 printf ("took %f sec\n", E
- S
);
/* ml_skin_set_skel: OCaml stub that (re)builds s->bones from skel_v.
   skel_v - array with one entry per bone; field 0 of an entry is the
            parent index, field 1 a float array from which v is read
            at indices 1..3 and q at indices 5..8
   One extra bone is allocated and parent indices are shifted by one,
   so slot 0 presumably acts as the root - TODO confirm.
   Returns unit. */
375 CAMLprim value
ml_skin_set_skel (value skel_v
)
381 CAMLlocal2 (v
, floats_v
);
382 State
*s
= &glob_state
;
384 s
->num_bones
= Wosize_val (skel_v
);
385 size
= (s
->num_bones
+ 1) * sizeof (struct bone
);
386 s
->bones
= b
= simd_alloc (size
);
/* First pass: copy the parent index, v and q of each bone. */
396 for (i
= 0; i
< s
->num_bones
; ++i
, ++b
) {
397 v
= Field (skel_v
, i
);
398 floats_v
= Field (v
, 1);
400 b
->parent
= Int_val (Field (v
, 0)) + 1;
402 b
->v
[0] = Double_field (floats_v
, 1);
403 b
->v
[1] = Double_field (floats_v
, 2);
404 b
->v
[2] = Double_field (floats_v
, 3);
406 b
->q
[0] = Double_field (floats_v
, 5);
407 b
->q
[1] = Double_field (floats_v
, 6);
408 b
->q
[2] = Double_field (floats_v
, 7);
409 b
->q
[3] = Double_field (floats_v
, 8);
/* Second pass: derive each bone's mq and mv from its parent's. */
413 for (i
= 0; i
< s
->num_bones
; ++i
, ++b
) {
415 struct bone
*parent
= &s
->bones
[b
->parent
];
417 qapply (v
, parent
->mq
, b
->v
);
418 qcompose (b
->mq
, b
->q
, parent
->mq
);
419 vadd (b
->mv
, v
, parent
->mv
);
423 b
->cm
[11] = b
->mv
[2];
427 CAMLreturn (Val_unit
);
/* ml_skin_set_anim: OCaml stub that loads one animation pose and
   recomputes every bone matrix cm.
   anim_v - array with one float array per bone; elements 0..3 are
            copied into that bone's aq
   Returns unit. */
430 CAMLprim value
ml_skin_set_anim (value anim_v
)
434 CAMLlocal1 (floats_v
);
435 State
*s
= &glob_state
;
/* Start at bones[1]; slot 0 is skipped. */
436 struct bone
*b
= s
->bones
+ 1;
/* First pass: read aq for every bone. */
438 for (i
= 0; i
< s
->num_bones
; ++i
, ++b
) {
439 floats_v
= Field (anim_v
, i
);
440 b
->aq
[0] = Double_field (floats_v
, 0);
441 b
->aq
[1] = Double_field (floats_v
, 1);
442 b
->aq
[2] = Double_field (floats_v
, 2);
443 b
->aq
[3] = Double_field (floats_v
, 3);
/* Second pass: derive amq / amv from the parent, then build cm. */
447 for (i
= 0; i
< s
->num_bones
; ++i
, ++b
) {
448 float v
[3], q
[4], q1
[4];
449 struct bone
*parent
= &s
->bones
[b
->parent
];
451 qapply (v
, parent
->amq
, b
->v
);
452 qcompose (b
->amq
, b
->aq
, parent
->amq
);
453 vadd (b
->amv
, v
, parent
->amv
);
/* q combines the conjugate of mq with amq; cm is built from q and
   amv. */
455 qconjugate (q1
, b
->mq
);
456 qcompose (q
, q1
, b
->amq
);
457 q2matrixt (b
->cm
, q
, b
->amv
);
460 CAMLreturn (Val_unit
);
/* ml_skin_anim: OCaml stub that recomputes the animated positions and
   normals by calling translate().
   The destinations are either the mapped GL buffers for V_IDX / N_IDX
   or the client-side s->bufs[] arrays; the conditions choosing between
   them are not visible here (presumably use_vbo) - TODO confirm.
   Map and unmap failures are reported on stderr.
   Returns unit. */
463 CAMLprim value
ml_skin_anim (value unit_v
)
467 float *vdst
, *vsrc
, *ndst
, *nsrc
;
468 State
*s
= &glob_state
;
/* Buffer-object path: map both dynamic buffers for writing. */
471 glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[V_IDX
]);
472 vdst
= glMapBuffer (GL_ARRAY_BUFFER
, GL_WRITE_ONLY
);
474 fprintf (stderr
, "glMapBuffer for vertices failed\n");
478 glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[N_IDX
]);
479 ndst
= glMapBuffer (GL_ARRAY_BUFFER
, GL_WRITE_ONLY
);
481 fprintf (stderr
, "glMapBuffer for normals failed\n");
/* Client-side path: write into the shadow copies. */
486 vdst
= s
->bufs
[V_IDX
];
487 ndst
= s
->bufs
[N_IDX
];
/* NOTE(review): vsrc / nsrc are assigned but not passed to
   translate(), which reads s->ptrs itself; they look unused in the
   visible code. */
490 vsrc
= s
->ptrs
[V_IDX
];
491 nsrc
= s
->ptrs
[N_IDX
];
493 translate (s
, vdst
, ndst
);
/* Unmap both buffers; GL_FALSE from glUnmapBuffer is reported. */
496 glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[V_IDX
]);
497 ret
= glUnmapBuffer (GL_ARRAY_BUFFER
);
498 if (ret
== GL_FALSE
) {
499 fprintf (stderr
, "glUnmapBuffer for vertices failed\n");
503 glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[N_IDX
]);
504 ret
= glUnmapBuffer (GL_ARRAY_BUFFER
);
505 if (ret
== GL_FALSE
) {
506 fprintf (stderr
, "glUnmapBuffer for normals failed\n");
511 CAMLreturn (Val_unit
);
/* Fallback definition for GL headers that do not provide
   GL_GENERATE_MIPMAP. */
514 #ifndef GL_GENERATE_MIPMAP
515 #define GL_GENERATE_MIPMAP 0x8191
/* ml_set_generate_mipmaps: OCaml stub that sets the
   GL_GENERATE_MIPMAP parameter to GL_TRUE on the GL_TEXTURE_2D
   target. */
518 CAMLprim value
ml_set_generate_mipmaps (value unit_v
)
521 glTexParameteri (GL_TEXTURE_2D
, GL_GENERATE_MIPMAP
, GL_TRUE
);