7 #include <caml/alloc.h>
8 #include <caml/memory.h>
/*
 * Slot indices used with the ptrs[] / bufs[] pointer arrays in this file:
 * V_IDX = vertex positions, N_IDX = normals, UV_IDX = texture coordinates,
 * C_IDX = colors.  COUNT comes last and therefore equals the number of
 * slots (4).
 */
13 enum {V_IDX
, N_IDX
, UV_IDX
, C_IDX
, COUNT
};
16 /* Altivec code derived from: */
17 /* http://www.freevec.org/category/simd/algorithms/algebra/matrix_operations */
/*
 * simd_alloc appears with three alternative definitions below.  The
 * conditional directives that pick exactly one of them are not part of this
 * listing.  This variant asks memalign for 16-byte-aligned storage.
 */
22 #define simd_alloc(s) memalign (16, s)
/* Variant that aliases simd_alloc directly to malloc. */
24 #define simd_alloc malloc
/* A16: GCC-style attribute requesting 16-byte alignment of a declaration. */
26 #define A16 __attribute__ ((aligned (16)))
/* AL16(i): round i up to the next multiple of 16. */
29 #define AL16(i) (((i)+15)&~15)
/* Variant that forwards to plain malloc (no alignment request). */
35 #define simd_alloc(s) malloc (s)
/*
 * The single file-scope State instance.  The ml_skin_* entry points that
 * need state take its address (State *s = &glob_state).
 */
76 static State glob_state
;
/*
 * Copy num_vertices 3-component entries from a_v into the float buffer p.
 *
 *   p            - destination; advanced by V_ELEMS floats per vertex
 *   num_vertices - number of entries to copy
 *   a_v          - OCaml value read with Double_field (float-array access);
 *                  consumed densely, 3 doubles per vertex
 *
 * Only p[0], p[1] and p[2] are written on the lines visible here.
 * NOTE(review): the declarations of i and k and the closing braces are not
 * part of this listing.
 */
79 static void copy_vertices (float *p
, int num_vertices
, value a_v
)
/* i counts vertices; k is the running read index into a_v. */
83 for (i
= 0, k
= 0; i
< num_vertices
; ++i
, p
+= V_ELEMS
) {
84 p
[0] = Double_field (a_v
, k
++);
85 p
[1] = Double_field (a_v
, k
++);
86 p
[2] = Double_field (a_v
, k
++);
/*
 * Fill the geometry buffers referenced by ptrs[] from OCaml values and
 * build the per-vertex skinning records.
 *
 *   s          - state (not referenced on the lines visible here)
 *   ptrs       - destination pointers indexed by V_IDX/N_IDX/UV_IDX/C_IDX;
 *                nothing is allocated here, so they must already point at
 *                storage for num_vertices entries
 *   vertexa_v  - positions, 3 doubles per vertex
 *   normala_v  - normals, 3 doubles per vertex
 *   uva_v      - texture coordinates, 2 doubles per vertex
 *   skin_v     - one entry per vertex describing bone influences
 *   colors_v   - byte string; 4 bytes per vertex are copied
 *
 * NOTE(review): local declarations (i, j, p, v, val, w, skin, num_vertices)
 * and the preprocessor conditionals are not part of this listing.
 */
93 static void set_geom (State
*s
, void **ptrs
, value vertexa_v
, value normala_v
,
94 value uva_v
, value skin_v
, value colors_v
)
/* Vertex count = number of 3-double groups in vertexa_v. */
101 num_vertices
= Wosize_val (vertexa_v
) / (Double_wosize
* 3);
103 copy_vertices (ptrs
[V_IDX
], num_vertices
, vertexa_v
);
104 copy_vertices (ptrs
[N_IDX
], num_vertices
, normala_v
);
/* Texture coordinates: two values per vertex, copied element by element. */
106 for (i
= 0, p
= ptrs
[UV_IDX
]; i
< num_vertices
* 2; ++i
) {
107 p
[i
] = Double_field (uva_v
, i
);
/* Colors are copied verbatim, 4 bytes per vertex. */
109 memcpy (ptrs
[C_IDX
], String_val (colors_v
), num_vertices
* 4);
112 for (i
= 0; i
< num_vertices
; ++i
) {
/*
 * Per-vertex skin entry: field 3 holds the bone count, fields
 * 0..num_bones-1 hold the per-bone values.
 * NOTE(review): this layout implies num_bones <= 3 - confirm with the
 * OCaml side.
 */
116 v
= Field (skin_v
, i
);
117 skin
[i
].num_bones
= Int_val (Field (v
, 3));
119 for (j
= 0; j
< skin
[i
].num_bones
; ++j
) {
/*
 * Each per-bone value is a boxed double packing two things: the integer
 * part is the bone index, the fractional part is the weight.
 */
122 val
= Double_val (Bp_val (Field (v
, j
)));
124 skin
[i
].boneindices
[j
] = (int) val
;
125 w
= val
- skin
[i
].boneindices
[j
];
/*
 * Weight storage, vector variant: w replicated into all four lanes and
 * stored at byte offset j*16 of weights.  The directive selecting between
 * this and the scalar store below is not visible in this listing.
 */
127 vector
float vw
= {w
,w
,w
,w
};
129 vec_st (vw
, j
*16, skin
[i
].weights
);
/* Weight storage, scalar variant: one float per bone. */
131 skin
[i
].weights
[j
] = w
;
/*
 * Shift the stored index up by one.  Presumably slot 0 of s->bones is
 * reserved (other code in this file also offsets bone indices by one) -
 * confirm.
 */
133 skin
[i
].boneindices
[j
] += 1;
/*
 * Allocate geometry storage for the state, fill it via set_geom, and set up
 * the buffer pointers used for drawing.
 *
 *   s                     - state to initialise (num_vertices, skin, ptrs,
 *                           bufid and bufs are written here)
 *   vertexa_v .. colors_v - geometry values, passed on to set_geom unchanged
 *
 * Two set-ups are visible: one that creates and uploads two GL buffer
 * objects, and one that keeps a separate CPU-side copy of the
 * vertex/normal data.
 * NOTE(review): the conditionals separating those set-ups, the
 * declarations of p and ptrs, and the assignment of sizevn (it is declared
 * and used, but no assignment appears) are not part of this listing.
 * NOTE(review): no check of the simd_alloc results is visible.
 */
138 static void skin_init (State
*s
, value vertexa_v
, value normala_v
,
139 value uva_v
, value skin_v
, value colors_v
)
142 GLsizei sizevn
, sizev
, sizeu
, sizec
;
145 s
->num_vertices
= Wosize_val (vertexa_v
) / (Double_wosize
* 3);
/* Byte sizes: positions (V_ELEMS floats each), UVs (2 floats), colors (4 bytes). */
147 sizev
= V_ELEMS
* sizeof (GLfloat
) * s
->num_vertices
;
148 sizeu
= 2 * sizeof (GLfloat
) * s
->num_vertices
;
149 sizec
= 4 * s
->num_vertices
;
/*
 * One allocation holds sizevn bytes of vertex/normal data followed, at the
 * next 16-byte-rounded offset, by the array of struct skin records.
 */
153 p
= simd_alloc (AL16 (sizevn
) + s
->num_vertices
* sizeof (struct skin
));
154 s
->skin
= (struct skin
*) (p
+ AL16 (sizevn
));
155 s
->ptrs
[0] = ptrs
[V_IDX
] = p
;
156 ptrs
[N_IDX
] = p
+ sizev
;
/* Second allocation: UVs first, colors right behind them. */
158 p
= stat_alloc (sizec
+ sizeu
);
159 s
->ptrs
[1] = ptrs
[UV_IDX
] = p
;
160 ptrs
[C_IDX
] = p
+ sizeu
;
162 set_geom (s
, ptrs
, vertexa_v
, normala_v
, uva_v
, skin_v
, colors_v
);
/*
 * Buffer-object set-up: buffer 0 receives the vertex/normal block
 * (GL_DYNAMIC_DRAW), buffer 1 the UV/color block (GL_STATIC_DRAW).
 */
165 glGenBuffers (2, s
->bufid
);
167 glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[0]);
168 glBufferData (GL_ARRAY_BUFFER
, sizevn
, s
->ptrs
[0], GL_DYNAMIC_DRAW
);
170 glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[1]);
171 glBufferData (GL_ARRAY_BUFFER
, sizeu
+sizec
, s
->ptrs
[1], GL_STATIC_DRAW
);
173 glBindBuffer (GL_ARRAY_BUFFER
, 0);
/* The UV/color staging block is released once it has been uploaded. */
174 stat_free (s
->ptrs
[1]);
/*
 * NOTE(review): the value of p at this point is set on a line that is not
 * shown; presumably these become offsets into the bound buffer objects -
 * confirm.
 */
178 s
->bufs
[N_IDX
] = p
+ sizev
;
180 s
->bufs
[C_IDX
] = p
+ sizeu
;
/*
 * Other set-up: bufs[] point at a fresh copy of the vertex/normal block,
 * while UVs and colors are referenced in place.
 */
183 p
= simd_alloc (sizevn
);
185 s
->bufs
[N_IDX
] = p
+ sizev
;
186 s
->bufs
[UV_IDX
] = ptrs
[UV_IDX
];
187 s
->bufs
[C_IDX
] = ptrs
[C_IDX
];
189 memcpy (p
, s
->ptrs
[0], sizevn
);
/*
 * OCaml primitive: enable the vertex, normal, texture-coordinate and color
 * client arrays and point them at the buffers recorded in glob_state.
 *
 * When use_vbo is set, bufid[0] is bound before the vertex/normal pointers
 * are specified and bufid[1] before the texcoord/color pointers.
 * unit_v is not referenced on the lines visible here; the return statement
 * is not part of this listing.
 */
193 CAMLprim value
ml_skin_draw_begin (value unit_v
)
195 State
*s
= &glob_state
;
199 glEnableClientState (GL_VERTEX_ARRAY
);
200 glEnableClientState (GL_NORMAL_ARRAY
);
201 glEnableClientState (GL_TEXTURE_COORD_ARRAY
);
202 glEnableClientState (GL_COLOR_ARRAY
);
/* Positions and normals share a stride of V_ELEMS floats. */
204 if (use_vbo
) glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[0]);
205 glVertexPointer (3, GL_FLOAT
, V_ELEMS
* sizeof (GLfloat
), s
->bufs
[V_IDX
]);
206 glNormalPointer (GL_FLOAT
, V_ELEMS
* sizeof (GLfloat
), s
->bufs
[N_IDX
]);
/* UVs and colors are tightly packed (stride 0). */
208 if (use_vbo
) glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[1]);
209 glTexCoordPointer (2, GL_FLOAT
, 0, s
->bufs
[UV_IDX
]);
210 glColorPointer (4, GL_UNSIGNED_BYTE
, 0, s
->bufs
[C_IDX
]);
/*
 * OCaml primitive: disable the four client arrays enabled by
 * ml_skin_draw_begin and, when use_vbo is set, unbind GL_ARRAY_BUFFER.
 * unit_v is not referenced on the lines visible here; the return statement
 * is not part of this listing.
 */
215 CAMLprim value
ml_skin_draw_end (value unit_v
)
218 glDisableClientState (GL_VERTEX_ARRAY
);
219 glDisableClientState (GL_NORMAL_ARRAY
);
220 glDisableClientState (GL_TEXTURE_COORD_ARRAY
);
221 glDisableClientState (GL_COLOR_ARRAY
);
222 if (use_vbo
) glBindBuffer (GL_ARRAY_BUFFER
, 0);
/*
 * OCaml primitive: record the VBO preference and initialise glob_state
 * from a geometry block.
 *
 *   use_vbo_v - OCaml bool, stored in the file-level use_vbo flag
 *   geom_v    - block whose fields 0..4 are, in order: vertex array,
 *               normal array, uv array, skin data, colors
 *
 * Returns unit.
 */
226 CAMLprim value
ml_skin_init (value use_vbo_v
, value geom_v
)
228 CAMLparam2 (use_vbo_v
, geom_v
);
229 CAMLlocal5 (vertexa_v
, normala_v
, uva_v
, skin_v
, colors_v
);
230 State
*s
= &glob_state
;
232 use_vbo
= Bool_val (use_vbo_v
);
/*
 * GETPA is defined outside this listing; presumably it resolves the named
 * GL entry point at run time - confirm.
 */
238 GETPA (BufferSubData
);
243 vertexa_v
= Field (geom_v
, 0);
244 normala_v
= Field (geom_v
, 1);
245 uva_v
= Field (geom_v
, 2);
246 skin_v
= Field (geom_v
, 3);
247 colors_v
= Field (geom_v
, 4);
249 skin_init (s
, vertexa_v
, normala_v
, uva_v
, skin_v
, colors_v
);
250 CAMLreturn (Val_unit
);
255 #include <sys/time.h>
/*
 * Return the current gettimeofday time as seconds in a double
 * (tv_sec plus tv_usec scaled by 1e-6).  If gettimeofday fails, err(1, ...)
 * is called, which does not return.
 * NOTE(review): the declaration of tv is not part of this listing.
 */
256 static double now (void)
260 if (gettimeofday (&tv
, NULL
)) err (1, "gettimeofday");
261 return tv
.tv_sec
+ tv
.tv_usec
* 1e-6;
/*
 * Apply the current bone matrices (b->cm) to every source vertex and
 * normal, writing the skinned results to vdst / ndst.
 *
 *   s    - state; reads ptrs[0] (source positions, followed after
 *          num_vertices * V_ELEMS floats by the source normals), skin,
 *          bones and num_vertices
 *   vdst - destination for transformed positions
 *   ndst - destination for transformed normals
 *
 * Two implementations are visible back to back: a vector one (vec_*
 * intrinsics, 16 bytes per vertex) and a scalar one (3 floats per vertex).
 * NOTE(review): the directives choosing between them, several declarations
 * (i, j, b, z), the accumulation statements of the scalar multi-bone branch
 * and the assignment of E are not part of this listing.
 */
265 static void translate (State
*s
, float *vdst
, float *ndst
)
269 float *vsrc
= s
->ptrs
[0];
270 float *nsrc
= vsrc
+ s
->num_vertices
* V_ELEMS
;
271 struct skin
*skin
= s
->skin
;
/* Start of the timing measurement reported by the printf at the end. */
274 double S
= now (), E
;
/* ---- Vector implementation: one 16-byte load/store per vertex. ---- */
278 for (i
= 0; i
< s
->num_vertices
; ++i
, ++skin
) {
279 vector
float v
, n
, vs
, ns
, vz
;
280 vector
float r0
, r1
, r2
, r3
, nx
, ny
, nz
;
/* v and n are the accumulators; vz stays the zero vector. */
282 v
= n
= vz
= (vector
float) vec_splat_u32 (0);
/* i<<4 is the byte offset of vertex i (16 bytes per vertex). */
284 vs
= vec_ld (i
<<4, vsrc
);
285 ns
= vec_ld (i
<<4, nsrc
);
/* Broadcast each normal component across a full vector. */
287 nx
= vec_splat (ns
, 0);
288 ny
= vec_splat (ns
, 1);
289 nz
= vec_splat (ns
, 2);
291 for (j
= 0; j
< skin
->num_bones
; ++j
) {
292 vector
float vw
, x
, y
, z
, t0
, t1
, t2
;
294 b
= &s
->bones
[skin
->boneindices
[j
]];
/* Weight of bone j, loaded from byte offset j*16 of weights. */
296 vw
= vec_ld (j
<<4, skin
->weights
);
/* r0..r3: the four consecutive 4-float groups of b->cm. */
298 r0
= vec_ld ( 0, b
->cm
);
299 r1
= vec_ld (16, b
->cm
);
300 r2
= vec_ld (32, b
->cm
);
301 r3
= vec_ld (48, b
->cm
);
303 x
= vec_splat (vs
, 0);
304 y
= vec_splat (vs
, 1);
305 z
= vec_splat (vs
, 2);
/* Position: t2 = r0*x + r1*y + r2*z + r3, then v += t2 * weight. */
307 t0
= vec_madd (r0
, x
, r3
);
308 t1
= vec_madd (r1
, y
, t0
);
309 t2
= vec_madd (r2
, z
, t1
);
310 v
= vec_madd (t2
, vw
, v
);
/* Normal: same combination but starting from zero (r3 is left out). */
312 t0
= vec_madd (r0
, nx
, vz
);
313 t1
= vec_madd (r1
, ny
, t0
);
314 t2
= vec_madd (r2
, nz
, t1
);
315 n
= vec_madd (t2
, vw
, n
);
317 vec_st (v
, i
<<4, vdst
);
318 vec_st (n
, i
<<4, ndst
);
/* ---- Scalar implementation: all four pointers step by 3 floats. ---- */
321 for (i
= 0; i
< s
->num_vertices
; ++i
,
322 vsrc
+= 3, nsrc
+= 3, vdst
+= 3, ndst
+= 3, ++skin
)
/* Single-bone case: transform straight into the destinations. */
324 if (skin
->num_bones
== 1) {
325 b
= &s
->bones
[skin
->boneindices
[0]];
327 mapply_to_point (vdst
, b
->cm
, vsrc
);
328 mapply_to_vector (ndst
, b
->cm
, nsrc
);
/*
 * General case: v and n start at zero; v0/v1 receive the per-bone
 * transform results.  The statements that fold them into v and n using w
 * are not visible here.
 */
333 float v
[3] = {0,0,0}, n
[3] = {0,0,0}, v0
[4], v1
[4], w
;
335 for (j
= 0; j
< skin
->num_bones
; ++j
) {
336 w
= skin
->weights
[j
];
337 b
= &s
->bones
[skin
->boneindices
[j
]];
341 mapply_to_point (v1
, b
->cm
, vsrc
);
346 mapply_to_vector (v0
, b
->cm
, nsrc
);
/*
 * NOTE(review): z is set on lines that are not shown; when it is non-zero
 * the source position is copied through unchanged.
 */
356 if (z
) vcopy (vdst
, vsrc
);
357 else vcopy (vdst
, v
);
/* Report the elapsed time between S and E in seconds. */
365 printf ("took %f sec\n", E
- S
);
/*
 * OCaml primitive: build the bone table in glob_state from a skeleton.
 *
 *   skel_v - block with one entry per bone; for each entry, field 0 is the
 *            parent index and field 1 is a float array from which elements
 *            1..3 are stored in b->v and elements 5..8 in b->q
 *
 * Storage for num_bones + 1 bones is allocated.  Stored parent indices are
 * shifted up by one.  A second pass combines each bone with its parent's
 * mq / mv into the bone's own mq / mv.  Returns unit.
 *
 * NOTE(review): the CAMLparam line, several declarations (i, b, size and
 * the float v used in the second pass), any set-up of bone slot 0 and the
 * resets of b between passes are not part of this listing.
 * NOTE(review): no release of a previously allocated s->bones and no check
 * of the simd_alloc result are visible.
 */
369 CAMLprim value
ml_skin_set_skel (value skel_v
)
375 CAMLlocal2 (v
, floats_v
);
376 State
*s
= &glob_state
;
378 s
->num_bones
= Wosize_val (skel_v
);
/* One extra slot beyond the bones supplied by skel_v. */
379 size
= (s
->num_bones
+ 1) * sizeof (struct bone
);
380 s
->bones
= b
= simd_alloc (size
);
/* Pass 1: copy parent index, v and q out of each skeleton entry. */
390 for (i
= 0; i
< s
->num_bones
; ++i
, ++b
) {
391 v
= Field (skel_v
, i
);
392 floats_v
= Field (v
, 1);
/* +1 maps the OCaml-side parent index onto the shifted bone table. */
394 b
->parent
= Int_val (Field (v
, 0)) + 1;
396 b
->v
[0] = Double_field (floats_v
, 1);
397 b
->v
[1] = Double_field (floats_v
, 2);
398 b
->v
[2] = Double_field (floats_v
, 3);
400 b
->q
[0] = Double_field (floats_v
, 5);
401 b
->q
[1] = Double_field (floats_v
, 6);
402 b
->q
[2] = Double_field (floats_v
, 7);
403 b
->q
[3] = Double_field (floats_v
, 8);
/*
 * Pass 2: derive each bone's mq / mv from its parent's.  Because
 * parent->mq and parent->mv are read here, this presumably relies on
 * parents preceding their children in skel_v - confirm.
 */
407 for (i
= 0; i
< s
->num_bones
; ++i
, ++b
) {
409 struct bone
*parent
= &s
->bones
[b
->parent
];
411 qapply (v
, parent
->mq
, b
->v
);
412 qcompose (b
->mq
, b
->q
, parent
->mq
);
413 vadd (b
->mv
, v
, parent
->mv
);
/* Only this one cm element assignment is visible in the listing. */
417 b
->cm
[11] = b
->mv
[2];
421 CAMLreturn (Val_unit
);
/*
 * OCaml primitive: load one animation pose and rebuild every bone's cm
 * matrix from it.
 *
 *   anim_v - block with one float array per bone; elements 0..3 of each are
 *            stored in the bone's aq
 *
 * Bones are visited starting at s->bones + 1, so slot 0 is skipped.
 * Returns unit.
 *
 * NOTE(review): the CAMLparam line, the declaration of i and the reset of b
 * before the second loop are not part of this listing.  No check that
 * anim_v holds at least s->num_bones entries is visible.
 */
424 CAMLprim value
ml_skin_set_anim (value anim_v
)
428 CAMLlocal1 (floats_v
);
429 State
*s
= &glob_state
;
430 struct bone
*b
= s
->bones
+ 1;
/* Pass 1: copy the per-bone animation values into aq. */
432 for (i
= 0; i
< s
->num_bones
; ++i
, ++b
) {
433 floats_v
= Field (anim_v
, i
);
434 b
->aq
[0] = Double_field (floats_v
, 0);
435 b
->aq
[1] = Double_field (floats_v
, 1);
436 b
->aq
[2] = Double_field (floats_v
, 2);
437 b
->aq
[3] = Double_field (floats_v
, 3);
/* Pass 2: combine with the parent's animated state, then derive cm. */
441 for (i
= 0; i
< s
->num_bones
; ++i
, ++b
) {
442 float v
[4], v1
[4], q
[4], q1
[4];
443 struct bone
*parent
= &s
->bones
[b
->parent
];
/* Animated amq / amv of this bone from the parent's amq / amv. */
445 qapply (v
, parent
->amq
, b
->v
);
446 qcompose (b
->amq
, b
->aq
, parent
->amq
);
447 vadd (b
->amv
, v
, parent
->amv
);
/* q combines the conjugate of mq with amq. */
449 qconjugate (q1
, b
->mq
);
450 qcompose (q
, q1
, b
->amq
);
/* v1 = amv minus mv as transformed by q; q and v1 then define cm. */
452 qapply (v
, q
, b
->mv
);
453 vsub (v1
, b
->amv
, v
);
454 q2matrixt (b
->cm
, q
, v1
);
457 CAMLreturn (Val_unit
);
/*
 * OCaml primitive: run translate() to write skinned positions and normals
 * into the draw buffers of glob_state.
 *
 * Two ways of obtaining the destinations are visible: mapping buffer object
 * bufid[0] for writing, or using s->bufs[V_IDX] / s->bufs[N_IDX] directly.
 * Raises Failure "glMapBuffer failed" / "glUnmapBuffer failed" on the
 * mapped path.  Returns unit.
 *
 * NOTE(review): the conditions selecting between the two paths (presumably
 * use_vbo), the CAMLparam line and the declarations of vdst, ndst and ret
 * are not part of this listing.
 */
460 CAMLprim value
ml_skin_anim (value unit_v
)
465 State
*s
= &glob_state
;
/* Mapped path: both destinations start at the base of the mapping. */
468 glBindBuffer (GL_ARRAY_BUFFER
, s
->bufid
[0]);
469 vdst
= ndst
= glMapBuffer (GL_ARRAY_BUFFER
, GL_WRITE_ONLY
);
470 if (!vdst
) caml_failwith ("glMapBuffer failed");
/* Move ndst by the distance between the N and V slots of bufs[], in floats. */
472 ndst
+= (float *) s
->bufs
[N_IDX
] - (float *) s
->bufs
[V_IDX
];
/* Direct path: write straight into the buffers recorded in bufs[]. */
475 vdst
= s
->bufs
[V_IDX
];
476 ndst
= s
->bufs
[N_IDX
];
479 translate (s
, vdst
, ndst
);
/* Mapped path: release the mapping and report a failed unmap. */
482 ret
= glUnmapBuffer (GL_ARRAY_BUFFER
);
483 if (ret
== GL_FALSE
) caml_failwith ("glUnmapBuffer failed");
486 CAMLreturn (Val_unit
);
/*
 * Supply GL_GENERATE_MIPMAP (0x8191) when the GL headers in use do not
 * define it.  The matching #endif is not part of this listing.
 */
489 #ifndef GL_GENERATE_MIPMAP
490 #define GL_GENERATE_MIPMAP 0x8191
/*
 * OCaml primitive: set the GL_GENERATE_MIPMAP texture parameter to GL_TRUE
 * on the GL_TEXTURE_2D target.
 * unit_v is not referenced on the lines visible here.
 * NOTE(review): the listing ends here; the return statement and anything
 * else that follows in this function are not visible.
 */
493 CAMLprim value
ml_set_generate_mipmaps (value unit_v
)
496 glTexParameteri (GL_TEXTURE_2D
, GL_GENERATE_MIPMAP
, GL_TRUE
);