Add config.h/config.mak bzlib variables missed in last commit.
[mplayer/glamo.git] / libvo / osd.c
blob80a8da7f1c06623384aca8129a2c2772f296f934
// Generic alpha renderers for all YUV modes and RGB depths.
// These are "reference implementations" and should be optimized later (MMX, etc.).
// Templating code from Michael Niedermayer (michaelni@gmx.at) is under the GPL.

//#define FAST_OSD
//#define FAST_OSD_TABLE

8 #include "config.h"
9 #include "osd.h"
10 #include "mp_msg.h"
11 #include <inttypes.h>
12 #include "cpudetect.h"
// The x86 assembly variants can only be built when the target is x86.
#if ARCH_X86
#define CAN_COMPILE_X86_ASM
#endif

#ifdef CAN_COMPILE_X86_ASM
// 8-byte-aligned 64-bit constants, compiled only for x86 builds.
// Nothing in this file reads them directly; presumably the assembly in
// osd_template.c references them -- confirm there before renaming or removing.
static const uint64_t bFF __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL;
static const unsigned long long mask24lh  __attribute__((aligned(8))) = 0xFFFF000000000000ULL;
static const unsigned long long mask24hl  __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL;
#endif

24 //Note: we have C, X86-nommx, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one
25 //Plain C versions
26 #if !HAVE_MMX || defined (RUNTIME_CPUDETECT)
27 #define COMPILE_C
28 #endif
30 #ifdef CAN_COMPILE_X86_ASM
32 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
33 #define COMPILE_MMX
34 #endif
36 #if HAVE_MMX2 || defined (RUNTIME_CPUDETECT)
37 #define COMPILE_MMX2
38 #endif
40 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
41 #define COMPILE_3DNOW
42 #endif
43 #endif //CAN_COMPILE_X86_ASM
45 #undef HAVE_MMX
46 #undef HAVE_MMX2
47 #undef HAVE_AMD3DNOW
48 #define HAVE_MMX 0
49 #define HAVE_MMX2 0
50 #define HAVE_AMD3DNOW 0
52 #ifndef CAN_COMPILE_X86_ASM
54 #ifdef COMPILE_C
55 #undef HAVE_MMX
56 #undef HAVE_MMX2
57 #undef HAVE_AMD3DNOW
58 #define HAVE_MMX 0
59 #define HAVE_MMX2 0
60 #define HAVE_AMD3DNOW 0
61 #define RENAME(a) a ## _C
62 #include "osd_template.c"
63 #endif
65 #else
67 //X86 noMMX versions
68 #ifdef COMPILE_C
69 #undef RENAME
70 #undef HAVE_MMX
71 #undef HAVE_MMX2
72 #undef HAVE_AMD3DNOW
73 #define HAVE_MMX 0
74 #define HAVE_MMX2 0
75 #define HAVE_AMD3DNOW 0
76 #define RENAME(a) a ## _X86
77 #include "osd_template.c"
78 #endif
80 //MMX versions
81 #ifdef COMPILE_MMX
82 #undef RENAME
83 #undef HAVE_MMX
84 #undef HAVE_MMX2
85 #undef HAVE_AMD3DNOW
86 #define HAVE_MMX 1
87 #define HAVE_MMX2 0
88 #define HAVE_AMD3DNOW 0
89 #define RENAME(a) a ## _MMX
90 #include "osd_template.c"
91 #endif
93 //MMX2 versions
94 #ifdef COMPILE_MMX2
95 #undef RENAME
96 #undef HAVE_MMX
97 #undef HAVE_MMX2
98 #undef HAVE_AMD3DNOW
99 #define HAVE_MMX 1
100 #define HAVE_MMX2 1
101 #define HAVE_AMD3DNOW 0
102 #define RENAME(a) a ## _MMX2
103 #include "osd_template.c"
104 #endif
106 //3DNOW versions
107 #ifdef COMPILE_3DNOW
108 #undef RENAME
109 #undef HAVE_MMX
110 #undef HAVE_MMX2
111 #undef HAVE_AMD3DNOW
112 #define HAVE_MMX 1
113 #define HAVE_MMX2 0
114 #define HAVE_AMD3DNOW 1
115 #define RENAME(a) a ## _3DNow
116 #include "osd_template.c"
117 #endif
119 #endif //CAN_COMPILE_X86_ASM
// Public entry point of the YV12 alpha renderer.
// All seven arguments are forwarded unchanged to exactly one of the
// vo_draw_alpha_yv12_* variants generated from osd_template.c; their meaning is
// defined by the template, not here.
//  - RUNTIME_CPUDETECT: on x86 the variant is chosen at run time from gCpuCaps
//    (MMX2, then 3DNow, then MMX, then plain x86); without x86 asm the C
//    variant is used.
//  - otherwise: the variant is fixed at compile time by HAVE_MMX2,
//    HAVE_AMD3DNOW, HAVE_MMX and ARCH_X86, tested in that order.
void vo_draw_alpha_yv12(int w, int h, unsigned char *src, unsigned char *srca, int srcstride, unsigned char *dstbase, int dststride)
{
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered by speed / fastest first
    if (gCpuCaps.hasMMX2)
        vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.has3DNow)
        vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.hasMMX)
        vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}

// Public entry point of the YUY2 alpha renderer.
// All seven arguments are forwarded unchanged to exactly one of the
// vo_draw_alpha_yuy2_* variants generated from osd_template.c; their meaning is
// defined by the template, not here.
//  - RUNTIME_CPUDETECT: on x86 the variant is chosen at run time from gCpuCaps
//    (MMX2, then 3DNow, then MMX, then plain x86); without x86 asm the C
//    variant is used.
//  - otherwise: the variant is fixed at compile time by HAVE_MMX2,
//    HAVE_AMD3DNOW, HAVE_MMX and ARCH_X86, tested in that order.
void vo_draw_alpha_yuy2(int w, int h, unsigned char *src, unsigned char *srca, int srcstride, unsigned char *dstbase, int dststride)
{
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered by speed / fastest first
    if (gCpuCaps.hasMMX2)
        vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.has3DNow)
        vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.hasMMX)
        vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}

// Public entry point of the UYVY alpha renderer.
// All seven arguments are forwarded unchanged to exactly one of the
// vo_draw_alpha_uyvy_* variants generated from osd_template.c; their meaning is
// defined by the template, not here.
//  - RUNTIME_CPUDETECT: on x86 the variant is chosen at run time from gCpuCaps
//    (MMX2, then 3DNow, then MMX, then plain x86); without x86 asm the C
//    variant is used.
//  - otherwise: the variant is fixed at compile time by HAVE_MMX2,
//    HAVE_AMD3DNOW, HAVE_MMX and ARCH_X86, tested in that order.
void vo_draw_alpha_uyvy(int w, int h, unsigned char *src, unsigned char *srca, int srcstride, unsigned char *dstbase, int dststride)
{
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered by speed / fastest first
    if (gCpuCaps.hasMMX2)
        vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.has3DNow)
        vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.hasMMX)
        vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}

// Public entry point of the 24 bpp RGB alpha renderer.
// All seven arguments are forwarded unchanged to exactly one of the
// vo_draw_alpha_rgb24_* variants generated from osd_template.c; their meaning
// is defined by the template, not here.
//  - RUNTIME_CPUDETECT: on x86 the variant is chosen at run time from gCpuCaps
//    (MMX2, then 3DNow, then MMX, then plain x86); without x86 asm the C
//    variant is used.
//  - otherwise: the variant is fixed at compile time by HAVE_MMX2,
//    HAVE_AMD3DNOW, HAVE_MMX and ARCH_X86, tested in that order.
void vo_draw_alpha_rgb24(int w, int h, unsigned char *src, unsigned char *srca, int srcstride, unsigned char *dstbase, int dststride)
{
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered by speed / fastest first
    if (gCpuCaps.hasMMX2)
        vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.has3DNow)
        vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.hasMMX)
        vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}

// Public entry point of the 32 bpp RGB alpha renderer.
// All seven arguments are forwarded unchanged to exactly one of the
// vo_draw_alpha_rgb32_* variants generated from osd_template.c; their meaning
// is defined by the template, not here.
//  - RUNTIME_CPUDETECT: on x86 the variant is chosen at run time from gCpuCaps
//    (MMX2, then 3DNow, then MMX, then plain x86); without x86 asm the C
//    variant is used.
//  - otherwise: the variant is fixed at compile time by HAVE_MMX2,
//    HAVE_AMD3DNOW, HAVE_MMX and ARCH_X86, tested in that order.
void vo_draw_alpha_rgb32(int w, int h, unsigned char *src, unsigned char *srca, int srcstride, unsigned char *dstbase, int dststride)
{
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered by speed / fastest first
    if (gCpuCaps.hasMMX2)
        vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.has3DNow)
        vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.hasMMX)
        vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}

#ifdef FAST_OSD_TABLE
// Lookup tables mapping an 8-bit source value to a packed 15 bpp / 16 bpp gray
// pixel. They are filled by vo_draw_alpha_init() and read by the FAST_OSD
// paths of vo_draw_alpha_rgb15() / vo_draw_alpha_rgb16().
static unsigned short fast_osd_15bpp_table[256];
static unsigned short fast_osd_16bpp_table[256];
#endif

276 void vo_draw_alpha_init(void){
277 #ifdef FAST_OSD_TABLE
278 int i;
279 for(i=0;i<256;i++){
280 fast_osd_15bpp_table[i]=((i>>3)<<10)|((i>>3)<<5)|(i>>3);
281 fast_osd_16bpp_table[i]=((i>>3)<<11)|((i>>2)<<5)|(i>>3);
283 #endif
284 //FIXME the optimized stuff is a lie for 15/16bpp as they aren't optimized yet
285 if( mp_msg_test(MSGT_OSD,MSGL_V) )
287 #ifdef RUNTIME_CPUDETECT
288 #ifdef CAN_COMPILE_X86_ASM
289 // ordered per speed fasterst first
290 if(gCpuCaps.hasMMX2)
291 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
292 else if(gCpuCaps.has3DNow)
293 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
294 else if(gCpuCaps.hasMMX)
295 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
296 else
297 mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
298 #else
299 mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
300 #endif
301 #else //RUNTIME_CPUDETECT
302 #if HAVE_MMX2
303 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
304 #elif HAVE_AMD3DNOW
305 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
306 #elif HAVE_MMX
307 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
308 #elif ARCH_X86
309 mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
310 #else
311 mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
312 #endif
313 #endif //!RUNTIME_CPUDETECT
// Draw a w x h bitmap onto a 15 bpp (5/5/5) destination.
//
//   src, srca  - per-pixel source value and weight planes; both advance by
//                srcstride bytes per row
//   dstbase    - first destination row, accessed as 16-bit pixels; advances by
//                dststride bytes per row
//
// Pixels whose srca byte is 0 are left untouched. For the others:
//  - FAST_OSD: the pixel is overwritten with src[x] reduced to 5 bits and
//    replicated into all three fields (taken from the lookup table when
//    FAST_OSD_TABLE is defined).
//  - default: every 5-bit field c of the destination becomes
//        (((c * srca[x]) >> 5) + src[x]) >> 3
//    No clamping is done; presumably callers supply src/srca pairs for which
//    the result still fits in 5 bits -- TODO confirm against the OSD producer.
void vo_draw_alpha_rgb15(int w, int h, unsigned char *src, unsigned char *srca, int srcstride, unsigned char *dstbase, int dststride)
{
    int y;
    for (y = 0; y < h; y++) {
        register unsigned short *dst = (unsigned short *) dstbase;
        register int x;
        for (x = 0; x < w; x++) {
            if (srca[x]) {
#ifdef FAST_OSD
#ifdef FAST_OSD_TABLE
                dst[x] = fast_osd_15bpp_table[src[x]];
#else
                register unsigned int a = src[x] >> 3;
                dst[x] = (a << 10) | (a << 5) | a;
#endif
#else
                // Split the pixel into its three 5-bit fields.
                unsigned char r = dst[x] & 0x1F;
                unsigned char g = (dst[x] >> 5) & 0x1F;
                unsigned char b = (dst[x] >> 10) & 0x1F;
                // Scale each field to 8 bits, weight it by srca, add src, then
                // drop back to 5 bits.
                r = (((r * srca[x]) >> 5) + src[x]) >> 3;
                g = (((g * srca[x]) >> 5) + src[x]) >> 3;
                b = (((b * srca[x]) >> 5) + src[x]) >> 3;
                dst[x] = (b << 10) | (g << 5) | r;
#endif
            }
        }
        src += srcstride;
        srca += srcstride;
        dstbase += dststride;
    }
    return;
}

// Draw a w x h bitmap onto a 16 bpp (5/6/5) destination.
//
//   src, srca  - per-pixel source value and weight planes; both advance by
//                srcstride bytes per row
//   dstbase    - first destination row, accessed as 16-bit pixels; advances by
//                dststride bytes per row
//
// Pixels whose srca byte is 0 are left untouched. For the others:
//  - FAST_OSD: the pixel is overwritten with src[x] reduced to 5/6/5 bits
//    (taken from the lookup table when FAST_OSD_TABLE is defined).
//  - default: the 5-bit fields c become (((c * srca[x]) >> 5) + src[x]) >> 3
//    and the 6-bit middle field becomes (((c * srca[x]) >> 6) + src[x]) >> 2.
//    No clamping is done; presumably callers supply src/srca pairs for which
//    the results still fit their fields -- TODO confirm against the OSD
//    producer.
void vo_draw_alpha_rgb16(int w, int h, unsigned char *src, unsigned char *srca, int srcstride, unsigned char *dstbase, int dststride)
{
    int y;
    for (y = 0; y < h; y++) {
        register unsigned short *dst = (unsigned short *) dstbase;
        register int x;
        for (x = 0; x < w; x++) {
            if (srca[x]) {
#ifdef FAST_OSD
#ifdef FAST_OSD_TABLE
                dst[x] = fast_osd_16bpp_table[src[x]];
#else
                dst[x] = ((src[x] >> 3) << 11) | ((src[x] >> 2) << 5) | (src[x] >> 3);
#endif
#else
                // Split the pixel into its 5-, 6- and 5-bit fields.
                unsigned char r = dst[x] & 0x1F;
                unsigned char g = (dst[x] >> 5) & 0x3F;
                unsigned char b = (dst[x] >> 11) & 0x1F;
                // Scale each field to 8 bits, weight it by srca, add src, then
                // drop back to the field's own width.
                r = (((r * srca[x]) >> 5) + src[x]) >> 3;
                g = (((g * srca[x]) >> 6) + src[x]) >> 2;
                b = (((b * srca[x]) >> 5) + src[x]) >> 3;
                dst[x] = (b << 11) | (g << 5) | r;
#endif
            }
        }
        src += srcstride;
        srca += srcstride;
        dstbase += dststride;
    }
    return;
}