Fix bug introduced by me in r28756
[mplayer/glamo.git] / libvo / osd.c
blob6660500ecda8af45f46bd851dc47836cceb0796f
1 /*
2 * generic alpha renderers for all YUV modes and RGB depths
3 * These are "reference implementations", should be optimized later (MMX, etc).
4 * templating code by Michael Niedermayer (michaelni@gmx.at)
6 * This file is part of MPlayer.
8 * MPlayer is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * MPlayer is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 //#define FAST_OSD
24 //#define FAST_OSD_TABLE
26 #include "config.h"
27 #include "osd.h"
28 #include "mp_msg.h"
29 #include <inttypes.h>
30 #include "cpudetect.h"
/* Hand-written x86 assembly can only be built when targeting x86. */
#if ARCH_X86
#  define CAN_COMPILE_X86_ASM
#endif

#if defined(CAN_COMPILE_X86_ASM)
/* 8-byte aligned 64-bit constants; not referenced in this file itself,
 * presumably used by the code in osd_template.c -- verify there. */
static const uint64_t bFF __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL;
static const unsigned long long mask24lh __attribute__((aligned(8))) = 0xFFFF000000000000ULL;
static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL;
#endif

/*
 * Decide which variants of the template get compiled.
 * Available variants: C, x86 without MMX, MMX, MMX2 and 3DNow;
 * there is no combined 3DNow+MMX2 variant.
 * With RUNTIME_CPUDETECT every variant that can be built is built.
 */

/* Plain (non-MMX) variant. */
#if !HAVE_MMX || defined(RUNTIME_CPUDETECT)
#  define COMPILE_C
#endif

#if defined(CAN_COMPILE_X86_ASM)

#  if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined(RUNTIME_CPUDETECT)
#    define COMPILE_MMX
#  endif

#  if HAVE_MMX2 || defined(RUNTIME_CPUDETECT)
#    define COMPILE_MMX2
#  endif

#  if (HAVE_AMD3DNOW && !HAVE_MMX2) || defined(RUNTIME_CPUDETECT)
#    define COMPILE_3DNOW
#  endif

#endif /* CAN_COMPILE_X86_ASM */
/*
 * Instantiate osd_template.c once per selected variant.
 *
 * Before each inclusion the HAVE_* macros are redefined to describe the
 * variant being built and RENAME() is set to append the matching suffix to
 * the identifiers passed through it. The order of the sections matters:
 * once this block is done, HAVE_* keep the values of the last variant that
 * was instantiated, and the "#if HAVE_MMX2 / HAVE_AMD3DNOW / HAVE_MMX"
 * tests further down in this file read exactly those values.
 */

/* Start with every feature switched off, so HAVE_* are 0 when no
 * variant below overrides them. */
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 0
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 0

#if !defined(CAN_COMPILE_X86_ASM)

/* No x86 assembly available: portable C variant, suffix _C. */
#  if defined(COMPILE_C)
#    undef HAVE_MMX
#    undef HAVE_MMX2
#    undef HAVE_AMD3DNOW
#    define HAVE_MMX 0
#    define HAVE_MMX2 0
#    define HAVE_AMD3DNOW 0
#    define RENAME(a) a ## _C
#    include "osd_template.c"
#  endif

#else /* CAN_COMPILE_X86_ASM */

/* x86 variant without MMX, suffix _X86. */
#  if defined(COMPILE_C)
#    undef RENAME
#    undef HAVE_MMX
#    undef HAVE_MMX2
#    undef HAVE_AMD3DNOW
#    define HAVE_MMX 0
#    define HAVE_MMX2 0
#    define HAVE_AMD3DNOW 0
#    define RENAME(a) a ## _X86
#    include "osd_template.c"
#  endif

/* MMX variant, suffix _MMX. */
#  if defined(COMPILE_MMX)
#    undef RENAME
#    undef HAVE_MMX
#    undef HAVE_MMX2
#    undef HAVE_AMD3DNOW
#    define HAVE_MMX 1
#    define HAVE_MMX2 0
#    define HAVE_AMD3DNOW 0
#    define RENAME(a) a ## _MMX
#    include "osd_template.c"
#  endif

/* MMX2 variant, suffix _MMX2. */
#  if defined(COMPILE_MMX2)
#    undef RENAME
#    undef HAVE_MMX
#    undef HAVE_MMX2
#    undef HAVE_AMD3DNOW
#    define HAVE_MMX 1
#    define HAVE_MMX2 1
#    define HAVE_AMD3DNOW 0
#    define RENAME(a) a ## _MMX2
#    include "osd_template.c"
#  endif

/* 3DNow variant, suffix _3DNow. */
#  if defined(COMPILE_3DNOW)
#    undef RENAME
#    undef HAVE_MMX
#    undef HAVE_MMX2
#    undef HAVE_AMD3DNOW
#    define HAVE_MMX 1
#    define HAVE_MMX2 0
#    define HAVE_AMD3DNOW 1
#    define RENAME(a) a ## _3DNow
#    include "osd_template.c"
#  endif

#endif /* CAN_COMPILE_X86_ASM */
/*
 * Entry point of the YV12 alpha renderer.
 *
 * Thin dispatcher: all arguments are forwarded unchanged to exactly one of
 * the suffixed implementations. With RUNTIME_CPUDETECT on x86 the variant is
 * picked from gCpuCaps at run time (MMX2, then 3DNow, then MMX, then plain
 * x86); with RUNTIME_CPUDETECT elsewhere the _C variant is used. Without
 * RUNTIME_CPUDETECT the choice is fixed at compile time by HAVE_MMX2,
 * HAVE_AMD3DNOW, HAVE_MMX and ARCH_X86, falling back to _C.
 */
void vo_draw_alpha_yv12(int w, int h, unsigned char *src, unsigned char *srca,
                        int srcstride, unsigned char *dstbase, int dststride)
{
#if defined(RUNTIME_CPUDETECT) && defined(CAN_COMPILE_X86_ASM)
    /* Ordered by speed, fastest first. */
    if (gCpuCaps.hasMMX2) {
        vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.has3DNow) {
        vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.hasMMX) {
        vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    } else {
        vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#elif defined(RUNTIME_CPUDETECT)
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX2
    vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
}
/*
 * Entry point of the YUY2 alpha renderer.
 *
 * Thin dispatcher: all arguments are forwarded unchanged to exactly one of
 * the suffixed implementations. With RUNTIME_CPUDETECT on x86 the variant is
 * picked from gCpuCaps at run time (MMX2, then 3DNow, then MMX, then plain
 * x86); with RUNTIME_CPUDETECT elsewhere the _C variant is used. Without
 * RUNTIME_CPUDETECT the choice is fixed at compile time by HAVE_MMX2,
 * HAVE_AMD3DNOW, HAVE_MMX and ARCH_X86, falling back to _C.
 */
void vo_draw_alpha_yuy2(int w, int h, unsigned char *src, unsigned char *srca,
                        int srcstride, unsigned char *dstbase, int dststride)
{
#if defined(RUNTIME_CPUDETECT) && defined(CAN_COMPILE_X86_ASM)
    /* Ordered by speed, fastest first. */
    if (gCpuCaps.hasMMX2) {
        vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.has3DNow) {
        vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.hasMMX) {
        vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    } else {
        vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#elif defined(RUNTIME_CPUDETECT)
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX2
    vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
}
/*
 * Entry point of the UYVY alpha renderer.
 *
 * Thin dispatcher: all arguments are forwarded unchanged to exactly one of
 * the suffixed implementations. With RUNTIME_CPUDETECT on x86 the variant is
 * picked from gCpuCaps at run time (MMX2, then 3DNow, then MMX, then plain
 * x86); with RUNTIME_CPUDETECT elsewhere the _C variant is used. Without
 * RUNTIME_CPUDETECT the choice is fixed at compile time by HAVE_MMX2,
 * HAVE_AMD3DNOW, HAVE_MMX and ARCH_X86, falling back to _C.
 */
void vo_draw_alpha_uyvy(int w, int h, unsigned char *src, unsigned char *srca,
                        int srcstride, unsigned char *dstbase, int dststride)
{
#if defined(RUNTIME_CPUDETECT) && defined(CAN_COMPILE_X86_ASM)
    /* Ordered by speed, fastest first. */
    if (gCpuCaps.hasMMX2) {
        vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.has3DNow) {
        vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.hasMMX) {
        vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    } else {
        vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#elif defined(RUNTIME_CPUDETECT)
    vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX2
    vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
}
/*
 * Entry point of the 24bpp RGB alpha renderer.
 *
 * Thin dispatcher: all arguments are forwarded unchanged to exactly one of
 * the suffixed implementations. With RUNTIME_CPUDETECT on x86 the variant is
 * picked from gCpuCaps at run time (MMX2, then 3DNow, then MMX, then plain
 * x86); with RUNTIME_CPUDETECT elsewhere the _C variant is used. Without
 * RUNTIME_CPUDETECT the choice is fixed at compile time by HAVE_MMX2,
 * HAVE_AMD3DNOW, HAVE_MMX and ARCH_X86, falling back to _C.
 */
void vo_draw_alpha_rgb24(int w, int h, unsigned char *src, unsigned char *srca,
                         int srcstride, unsigned char *dstbase, int dststride)
{
#if defined(RUNTIME_CPUDETECT) && defined(CAN_COMPILE_X86_ASM)
    /* Ordered by speed, fastest first. */
    if (gCpuCaps.hasMMX2) {
        vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.has3DNow) {
        vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.hasMMX) {
        vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    } else {
        vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#elif defined(RUNTIME_CPUDETECT)
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX2
    vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
}
/*
 * Entry point of the 32bpp RGB alpha renderer.
 *
 * Thin dispatcher: all arguments are forwarded unchanged to exactly one of
 * the suffixed implementations. With RUNTIME_CPUDETECT on x86 the variant is
 * picked from gCpuCaps at run time (MMX2, then 3DNow, then MMX, then plain
 * x86); with RUNTIME_CPUDETECT elsewhere the _C variant is used. Without
 * RUNTIME_CPUDETECT the choice is fixed at compile time by HAVE_MMX2,
 * HAVE_AMD3DNOW, HAVE_MMX and ARCH_X86, falling back to _C.
 */
void vo_draw_alpha_rgb32(int w, int h, unsigned char *src, unsigned char *srca,
                         int srcstride, unsigned char *dstbase, int dststride)
{
#if defined(RUNTIME_CPUDETECT) && defined(CAN_COMPILE_X86_ASM)
    /* Ordered by speed, fastest first. */
    if (gCpuCaps.hasMMX2) {
        vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.has3DNow) {
        vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.hasMMX) {
        vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    } else {
        vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#elif defined(RUNTIME_CPUDETECT)
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX2
    vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
}
289 #ifdef FAST_OSD_TABLE
290 static unsigned short fast_osd_15bpp_table[256];
291 static unsigned short fast_osd_16bpp_table[256];
292 #endif
294 void vo_draw_alpha_init(void){
295 #ifdef FAST_OSD_TABLE
296 int i;
297 for(i=0;i<256;i++){
298 fast_osd_15bpp_table[i]=((i>>3)<<10)|((i>>3)<<5)|(i>>3);
299 fast_osd_16bpp_table[i]=((i>>3)<<11)|((i>>2)<<5)|(i>>3);
301 #endif
302 //FIXME the optimized stuff is a lie for 15/16bpp as they aren't optimized yet
303 if( mp_msg_test(MSGT_OSD,MSGL_V) )
305 #ifdef RUNTIME_CPUDETECT
306 #ifdef CAN_COMPILE_X86_ASM
307 // ordered per speed fasterst first
308 if(gCpuCaps.hasMMX2)
309 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
310 else if(gCpuCaps.has3DNow)
311 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
312 else if(gCpuCaps.hasMMX)
313 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
314 else
315 mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
316 #else
317 mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
318 #endif
319 #else //RUNTIME_CPUDETECT
320 #if HAVE_MMX2
321 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
322 #elif HAVE_AMD3DNOW
323 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
324 #elif HAVE_MMX
325 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
326 #elif ARCH_X86
327 mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
328 #else
329 mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
330 #endif
331 #endif //!RUNTIME_CPUDETECT
/*
 * Render an 8-bit bitmap with a per-pixel multiplier onto a 15bpp surface
 * (three 5-bit channels at bits 0-4, 5-9 and 10-14 of each 16-bit pixel).
 *
 * w, h       size of the area in pixels
 * src        8-bit values added to every channel
 * srca       8-bit multipliers for the existing pixel; a zero entry leaves
 *            the destination pixel untouched
 * srcstride  bytes between consecutive rows of both src and srca
 * dstbase    first destination row, accessed as unsigned short pixels
 * dststride  bytes between consecutive destination rows
 *
 * Default path, per channel c: c = (((c * srca) >> 5) + src) >> 3.
 * With FAST_OSD the pixel is instead overwritten with the grey value of
 * src (through the lookup table when FAST_OSD_TABLE is defined).
 *
 * NOTE(review): the result is not clamped to 5 bits; presumably callers
 * supply src/srca pairs that cannot overflow -- confirm.
 */
void vo_draw_alpha_rgb15(int w, int h, unsigned char *src, unsigned char *srca,
                         int srcstride, unsigned char *dstbase, int dststride)
{
    int row;

    for (row = 0; row < h;
         row++, src += srcstride, srca += srcstride, dstbase += dststride) {
        unsigned short *line = (unsigned short *) dstbase;
        int col;

        for (col = 0; col < w; col++) {
            if (!srca[col])
                continue;
#if defined(FAST_OSD) && defined(FAST_OSD_TABLE)
            line[col] = fast_osd_15bpp_table[src[col]];
#elif defined(FAST_OSD)
            {
                const unsigned int grey = src[col] >> 3;

                line[col] = (grey << 10) | (grey << 5) | grey;
            }
#else
            {
                const unsigned int mul = srca[col];
                const unsigned int add = src[col];
                const unsigned int pix = line[col];
                unsigned int red   = pix & 0x1F;
                unsigned int green = (pix >> 5) & 0x1F;
                unsigned int blue  = (pix >> 10) & 0x1F;

                red   = (((red   * mul) >> 5) + add) >> 3;
                green = (((green * mul) >> 5) + add) >> 3;
                blue  = (((blue  * mul) >> 5) + add) >> 3;
                line[col] = (blue << 10) | (green << 5) | red;
            }
#endif
        }
    }
}
/*
 * Render an 8-bit bitmap with a per-pixel multiplier onto a 16bpp surface
 * (5-bit channel at bits 0-4, 6-bit channel at bits 5-10, 5-bit channel at
 * bits 11-15 of each 16-bit pixel).
 *
 * w, h       size of the area in pixels
 * src        8-bit values added to every channel
 * srca       8-bit multipliers for the existing pixel; a zero entry leaves
 *            the destination pixel untouched
 * srcstride  bytes between consecutive rows of both src and srca
 * dstbase    first destination row, accessed as unsigned short pixels
 * dststride  bytes between consecutive destination rows
 *
 * Default path: 5-bit channels use c = (((c * srca) >> 5) + src) >> 3, the
 * 6-bit channel uses c = (((c * srca) >> 6) + src) >> 2.
 * With FAST_OSD the pixel is instead overwritten with the grey value of
 * src (through the lookup table when FAST_OSD_TABLE is defined).
 *
 * NOTE(review): the result is not clamped to the channel width; presumably
 * callers supply src/srca pairs that cannot overflow -- confirm.
 */
void vo_draw_alpha_rgb16(int w, int h, unsigned char *src, unsigned char *srca,
                         int srcstride, unsigned char *dstbase, int dststride)
{
    int row;

    for (row = 0; row < h;
         row++, src += srcstride, srca += srcstride, dstbase += dststride) {
        unsigned short *line = (unsigned short *) dstbase;
        int col;

        for (col = 0; col < w; col++) {
            if (!srca[col])
                continue;
#if defined(FAST_OSD) && defined(FAST_OSD_TABLE)
            line[col] = fast_osd_16bpp_table[src[col]];
#elif defined(FAST_OSD)
            line[col] = ((src[col] >> 3) << 11) | ((src[col] >> 2) << 5) | (src[col] >> 3);
#else
            {
                const unsigned int mul = srca[col];
                const unsigned int add = src[col];
                const unsigned int pix = line[col];
                unsigned int red   = pix & 0x1F;
                unsigned int green = (pix >> 5) & 0x3F;
                unsigned int blue  = (pix >> 11) & 0x1F;

                red   = (((red   * mul) >> 5) + add) >> 3;
                green = (((green * mul) >> 6) + add) >> 2;
                blue  = (((blue  * mul) >> 5) + add) >> 3;
                line[col] = (blue << 11) | (green << 5) | red;
            }
#endif
        }
    }
}