Get rid of pointless preprocessor condition indirection and use ARCH_X86
[mplayer/glamo.git] / libvo / osd.c
blob42dd4fcc690ace02b92b0ccdf74289ade3dcffd9
1 /*
2 * generic alpha renderers for all YUV modes and RGB depths
3 * These are "reference implementations", should be optimized later (MMX, etc).
4 * templating code by Michael Niedermayer (michaelni@gmx.at)
6 * This file is part of MPlayer.
8 * MPlayer is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * MPlayer is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 //#define FAST_OSD
24 //#define FAST_OSD_TABLE
26 #include "config.h"
27 #include "osd.h"
28 #include "mp_msg.h"
29 #include <inttypes.h>
30 #include "cpudetect.h"
/* 64-bit mask constants, forced to 8-byte alignment and only compiled when
 * ARCH_X86 is non-zero.  Nothing in this file references them directly;
 * presumably they are consumed by the MMX code pulled in from
 * osd_template.c -- TODO confirm against the template. */
32 #if ARCH_X86
/* all 64 bits set */
33 static const uint64_t bFF __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL;
/* upper 16 bits set, lower 48 bits clear */
34 static const unsigned long long mask24lh __attribute__((aligned(8))) = 0xFFFF000000000000ULL;
/* lower 48 bits set, upper 16 bits clear (bitwise complement of mask24lh) */
35 static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL;
36 #endif
/* Decide which variants of osd_template.c get instantiated further down.
 * With RUNTIME_CPUDETECT defined every COMPILE_* symbol is set (the MMX,
 * MMX2 and 3DNOW ones only on x86); otherwise the choice follows the
 * compile-time HAVE_* configuration values. */
38 //Note: there are C, X86-noMMX, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
39 //Plain C versions
40 #if !HAVE_MMX || defined (RUNTIME_CPUDETECT)
41 #define COMPILE_C
42 #endif
/* The SIMD variants are only considered on x86. */
44 #if ARCH_X86
/* plain MMX: only when neither 3DNow nor MMX2 is available */
46 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
47 #define COMPILE_MMX
48 #endif
/* MMX2 takes precedence over 3DNow (see the !HAVE_MMX2 tests around it) */
50 #if HAVE_MMX2 || defined (RUNTIME_CPUDETECT)
51 #define COMPILE_MMX2
52 #endif
/* 3DNow: only when MMX2 is not available */
54 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
55 #define COMPILE_3DNOW
56 #endif
58 #endif /* ARCH_X86 */
/* Template instantiation.
 * osd_template.c is included once per selected variant.  Before each include
 * the HAVE_MMX / HAVE_MMX2 / HAVE_AMD3DNOW macros are redefined to describe
 * that variant, and RENAME() is redefined to append the variant suffix
 * (_C, _X86, _MMX, _MMX2, _3DNow) to the name it is given.
 *
 * NOTE: the HAVE_* macros keep whatever values the last processed section
 * left behind (unless osd_template.c changes them -- not visible here).
 * The "#if HAVE_MMX2 / #elif HAVE_AMD3DNOW / ..." chains in the functions
 * later in this file evaluate those values, not the ones from config.h. */
/* Start from "no SIMD".  This also makes the identical resets inside the two
 * COMPILE_C sections below redundant (but harmless). */
60 #undef HAVE_MMX
61 #undef HAVE_MMX2
62 #undef HAVE_AMD3DNOW
63 #define HAVE_MMX 0
64 #define HAVE_MMX2 0
65 #define HAVE_AMD3DNOW 0
/* Non-x86 targets: only the plain C variant (suffix _C) can be built. */
67 #if ! ARCH_X86
69 #ifdef COMPILE_C
70 #undef HAVE_MMX
71 #undef HAVE_MMX2
72 #undef HAVE_AMD3DNOW
73 #define HAVE_MMX 0
74 #define HAVE_MMX2 0
75 #define HAVE_AMD3DNOW 0
76 #define RENAME(a) a ## _C
77 #include "osd_template.c"
78 #endif
/* x86 targets: up to four variants, each guarded by its COMPILE_* symbol. */
80 #else
82 //X86 noMMX versions
83 #ifdef COMPILE_C
84 #undef RENAME
85 #undef HAVE_MMX
86 #undef HAVE_MMX2
87 #undef HAVE_AMD3DNOW
88 #define HAVE_MMX 0
89 #define HAVE_MMX2 0
90 #define HAVE_AMD3DNOW 0
91 #define RENAME(a) a ## _X86
92 #include "osd_template.c"
93 #endif
95 //MMX versions
96 #ifdef COMPILE_MMX
97 #undef RENAME
98 #undef HAVE_MMX
99 #undef HAVE_MMX2
100 #undef HAVE_AMD3DNOW
101 #define HAVE_MMX 1
102 #define HAVE_MMX2 0
103 #define HAVE_AMD3DNOW 0
104 #define RENAME(a) a ## _MMX
105 #include "osd_template.c"
106 #endif
108 //MMX2 versions
109 #ifdef COMPILE_MMX2
110 #undef RENAME
111 #undef HAVE_MMX
112 #undef HAVE_MMX2
113 #undef HAVE_AMD3DNOW
114 #define HAVE_MMX 1
115 #define HAVE_MMX2 1
116 #define HAVE_AMD3DNOW 0
117 #define RENAME(a) a ## _MMX2
118 #include "osd_template.c"
119 #endif
121 //3DNOW versions
122 #ifdef COMPILE_3DNOW
123 #undef RENAME
124 #undef HAVE_MMX
125 #undef HAVE_MMX2
126 #undef HAVE_AMD3DNOW
127 #define HAVE_MMX 1
128 #define HAVE_MMX2 0
129 #define HAVE_AMD3DNOW 1
130 #define RENAME(a) a ## _3DNow
131 #include "osd_template.c"
132 #endif
134 #endif /* ARCH_X86 */
/**
 * Entry point of the YV12 alpha renderer.
 *
 * Thin dispatcher: all arguments are forwarded unchanged to exactly one of
 * the vo_draw_alpha_yv12_* variants instantiated from osd_template.c.
 *  - With RUNTIME_CPUDETECT on x86 the variant is chosen from gCpuCaps on
 *    every call (MMX2, then 3DNow, then MMX, then plain x86).
 *  - With RUNTIME_CPUDETECT on other architectures the _C variant is used.
 *  - Without RUNTIME_CPUDETECT the variant is fixed at compile time by the
 *    HAVE_MMX2 / HAVE_AMD3DNOW / HAVE_MMX / ARCH_X86 macros as they stand at
 *    this point of the translation unit.
 *
 * @param w,h        forwarded as-is (see the selected implementation)
 * @param src,srca   forwarded as-is
 * @param srcstride  forwarded as-is
 * @param dstbase    forwarded as-is
 * @param dststride  forwarded as-is
 */
void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#ifdef RUNTIME_CPUDETECT
#if ARCH_X86
    // ordered by speed / fastest first
    if (gCpuCaps.hasMMX2) {
        vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.has3DNow) {
        vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.hasMMX) {
        vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    } else {
        vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}
/**
 * Entry point of the YUY2 alpha renderer.
 *
 * Thin dispatcher: all arguments are forwarded unchanged to exactly one of
 * the vo_draw_alpha_yuy2_* variants instantiated from osd_template.c.
 *  - With RUNTIME_CPUDETECT on x86 the variant is chosen from gCpuCaps on
 *    every call (MMX2, then 3DNow, then MMX, then plain x86).
 *  - With RUNTIME_CPUDETECT on other architectures the _C variant is used.
 *  - Without RUNTIME_CPUDETECT the variant is fixed at compile time by the
 *    HAVE_MMX2 / HAVE_AMD3DNOW / HAVE_MMX / ARCH_X86 macros as they stand at
 *    this point of the translation unit.
 *
 * @param w,h        forwarded as-is (see the selected implementation)
 * @param src,srca   forwarded as-is
 * @param srcstride  forwarded as-is
 * @param dstbase    forwarded as-is
 * @param dststride  forwarded as-is
 */
void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#ifdef RUNTIME_CPUDETECT
#if ARCH_X86
    // ordered by speed / fastest first
    if (gCpuCaps.hasMMX2) {
        vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.has3DNow) {
        vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.hasMMX) {
        vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    } else {
        vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}
/**
 * Entry point of the UYVY alpha renderer.
 *
 * Thin dispatcher: all arguments are forwarded unchanged to exactly one of
 * the vo_draw_alpha_uyvy_* variants instantiated from osd_template.c.
 *  - With RUNTIME_CPUDETECT on x86 the variant is chosen from gCpuCaps on
 *    every call (MMX2, then 3DNow, then MMX, then plain x86).
 *  - With RUNTIME_CPUDETECT on other architectures the _C variant is used.
 *  - Without RUNTIME_CPUDETECT the variant is fixed at compile time by the
 *    HAVE_MMX2 / HAVE_AMD3DNOW / HAVE_MMX / ARCH_X86 macros as they stand at
 *    this point of the translation unit.
 *
 * @param w,h        forwarded as-is (see the selected implementation)
 * @param src,srca   forwarded as-is
 * @param srcstride  forwarded as-is
 * @param dstbase    forwarded as-is
 * @param dststride  forwarded as-is
 */
void vo_draw_alpha_uyvy(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#ifdef RUNTIME_CPUDETECT
#if ARCH_X86
    // ordered by speed / fastest first
    if (gCpuCaps.hasMMX2) {
        vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.has3DNow) {
        vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.hasMMX) {
        vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    } else {
        vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#else
    vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}
/**
 * Entry point of the 24-bit RGB alpha renderer.
 *
 * Thin dispatcher: all arguments are forwarded unchanged to exactly one of
 * the vo_draw_alpha_rgb24_* variants instantiated from osd_template.c.
 *  - With RUNTIME_CPUDETECT on x86 the variant is chosen from gCpuCaps on
 *    every call (MMX2, then 3DNow, then MMX, then plain x86).
 *  - With RUNTIME_CPUDETECT on other architectures the _C variant is used.
 *  - Without RUNTIME_CPUDETECT the variant is fixed at compile time by the
 *    HAVE_MMX2 / HAVE_AMD3DNOW / HAVE_MMX / ARCH_X86 macros as they stand at
 *    this point of the translation unit.
 *
 * @param w,h        forwarded as-is (see the selected implementation)
 * @param src,srca   forwarded as-is
 * @param srcstride  forwarded as-is
 * @param dstbase    forwarded as-is
 * @param dststride  forwarded as-is
 */
void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#ifdef RUNTIME_CPUDETECT
#if ARCH_X86
    // ordered by speed / fastest first
    if (gCpuCaps.hasMMX2) {
        vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.has3DNow) {
        vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.hasMMX) {
        vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    } else {
        vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}
/**
 * Entry point of the 32-bit RGB alpha renderer.
 *
 * Thin dispatcher: all arguments are forwarded unchanged to exactly one of
 * the vo_draw_alpha_rgb32_* variants instantiated from osd_template.c.
 *  - With RUNTIME_CPUDETECT on x86 the variant is chosen from gCpuCaps on
 *    every call (MMX2, then 3DNow, then MMX, then plain x86).
 *  - With RUNTIME_CPUDETECT on other architectures the _C variant is used.
 *  - Without RUNTIME_CPUDETECT the variant is fixed at compile time by the
 *    HAVE_MMX2 / HAVE_AMD3DNOW / HAVE_MMX / ARCH_X86 macros as they stand at
 *    this point of the translation unit.
 *
 * @param w,h        forwarded as-is (see the selected implementation)
 * @param src,srca   forwarded as-is
 * @param srcstride  forwarded as-is
 * @param dstbase    forwarded as-is
 * @param dststride  forwarded as-is
 */
void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#ifdef RUNTIME_CPUDETECT
#if ARCH_X86
    // ordered by speed / fastest first
    if (gCpuCaps.hasMMX2) {
        vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.has3DNow) {
        vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.hasMMX) {
        vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    } else {
        vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}
286 #ifdef FAST_OSD_TABLE
287 static unsigned short fast_osd_15bpp_table[256];
288 static unsigned short fast_osd_16bpp_table[256];
289 #endif
291 void vo_draw_alpha_init(void){
292 #ifdef FAST_OSD_TABLE
293 int i;
294 for(i=0;i<256;i++){
295 fast_osd_15bpp_table[i]=((i>>3)<<10)|((i>>3)<<5)|(i>>3);
296 fast_osd_16bpp_table[i]=((i>>3)<<11)|((i>>2)<<5)|(i>>3);
298 #endif
299 //FIXME the optimized stuff is a lie for 15/16bpp as they aren't optimized yet
300 if( mp_msg_test(MSGT_OSD,MSGL_V) )
302 #ifdef RUNTIME_CPUDETECT
303 #if ARCH_X86
304 // ordered per speed fasterst first
305 if(gCpuCaps.hasMMX2)
306 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
307 else if(gCpuCaps.has3DNow)
308 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
309 else if(gCpuCaps.hasMMX)
310 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
311 else
312 mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
313 #else
314 mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
315 #endif
316 #else //RUNTIME_CPUDETECT
317 #if HAVE_MMX2
318 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
319 #elif HAVE_AMD3DNOW
320 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
321 #elif HAVE_MMX
322 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
323 #elif ARCH_X86
324 mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
325 #else
326 mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
327 #endif
328 #endif //!RUNTIME_CPUDETECT
/**
 * Alpha-render a bitmap onto a 15bpp (5:5:5) destination, plain C.
 *
 * For every pixel whose srca byte is non-zero the destination pixel is
 * rewritten; pixels with srca == 0 are left untouched.
 *
 * Default path (FAST_OSD undefined): each 5-bit channel c becomes
 *     (((c * srca) >> 5) + src) >> 3
 * i.e. the channel is scaled by srca, src is added, and the result is cut
 * back to 5 bits.  The result is NOT clamped: if ((c*srca)>>5) + src exceeds
 * 255 the channel spills into its neighbour, so callers have to supply
 * src/srca pairs for which that sum fits in 8 bits.  Bit 15 of the
 * destination pixel is always written as 0 for such inputs.
 *
 * FAST_OSD path: the pixel is simply replaced by src[x] (top 5 bits) in all
 * three channels, either computed inline or read from fast_osd_15bpp_table.
 *
 * @param w          pixels processed per row
 * @param h          number of rows
 * @param src        per-pixel value added to every channel
 * @param srca       per-pixel scale factor; 0 means "skip this pixel"
 * @param srcstride  bytes between consecutive rows of both src and srca
 * @param dstbase    first destination row, accessed as unsigned short pixels
 * @param dststride  bytes between consecutive destination rows
 */
void vo_draw_alpha_rgb15(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    for (y = 0; y < h; y++) {
        unsigned short *dst = (unsigned short *) dstbase;
        int x;
        for (x = 0; x < w; x++) {
            if (srca[x]) {
#ifdef FAST_OSD
#ifdef FAST_OSD_TABLE
                dst[x] = fast_osd_15bpp_table[src[x]];
#else
                unsigned int a = src[x] >> 3;
                dst[x] = (a << 10) | (a << 5) | a;
#endif
#else
                /* unpack 5:5:5 */
                unsigned char r = dst[x] & 0x1F;
                unsigned char g = (dst[x] >> 5) & 0x1F;
                unsigned char b = (dst[x] >> 10) & 0x1F;
                /* ">>5" combines "expand to 8 bits" (<<3) with "/256" (>>8) */
                r = (((r * srca[x]) >> 5) + src[x]) >> 3;
                g = (((g * srca[x]) >> 5) + src[x]) >> 3;
                b = (((b * srca[x]) >> 5) + src[x]) >> 3;
                dst[x] = (b << 10) | (g << 5) | r;
#endif
            }
        }
        /* src and srca share one stride; the destination has its own */
        src     += srcstride;
        srca    += srcstride;
        dstbase += dststride;
    }
}
/**
 * Alpha-render a bitmap onto a 16bpp (5:6:5) destination, plain C.
 *
 * For every pixel whose srca byte is non-zero the destination pixel is
 * rewritten; pixels with srca == 0 are left untouched.
 *
 * Default path (FAST_OSD undefined): the 5-bit channels become
 *     (((c * srca) >> 5) + src) >> 3
 * and the 6-bit middle channel becomes
 *     (((c * srca) >> 6) + src) >> 2
 * The result is NOT clamped: if the intermediate sum exceeds 255 the channel
 * spills into its neighbour, so callers have to supply src/srca pairs for
 * which that sum fits in 8 bits.
 *
 * FAST_OSD path: the pixel is simply replaced by src[x] truncated to 5/6/5
 * bits in the three channels, either computed inline or read from
 * fast_osd_16bpp_table.
 *
 * @param w          pixels processed per row
 * @param h          number of rows
 * @param src        per-pixel value added to every channel
 * @param srca       per-pixel scale factor; 0 means "skip this pixel"
 * @param srcstride  bytes between consecutive rows of both src and srca
 * @param dstbase    first destination row, accessed as unsigned short pixels
 * @param dststride  bytes between consecutive destination rows
 */
void vo_draw_alpha_rgb16(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    for (y = 0; y < h; y++) {
        unsigned short *dst = (unsigned short *) dstbase;
        int x;
        for (x = 0; x < w; x++) {
            if (srca[x]) {
#ifdef FAST_OSD
#ifdef FAST_OSD_TABLE
                dst[x] = fast_osd_16bpp_table[src[x]];
#else
                dst[x] = ((src[x] >> 3) << 11) | ((src[x] >> 2) << 5) | (src[x] >> 3);
#endif
#else
                /* unpack 5:6:5 */
                unsigned char r = dst[x] & 0x1F;
                unsigned char g = (dst[x] >> 5) & 0x3F;
                unsigned char b = (dst[x] >> 11) & 0x1F;
                /* shift amounts differ for g because it carries six bits */
                r = (((r * srca[x]) >> 5) + src[x]) >> 3;
                g = (((g * srca[x]) >> 6) + src[x]) >> 2;
                b = (((b * srca[x]) >> 5) + src[x]) >> 3;
                dst[x] = (b << 11) | (g << 5) | r;
#endif
            }
        }
        /* src and srca share one stride; the destination has its own */
        src     += srcstride;
        srca    += srcstride;
        dstbase += dststride;
    }
}