Makefile: Don't use "install -d" on existing directories
[mplayer.git] / libvo / osd.c
blobda7d61fb507d85cc950bbe59c00266835b1e815c
1 // Generic alpha renderers for all YUV modes and RGB depths.
2 // These are "reference implementations", should be optimized later (MMX, etc)
3 // Templating Code from Michael Niedermayer (michaelni@gmx.at) is under GPL
5 //#define FAST_OSD
6 //#define FAST_OSD_TABLE
8 #include "config.h"
9 #include "osd.h"
10 #include "mp_msg.h"
11 #include <inttypes.h>
12 #include "cpudetect.h"
// Inline x86 assembly variants can only be built when the target is x86.
14 #ifdef ARCH_X86
15 #define CAN_COMPILE_X86_ASM
16 #endif
// 8-byte aligned 64-bit constants, emitted only for x86 builds.
// NOTE(review): nothing in this file references them; presumably they are used
// by the MMX code inside osd_template.c -- confirm against the template.
18 #ifdef CAN_COMPILE_X86_ASM
19 static const uint64_t bFF __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL;
20 static const unsigned long long mask24lh __attribute__((aligned(8))) = 0xFFFF000000000000ULL;
21 static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL;
22 #endif
24 //Note: we have C, X86-nommx, MMX, MMX2 and 3DNOW versions; there's no 3DNOW+MMX2 one
25 //Plain C versions: built when MMX is not configured or when the CPU is detected at run time
26 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
27 #define COMPILE_C
28 #endif
// Decide which assembly flavours to build. Without RUNTIME_CPUDETECT at most
// one of COMPILE_MMX / COMPILE_MMX2 / COMPILE_3DNOW ends up defined (MMX2 wins
// over 3DNOW, both win over plain MMX); with RUNTIME_CPUDETECT all three are.
30 #ifdef CAN_COMPILE_X86_ASM
32 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
33 #define COMPILE_MMX
34 #endif
36 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
37 #define COMPILE_MMX2
38 #endif
40 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
41 #define COMPILE_3DNOW
42 #endif
43 #endif //CAN_COMPILE_X86_ASM
// Drop the configured feature macros: from here on every inclusion of
// osd_template.c sets exactly the HAVE_* macros it wants the template to see.
45 #undef HAVE_MMX
46 #undef HAVE_MMX2
47 #undef HAVE_3DNOW
// Non-x86 build: the only variant that can be instantiated is the plain C one.
49 #ifndef CAN_COMPILE_X86_ASM
51 #ifdef COMPILE_C
52 #undef HAVE_MMX
53 #undef HAVE_MMX2
54 #undef HAVE_3DNOW
// RENAME() appends the variant suffix to each name the template passes through it.
55 #define RENAME(a) a ## _C
56 #include "osd_template.c"
57 #endif
59 #else
61 //X86 noMMX versions (suffix _X86), built under the same COMPILE_C condition
62 #ifdef COMPILE_C
63 #undef RENAME
64 #undef HAVE_MMX
65 #undef HAVE_MMX2
66 #undef HAVE_3DNOW
67 #define RENAME(a) a ## _X86
68 #include "osd_template.c"
69 #endif
71 //MMX versions (suffix _MMX): template sees HAVE_MMX only
72 #ifdef COMPILE_MMX
73 #undef RENAME
74 #define HAVE_MMX
75 #undef HAVE_MMX2
76 #undef HAVE_3DNOW
77 #define RENAME(a) a ## _MMX
78 #include "osd_template.c"
79 #endif
81 //MMX2 versions (suffix _MMX2): template sees HAVE_MMX and HAVE_MMX2
82 #ifdef COMPILE_MMX2
83 #undef RENAME
84 #define HAVE_MMX
85 #define HAVE_MMX2
86 #undef HAVE_3DNOW
87 #define RENAME(a) a ## _MMX2
88 #include "osd_template.c"
89 #endif
91 //3DNOW versions (suffix _3DNow): template sees HAVE_MMX and HAVE_3DNOW
92 #ifdef COMPILE_3DNOW
93 #undef RENAME
94 #define HAVE_MMX
95 #undef HAVE_MMX2
96 #define HAVE_3DNOW
97 #define RENAME(a) a ## _3DNow
98 #include "osd_template.c"
99 #endif
// NOTE: the HAVE_* macros keep whatever state the last inclusion above left
// them in; the "#ifdef HAVE_MMX2 / #elif defined (HAVE_3DNOW) ..." chains
// further down in this file test that leftover state, not the configured one.
101 #endif //CAN_COMPILE_X86_ASM
// Entry point of the yv12 alpha renderer.
// Forwards every argument untouched to exactly one vo_draw_alpha_yv12_<suffix>
// variant. With RUNTIME_CPUDETECT on an x86 build the variant is chosen from
// the gCpuCaps flags on each call (MMX2, then 3DNow, then MMX, then plain X86);
// in every other configuration the preprocessor selects a single variant at
// compile time, ending with the _C fallback.
void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#if defined(RUNTIME_CPUDETECT) && defined(CAN_COMPILE_X86_ASM)
    // candidates are tried fastest first
    if(gCpuCaps.hasMMX2){
        vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    }else if(gCpuCaps.has3DNow){
        vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    }else if(gCpuCaps.hasMMX){
        vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    }else{
        vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#elif defined(RUNTIME_CPUDETECT)
    // run-time detection requested, but no x86 assembly variant was built
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(HAVE_MMX2)
    vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(HAVE_3DNOW)
    vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(HAVE_MMX)
    vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(ARCH_X86)
    vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
}
// Entry point of the yuy2 alpha renderer.
// Forwards every argument untouched to exactly one vo_draw_alpha_yuy2_<suffix>
// variant. With RUNTIME_CPUDETECT on an x86 build the variant is chosen from
// the gCpuCaps flags on each call (MMX2, then 3DNow, then MMX, then plain X86);
// in every other configuration the preprocessor selects a single variant at
// compile time, ending with the _C fallback.
void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#if defined(RUNTIME_CPUDETECT) && defined(CAN_COMPILE_X86_ASM)
    // candidates are tried fastest first
    if(gCpuCaps.hasMMX2){
        vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    }else if(gCpuCaps.has3DNow){
        vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    }else if(gCpuCaps.hasMMX){
        vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    }else{
        vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#elif defined(RUNTIME_CPUDETECT)
    // run-time detection requested, but no x86 assembly variant was built
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(HAVE_MMX2)
    vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(HAVE_3DNOW)
    vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(HAVE_MMX)
    vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(ARCH_X86)
    vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
}
// Entry point of the uyvy alpha renderer.
// Forwards every argument untouched to exactly one vo_draw_alpha_uyvy_<suffix>
// variant. With RUNTIME_CPUDETECT on an x86 build the variant is chosen from
// the gCpuCaps flags on each call (MMX2, then 3DNow, then MMX, then plain X86);
// in every other configuration the preprocessor selects a single variant at
// compile time, ending with the _C fallback.
void vo_draw_alpha_uyvy(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#if defined(RUNTIME_CPUDETECT) && defined(CAN_COMPILE_X86_ASM)
    // candidates are tried fastest first
    if(gCpuCaps.hasMMX2){
        vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    }else if(gCpuCaps.has3DNow){
        vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    }else if(gCpuCaps.hasMMX){
        vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    }else{
        vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#elif defined(RUNTIME_CPUDETECT)
    // run-time detection requested, but no x86 assembly variant was built
    vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(HAVE_MMX2)
    vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(HAVE_3DNOW)
    vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(HAVE_MMX)
    vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(ARCH_X86)
    vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
}
// Entry point of the rgb24 alpha renderer.
// Forwards every argument untouched to exactly one vo_draw_alpha_rgb24_<suffix>
// variant. With RUNTIME_CPUDETECT on an x86 build the variant is chosen from
// the gCpuCaps flags on each call (MMX2, then 3DNow, then MMX, then plain X86);
// in every other configuration the preprocessor selects a single variant at
// compile time, ending with the _C fallback.
void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#if defined(RUNTIME_CPUDETECT) && defined(CAN_COMPILE_X86_ASM)
    // candidates are tried fastest first
    if(gCpuCaps.hasMMX2){
        vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    }else if(gCpuCaps.has3DNow){
        vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    }else if(gCpuCaps.hasMMX){
        vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    }else{
        vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#elif defined(RUNTIME_CPUDETECT)
    // run-time detection requested, but no x86 assembly variant was built
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(HAVE_MMX2)
    vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(HAVE_3DNOW)
    vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(HAVE_MMX)
    vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(ARCH_X86)
    vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
}
// Entry point of the rgb32 alpha renderer.
// Forwards every argument untouched to exactly one vo_draw_alpha_rgb32_<suffix>
// variant. With RUNTIME_CPUDETECT on an x86 build the variant is chosen from
// the gCpuCaps flags on each call (MMX2, then 3DNow, then MMX, then plain X86);
// in every other configuration the preprocessor selects a single variant at
// compile time, ending with the _C fallback.
void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#if defined(RUNTIME_CPUDETECT) && defined(CAN_COMPILE_X86_ASM)
    // candidates are tried fastest first
    if(gCpuCaps.hasMMX2){
        vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    }else if(gCpuCaps.has3DNow){
        vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    }else if(gCpuCaps.hasMMX){
        vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    }else{
        vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#elif defined(RUNTIME_CPUDETECT)
    // run-time detection requested, but no x86 assembly variant was built
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(HAVE_MMX2)
    vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(HAVE_3DNOW)
    vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(HAVE_MMX)
    vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(ARCH_X86)
    vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
}
253 #ifdef FAST_OSD_TABLE
254 static unsigned short fast_osd_15bpp_table[256];
255 static unsigned short fast_osd_16bpp_table[256];
256 #endif
258 void vo_draw_alpha_init(void){
259 #ifdef FAST_OSD_TABLE
260 int i;
261 for(i=0;i<256;i++){
262 fast_osd_15bpp_table[i]=((i>>3)<<10)|((i>>3)<<5)|(i>>3);
263 fast_osd_16bpp_table[i]=((i>>3)<<11)|((i>>2)<<5)|(i>>3);
265 #endif
266 //FIXME the optimized stuff is a lie for 15/16bpp as they aren't optimized yet
267 if( mp_msg_test(MSGT_OSD,MSGL_V) )
269 #ifdef RUNTIME_CPUDETECT
270 #ifdef CAN_COMPILE_X86_ASM
271 // ordered per speed fasterst first
272 if(gCpuCaps.hasMMX2)
273 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
274 else if(gCpuCaps.has3DNow)
275 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
276 else if(gCpuCaps.hasMMX)
277 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
278 else
279 mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
280 #else
281 mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
282 #endif
283 #else //RUNTIME_CPUDETECT
284 #ifdef HAVE_MMX2
285 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
286 #elif defined (HAVE_3DNOW)
287 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
288 #elif defined (HAVE_MMX)
289 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
290 #elif defined(ARCH_X86)
291 mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
292 #else
293 mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
294 #endif
295 #endif //!RUNTIME_CPUDETECT
// Blend an 8-bit OSD bitmap onto a surface of 16-bit pixels holding three
// 5-bit fields (bits 0-4, 5-9 and 10-14).
//
//   w, h       size of the bitmap in pixels
//   src        grey value added to every field of a covered pixel
//   srca       per-pixel factor applied to the existing field values;
//              a value of 0 marks the pixel as untouched
//   srcstride  distance in bytes between rows of both src and srca
//   dstbase    first destination pixel (accessed as unsigned short)
//   dststride  distance in bytes between destination rows
//
// Each field is scaled to 8 bits, multiplied by srca, offset by src and cut
// back to 5 bits. The 8-bit sum is saturated at 255 so that an out-of-range
// src/srca combination can no longer spill into the neighbouring field.
// With FAST_OSD the old pixel is ignored and simply replaced by the grey value.
void vo_draw_alpha_rgb15(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    for(y=0;y<h;y++){
        unsigned short *dst = (unsigned short*) dstbase;
        int x;
        for(x=0;x<w;x++){
            if(!srca[x])
                continue; // nothing to draw on this pixel
#ifdef FAST_OSD
#ifdef FAST_OSD_TABLE
            dst[x]=fast_osd_15bpp_table[src[x]];
#else
            {
                unsigned int a=src[x]>>3;
                dst[x]=(a<<10)|(a<<5)|a;
            }
#endif
#else
            {
                const unsigned int pix=dst[x];
                const unsigned int alpha=srca[x];
                const unsigned int grey=src[x];
                // (field*alpha)>>5 is the 5-bit field scaled by alpha, expressed in 8 bits
                unsigned int r=(((pix&0x1F)*alpha)>>5)+grey;
                unsigned int g=((((pix>>5)&0x1F)*alpha)>>5)+grey;
                unsigned int b=((((pix>>10)&0x1F)*alpha)>>5)+grey;
                // saturate: without this, sum>>3 may need 6 bits and corrupt the next field
                if(r>255) r=255;
                if(g>255) g=255;
                if(b>255) b=255;
                dst[x]=((b>>3)<<10)|((g>>3)<<5)|(r>>3);
            }
#endif
        }
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
}
// Blend an 8-bit OSD bitmap onto a surface of 16-bit pixels holding a 5-bit
// field (bits 0-4), a 6-bit field (bits 5-10) and a 5-bit field (bits 11-15).
//
//   w, h       size of the bitmap in pixels
//   src        grey value added to every field of a covered pixel
//   srca       per-pixel factor applied to the existing field values;
//              a value of 0 marks the pixel as untouched
//   srcstride  distance in bytes between rows of both src and srca
//   dstbase    first destination pixel (accessed as unsigned short)
//   dststride  distance in bytes between destination rows
//
// Each field is scaled to 8 bits, multiplied by srca, offset by src and cut
// back to its own width. The 8-bit sum is saturated at 255 so that an
// out-of-range src/srca combination can no longer spill into the neighbouring
// field. With FAST_OSD the old pixel is ignored and replaced by the grey value.
void vo_draw_alpha_rgb16(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    for(y=0;y<h;y++){
        unsigned short *dst = (unsigned short*) dstbase;
        int x;
        for(x=0;x<w;x++){
            if(!srca[x])
                continue; // nothing to draw on this pixel
#ifdef FAST_OSD
#ifdef FAST_OSD_TABLE
            dst[x]=fast_osd_16bpp_table[src[x]];
#else
            dst[x]=((src[x]>>3)<<11)|((src[x]>>2)<<5)|(src[x]>>3);
#endif
#else
            {
                const unsigned int pix=dst[x];
                const unsigned int alpha=srca[x];
                const unsigned int grey=src[x];
                // 5-bit fields are scaled with >>5, the 6-bit middle field with >>6
                unsigned int r=(((pix&0x1F)*alpha)>>5)+grey;
                unsigned int g=((((pix>>5)&0x3F)*alpha)>>6)+grey;
                unsigned int b=((((pix>>11)&0x1F)*alpha)>>5)+grey;
                // saturate: without this the reduced value may exceed its field width
                if(r>255) r=255;
                if(g>255) g=255;
                if(b>255) b=255;
                dst[x]=((b>>3)<<11)|((g>>2)<<5)|(r>>3);
            }
#endif
        }
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
}