Another round of subtitle code cleanup
[mplayer/glamo.git] / libvo / osd.c
blob a307558f68e0eb7a41c852a6faeef9cfc3cfc16f
1 // Generic alpha renderers for all YUV modes and RGB depths.
2 // These are "reference implementations", should be optimized later (MMX, etc)
3 // Templating Code from Michael Niedermayer (michaelni@gmx.at) is under GPL
5 //#define FAST_OSD
6 //#define FAST_OSD_TABLE
8 #include "config.h"
9 #include "osd.h"
10 #include "mp_msg.h"
11 #include <inttypes.h>
12 #include "cpudetect.h"
13 #include "mangle.h"
// The assembler-based variants can only be built when targeting x86.
#ifdef ARCH_X86
#define CAN_COMPILE_X86_ASM
#endif

#ifdef CAN_COMPILE_X86_ASM
// 8-byte-aligned 64-bit constants, only defined on x86 builds.
// NOTE(review): nothing in this file reads them from C; presumably they are
// referenced from the inline asm in osd_template.c -- confirm there.
static const uint64_t bFF attribute_used __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL;
static const unsigned long long mask24lh __attribute__((aligned(8))) = 0xFFFF000000000000ULL;
static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL;
#endif

// Note: we have C, X86-noMMX, MMX, MMX2 and 3DNOW versions; there is no
// combined 3DNOW+MMX2 one.
//
// The COMPILE_* macros below record which variants must be instantiated:
// with RUNTIME_CPUDETECT all of them, otherwise exactly the one matching the
// build-time HAVE_* configuration.

// Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef CAN_COMPILE_X86_ASM

// MMX only (neither 3DNow nor MMX2 available at build time)
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

// MMX2 wins over 3DNow when both are available at build time
#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif //CAN_COMPILE_X86_ASM
// Drop the build-wide CPU feature macros. From here on HAVE_* are set
// explicitly before each inclusion of osd_template.c, and RENAME() appends a
// per-variant suffix to whatever identifier it is applied to.
// NOTE(review): presumably osd_template.c wraps its function names in
// RENAME() and tests HAVE_* to pick its code paths -- confirm in the template.
//
// The HAVE_* state left behind by the last inclusion is what the
// non-RUNTIME_CPUDETECT branches of the dispatch functions below test.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW

#ifndef CAN_COMPILE_X86_ASM

// Non-x86 build: only the portable variant, suffixed _C
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _C
#include "osd_template.c"
#endif

#else

//X86 noMMX versions
#ifdef COMPILE_C
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _X86
#include "osd_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "osd_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "osd_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "osd_template.c"
#endif

#endif //CAN_COMPILE_X86_ASM
/**
 * Public entry point for the "yv12" OSD alpha renderer.
 *
 * Forwards all seven arguments, unchanged, to exactly one suffixed variant:
 *  - with RUNTIME_CPUDETECT on an x86 build the variant is picked at run time
 *    from gCpuCaps (MMX2, then 3DNow, then MMX, otherwise _X86);
 *  - with RUNTIME_CPUDETECT on a non-x86 build the _C variant is used;
 *  - without RUNTIME_CPUDETECT the choice is fixed at compile time by
 *    HAVE_MMX2 / HAVE_3DNOW / HAVE_MMX / ARCH_X86, falling back to _C.
 *
 * NOTE(review): the suffixed variants are not defined here; presumably they
 * come from the RENAME()d inclusions of osd_template.c -- confirm there.
 */
void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered by speed / fastest first
    if(gCpuCaps.hasMMX2)
        vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.has3DNow)
        vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.hasMMX)
        vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
    vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_3DNOW)
    vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
    vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(ARCH_X86)
    vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}
/**
 * Public entry point for the "yuy2" OSD alpha renderer.
 *
 * Forwards all seven arguments, unchanged, to exactly one suffixed variant:
 *  - with RUNTIME_CPUDETECT on an x86 build the variant is picked at run time
 *    from gCpuCaps (MMX2, then 3DNow, then MMX, otherwise _X86);
 *  - with RUNTIME_CPUDETECT on a non-x86 build the _C variant is used;
 *  - without RUNTIME_CPUDETECT the choice is fixed at compile time by
 *    HAVE_MMX2 / HAVE_3DNOW / HAVE_MMX / ARCH_X86, falling back to _C.
 *
 * NOTE(review): the suffixed variants are not defined here; presumably they
 * come from the RENAME()d inclusions of osd_template.c -- confirm there.
 */
void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered by speed / fastest first
    if(gCpuCaps.hasMMX2)
        vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.has3DNow)
        vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.hasMMX)
        vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
    vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_3DNOW)
    vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
    vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(ARCH_X86)
    vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}
/**
 * Public entry point for the "uyvy" OSD alpha renderer.
 *
 * Forwards all seven arguments, unchanged, to exactly one suffixed variant:
 *  - with RUNTIME_CPUDETECT on an x86 build the variant is picked at run time
 *    from gCpuCaps (MMX2, then 3DNow, then MMX, otherwise _X86);
 *  - with RUNTIME_CPUDETECT on a non-x86 build the _C variant is used;
 *  - without RUNTIME_CPUDETECT the choice is fixed at compile time by
 *    HAVE_MMX2 / HAVE_3DNOW / HAVE_MMX / ARCH_X86, falling back to _C.
 *
 * NOTE(review): the suffixed variants are not defined here; presumably they
 * come from the RENAME()d inclusions of osd_template.c -- confirm there.
 */
void vo_draw_alpha_uyvy(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered by speed / fastest first
    if(gCpuCaps.hasMMX2)
        vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.has3DNow)
        vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.hasMMX)
        vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
    vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_3DNOW)
    vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
    vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(ARCH_X86)
    vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}
/**
 * Public entry point for the "rgb24" OSD alpha renderer.
 *
 * Forwards all seven arguments, unchanged, to exactly one suffixed variant:
 *  - with RUNTIME_CPUDETECT on an x86 build the variant is picked at run time
 *    from gCpuCaps (MMX2, then 3DNow, then MMX, otherwise _X86);
 *  - with RUNTIME_CPUDETECT on a non-x86 build the _C variant is used;
 *  - without RUNTIME_CPUDETECT the choice is fixed at compile time by
 *    HAVE_MMX2 / HAVE_3DNOW / HAVE_MMX / ARCH_X86, falling back to _C.
 *
 * NOTE(review): the suffixed variants are not defined here; presumably they
 * come from the RENAME()d inclusions of osd_template.c -- confirm there.
 */
void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered by speed / fastest first
    if(gCpuCaps.hasMMX2)
        vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.has3DNow)
        vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.hasMMX)
        vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
    vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_3DNOW)
    vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
    vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(ARCH_X86)
    vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}
/**
 * Public entry point for the "rgb32" OSD alpha renderer.
 *
 * Forwards all seven arguments, unchanged, to exactly one suffixed variant:
 *  - with RUNTIME_CPUDETECT on an x86 build the variant is picked at run time
 *    from gCpuCaps (MMX2, then 3DNow, then MMX, otherwise _X86);
 *  - with RUNTIME_CPUDETECT on a non-x86 build the _C variant is used;
 *  - without RUNTIME_CPUDETECT the choice is fixed at compile time by
 *    HAVE_MMX2 / HAVE_3DNOW / HAVE_MMX / ARCH_X86, falling back to _C.
 *
 * NOTE(review): the suffixed variants are not defined here; presumably they
 * come from the RENAME()d inclusions of osd_template.c -- confirm there.
 */
void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered by speed / fastest first
    if(gCpuCaps.hasMMX2)
        vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.has3DNow)
        vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.hasMMX)
        vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
    vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_3DNOW)
    vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
    vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined(ARCH_X86)
    vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}
#ifdef FAST_OSD_TABLE
// Lookup tables indexed by an 8-bit OSD gray level; each entry is the
// matching gray pixel in 15 bpp (5-5-5) resp. 16 bpp (5-6-5) layout.
// They are filled by vo_draw_alpha_init() and read by the FAST_OSD paths of
// vo_draw_alpha_rgb15() / vo_draw_alpha_rgb16().
static unsigned short fast_osd_15bpp_table[256];
static unsigned short fast_osd_16bpp_table[256];
#endif
259 void vo_draw_alpha_init(void){
260 #ifdef FAST_OSD_TABLE
261 int i;
262 for(i=0;i<256;i++){
263 fast_osd_15bpp_table[i]=((i>>3)<<10)|((i>>3)<<5)|(i>>3);
264 fast_osd_16bpp_table[i]=((i>>3)<<11)|((i>>2)<<5)|(i>>3);
266 #endif
267 //FIXME the optimized stuff is a lie for 15/16bpp as they aren't optimized yet
268 if( mp_msg_test(MSGT_OSD,MSGL_V) )
270 #ifdef RUNTIME_CPUDETECT
271 #ifdef CAN_COMPILE_X86_ASM
272 // ordered per speed fasterst first
273 if(gCpuCaps.hasMMX2)
274 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
275 else if(gCpuCaps.has3DNow)
276 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
277 else if(gCpuCaps.hasMMX)
278 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
279 else
280 mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
281 #else
282 mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
283 #endif
284 #else //RUNTIME_CPUDETECT
285 #ifdef HAVE_MMX2
286 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
287 #elif defined (HAVE_3DNOW)
288 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
289 #elif defined (HAVE_MMX)
290 mp_msg(MSGT_OSD,MSGL_INFO,"Using MMX Optimized OnScreenDisplay\n");
291 #elif defined(ARCH_X86)
292 mp_msg(MSGT_OSD,MSGL_INFO,"Using X86 Optimized OnScreenDisplay\n");
293 #else
294 mp_msg(MSGT_OSD,MSGL_INFO,"Using Unoptimized OnScreenDisplay\n");
295 #endif
296 #endif //!RUNTIME_CPUDETECT
/**
 * Blend an 8-bit OSD bitmap onto a 15 bpp (5-5-5) surface, in place.
 *
 * Pixels whose srca[] entry is 0 are left untouched. For every other pixel
 * each 5-bit channel c of the destination becomes
 *     (((c * srca) >> 5) + src) >> 3
 * i.e. the channel is widened to 8 bits and scaled by srca/256, the src value
 * is added, and the sum is reduced back to 5 bits.
 * With FAST_OSD defined the pixel is instead overwritten with the gray value
 * derived from src (via the lookup table when FAST_OSD_TABLE is also defined).
 *
 * NOTE(review): the sum is not clamped; presumably callers supply src/srca
 * pairs whose blended result fits in 8 bits -- confirm against the OSD code.
 *
 * @param w          pixels processed per row
 * @param h          number of rows
 * @param src        per-pixel value added after scaling, one byte per pixel
 * @param srca       per-pixel multiplier, one byte per pixel; 0 skips the pixel
 * @param srcstride  bytes between consecutive rows of both src and srca
 * @param dstbase    first destination row, accessed as unsigned short pixels
 * @param dststride  bytes between consecutive destination rows
 */
void vo_draw_alpha_rgb15(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    for(y=0;y<h;y++){
        unsigned short *dst = (unsigned short*) dstbase;
        int x;
        for(x=0;x<w;x++){
            if(srca[x]){
#ifdef FAST_OSD
#ifdef FAST_OSD_TABLE
                dst[x]=fast_osd_15bpp_table[src[x]];
#else
                unsigned int a=src[x]>>3;
                dst[x]=(a<<10)|(a<<5)|a;
#endif
#else
                unsigned char r=dst[x]&0x1F;
                unsigned char g=(dst[x]>>5)&0x1F;
                unsigned char b=(dst[x]>>10)&0x1F;
                r=(((r*srca[x])>>5)+src[x])>>3;
                g=(((g*srca[x])>>5)+src[x])>>3;
                b=(((b*srca[x])>>5)+src[x])>>3;
                dst[x]=(b<<10)|(g<<5)|r;
#endif
            }
        }
        // advance all three planes to the next row
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
}
/**
 * Blend an 8-bit OSD bitmap onto a 16 bpp (5-6-5) surface, in place.
 *
 * Pixels whose srca[] entry is 0 are left untouched. For every other pixel
 * the 5-bit channels (bits 0-4 and 11-15) become
 *     (((c * srca) >> 5) + src) >> 3
 * and the 6-bit channel (bits 5-10) becomes
 *     (((c * srca) >> 6) + src) >> 2
 * i.e. each channel is widened to 8 bits and scaled by srca/256, the src
 * value is added, and the sum is reduced back to the channel width.
 * With FAST_OSD defined the pixel is instead overwritten with the gray value
 * derived from src (via the lookup table when FAST_OSD_TABLE is also defined).
 *
 * NOTE(review): the sum is not clamped; presumably callers supply src/srca
 * pairs whose blended result fits in 8 bits -- confirm against the OSD code.
 *
 * @param w          pixels processed per row
 * @param h          number of rows
 * @param src        per-pixel value added after scaling, one byte per pixel
 * @param srca       per-pixel multiplier, one byte per pixel; 0 skips the pixel
 * @param srcstride  bytes between consecutive rows of both src and srca
 * @param dstbase    first destination row, accessed as unsigned short pixels
 * @param dststride  bytes between consecutive destination rows
 */
void vo_draw_alpha_rgb16(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    for(y=0;y<h;y++){
        unsigned short *dst = (unsigned short*) dstbase;
        int x;
        for(x=0;x<w;x++){
            if(srca[x]){
#ifdef FAST_OSD
#ifdef FAST_OSD_TABLE
                dst[x]=fast_osd_16bpp_table[src[x]];
#else
                dst[x]=((src[x]>>3)<<11)|((src[x]>>2)<<5)|(src[x]>>3);
#endif
#else
                unsigned char r=dst[x]&0x1F;
                unsigned char g=(dst[x]>>5)&0x3F;
                unsigned char b=(dst[x]>>11)&0x1F;
                r=(((r*srca[x])>>5)+src[x])>>3;
                g=(((g*srca[x])>>6)+src[x])>>2;
                b=(((b*srca[x])>>5)+src[x])>>3;
                dst[x]=(b<<11)|(g<<5)|r;
#endif
            }
        }
        // advance all three planes to the next row
        src+=srcstride;
        srca+=srcstride;
        dstbase+=dststride;
    }
}