1 // Generic alpha renderers for all YUV modes and RGB depths.
2 // These are "reference implementations", should be optimized later (MMX, etc)
3 // Templating Code from Michael Niedermayer (michaelni@gmx.at) is under GPL
6 //#define FAST_OSD_TABLE
12 #include "../cpudetect.h"
13 #include "../mangle.h"
15 extern int verbose
; // defined in mplayer.c
18 #define CAN_COMPILE_X86_ASM
21 #ifdef CAN_COMPILE_X86_ASM
// 64-bit mask constants, forced to 8-byte alignment.
// NOTE(review): nothing in this file reads them directly; presumably they are
// referenced from the inline assembly in osd_template.c -- confirm before
// renaming or removing.
// All three now share the same fixed-width type (previously two of them were
// declared as unsigned long long while bFF was uint64_t).
static const uint64_t bFF      __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; // all 64 bits set
static const uint64_t mask24lh __attribute__((aligned(8))) = 0xFFFF000000000000ULL; // top 16 bits set
static const uint64_t mask24hl __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL; // low 48 bits set
//Note: we have C, X86-noMMX, MMX, MMX2 and 3DNOW versions; there is no combined 3DNOW+MMX2 one
29 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
33 #ifdef CAN_COMPILE_X86_ASM
35 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
39 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
43 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
46 #endif //CAN_COMPILE_X86_ASM
58 #define RENAME(a) a ## _C
59 #include "osd_template.c"
62 #ifdef CAN_COMPILE_X86_ASM
71 #define RENAME(a) a ## _X86
72 #include "osd_template.c"
82 #define RENAME(a) a ## _MMX
83 #include "osd_template.c"
93 #define RENAME(a) a ## _MMX2
94 #include "osd_template.c"
104 #define RENAME(a) a ## _3DNow
105 #include "osd_template.c"
108 #endif //CAN_COMPILE_X86_ASM
// Public entry point for alpha-blending an OSD bitmap onto a YV12 destination.
//
// This function does no pixel work itself: it forwards all seven arguments,
// unchanged, to exactly one of the vo_draw_alpha_yv12_{MMX2,3DNow,MMX,X86,C}
// variants that are generated by including osd_template.c with different
// RENAME() suffixes.
//
//  - With RUNTIME_CPUDETECT and CAN_COMPILE_X86_ASM the variant is chosen at
//    run time from the gCpuCaps flags.
//  - With RUNTIME_CPUDETECT but without x86 asm support the C variant is used.
//  - Without RUNTIME_CPUDETECT the variant is fixed at compile time from the
//    HAVE_MMX2 / HAVE_3DNOW / HAVE_MMX / ARCH_X86 macros.
//
// Parameters (meaning presumed from the names; the template that consumes
// them is not visible here -- TODO confirm): w/h bitmap size, src bitmap
// values, srca per-pixel alpha, srcstride stride of src and srca, dstbase
// destination buffer, dststride destination stride.
//
// NOTE(review): the leading `if (gCpuCaps.hasMMX2)` test, the bare `else`,
// `#ifdef HAVE_MMX2`, and the inner `#else`/`#endif` lines were absent from the
// reviewed text and are reconstructed from the surviving `else if` / `#elif`
// chain -- confirm against the build configuration.
void vo_draw_alpha_yv12(int w, int h, unsigned char *src, unsigned char *srca,
                        int srcstride, unsigned char *dstbase, int dststride)
{
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered by speed, fastest first
    if (gCpuCaps.hasMMX2) {
        vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.has3DNow) {
        vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.hasMMX) {
        vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    } else {
        vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
    vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_3DNOW)
    vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
    vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (ARCH_X86)
    vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}
// Public entry point for alpha-blending an OSD bitmap onto a YUY2 destination.
//
// Pure dispatcher: forwards all seven arguments, unchanged, to exactly one of
// the vo_draw_alpha_yuy2_{MMX2,3DNow,MMX,X86,C} variants generated by
// including osd_template.c with different RENAME() suffixes.
//
//  - RUNTIME_CPUDETECT + CAN_COMPILE_X86_ASM: variant picked at run time from
//    the gCpuCaps flags.
//  - RUNTIME_CPUDETECT without x86 asm support: the C variant.
//  - No RUNTIME_CPUDETECT: variant fixed at compile time from HAVE_MMX2 /
//    HAVE_3DNOW / HAVE_MMX / ARCH_X86.
//
// Parameters (meaning presumed from the names -- TODO confirm against
// osd_template.c): w/h bitmap size, src bitmap values, srca per-pixel alpha,
// srcstride stride of src and srca, dstbase destination buffer, dststride
// destination stride.
//
// NOTE(review): the leading `if (gCpuCaps.hasMMX2)` test, the bare `else`,
// `#ifdef HAVE_MMX2`, and the inner `#else`/`#endif` lines were absent from the
// reviewed text and are reconstructed from the surviving `else if` / `#elif`
// chain -- confirm against the build configuration.
void vo_draw_alpha_yuy2(int w, int h, unsigned char *src, unsigned char *srca,
                        int srcstride, unsigned char *dstbase, int dststride)
{
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered by speed, fastest first
    if (gCpuCaps.hasMMX2) {
        vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.has3DNow) {
        vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.hasMMX) {
        vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    } else {
        vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
    vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_3DNOW)
    vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
    vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (ARCH_X86)
    vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}
// Public entry point for alpha-blending an OSD bitmap onto a 24 bpp RGB
// destination.
//
// Pure dispatcher: forwards all seven arguments, unchanged, to exactly one of
// the vo_draw_alpha_rgb24_{MMX2,3DNow,MMX,X86,C} variants generated by
// including osd_template.c with different RENAME() suffixes.
//
//  - RUNTIME_CPUDETECT + CAN_COMPILE_X86_ASM: variant picked at run time from
//    the gCpuCaps flags.
//  - RUNTIME_CPUDETECT without x86 asm support: the C variant.
//  - No RUNTIME_CPUDETECT: variant fixed at compile time from HAVE_MMX2 /
//    HAVE_3DNOW / HAVE_MMX / ARCH_X86.
//
// Parameters (meaning presumed from the names -- TODO confirm against
// osd_template.c): w/h bitmap size, src bitmap values, srca per-pixel alpha,
// srcstride stride of src and srca, dstbase destination buffer, dststride
// destination stride.
//
// NOTE(review): the leading `if (gCpuCaps.hasMMX2)` test, the bare `else`,
// `#ifdef HAVE_MMX2`, and the inner `#else`/`#endif` lines were absent from the
// reviewed text and are reconstructed from the surviving `else if` / `#elif`
// chain -- confirm against the build configuration.
void vo_draw_alpha_rgb24(int w, int h, unsigned char *src, unsigned char *srca,
                         int srcstride, unsigned char *dstbase, int dststride)
{
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered by speed, fastest first
    if (gCpuCaps.hasMMX2) {
        vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.has3DNow) {
        vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.hasMMX) {
        vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    } else {
        vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
    vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_3DNOW)
    vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
    vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (ARCH_X86)
    vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}
// Public entry point for alpha-blending an OSD bitmap onto a 32 bpp RGB
// destination.
//
// Pure dispatcher: forwards all seven arguments, unchanged, to exactly one of
// the vo_draw_alpha_rgb32_{MMX2,3DNow,MMX,X86,C} variants generated by
// including osd_template.c with different RENAME() suffixes.
//
//  - RUNTIME_CPUDETECT + CAN_COMPILE_X86_ASM: variant picked at run time from
//    the gCpuCaps flags.
//  - RUNTIME_CPUDETECT without x86 asm support: the C variant.
//  - No RUNTIME_CPUDETECT: variant fixed at compile time from HAVE_MMX2 /
//    HAVE_3DNOW / HAVE_MMX / ARCH_X86.
//
// Parameters (meaning presumed from the names -- TODO confirm against
// osd_template.c): w/h bitmap size, src bitmap values, srca per-pixel alpha,
// srcstride stride of src and srca, dstbase destination buffer, dststride
// destination stride.
//
// NOTE(review): the leading `if (gCpuCaps.hasMMX2)` test, the bare `else`,
// `#ifdef HAVE_MMX2`, and the inner `#else`/`#endif` lines were absent from the
// reviewed text and are reconstructed from the surviving `else if` / `#elif`
// chain -- confirm against the build configuration.
void vo_draw_alpha_rgb32(int w, int h, unsigned char *src, unsigned char *srca,
                         int srcstride, unsigned char *dstbase, int dststride)
{
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
    // ordered by speed, fastest first
    if (gCpuCaps.hasMMX2) {
        vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.has3DNow) {
        vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    } else if (gCpuCaps.hasMMX) {
        vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    } else {
        vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
    }
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
    vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_3DNOW)
    vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (HAVE_MMX)
    vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif defined (ARCH_X86)
    vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!RUNTIME_CPUDETECT
}
230 #ifdef FAST_OSD_TABLE
// Lookup tables indexed by an 8-bit source value, one entry per possible byte.
// They are filled by vo_draw_alpha_init() and read by the 15/16 bpp renderers
// when the table-based fast path is compiled in.
static unsigned short fast_osd_15bpp_table[256],
                      fast_osd_16bpp_table[256];
235 void vo_draw_alpha_init(){
236 #ifdef FAST_OSD_TABLE
239 fast_osd_15bpp_table
[i
]=((i
>>3)<<10)|((i
>>3)<<5)|(i
>>3);
240 fast_osd_16bpp_table
[i
]=((i
>>3)<<11)|((i
>>2)<<5)|(i
>>3);
243 //FIXME the optimized stuff is a lie for 15/16bpp as they aren't optimized yet
246 #ifdef RUNTIME_CPUDETECT
247 #ifdef CAN_COMPILE_X86_ASM
248 // ordered per speed fasterst first
250 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
251 else if(gCpuCaps
.has3DNow
)
252 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
253 else if(gCpuCaps
.hasMMX
)
254 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX Optimized OnScreenDisplay\n");
256 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using X86 Optimized OnScreenDisplay\n");
258 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using Unoptimized OnScreenDisplay\n");
260 #else //RUNTIME_CPUDETECT
262 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
263 #elif defined (HAVE_3DNOW)
264 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
265 #elif defined (HAVE_MMX)
266 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX Optimized OnScreenDisplay\n");
267 #elif defined (ARCH_X86)
268 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using X86 Optimized OnScreenDisplay\n");
270 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using Unoptimized OnScreenDisplay\n");
272 #endif //!RUNTIME_CPUDETECT
// Alpha-blend a grey OSD bitmap onto a 15 bpp destination (three 5-bit
// channels at bit offsets 0, 5 and 10 of each 16-bit pixel).
//
//   w, h       size of the bitmap in pixels
//   src        bitmap intensity, one byte per pixel, added to every channel
//   srca       per-pixel multiplier applied to the existing destination
//              channel; a value of 0 leaves the destination pixel untouched
//   srcstride  bytes from one row of src/srca to the next
//   dstbase    top-left destination pixel; accessed as unsigned short, so it
//              must be suitably aligned for that type
//   dststride  bytes from one destination row to the next
//
// Per channel c (5 bits): c' = (((c * srca) >> 5) + src) >> 3, i.e. the
// channel is scaled in 8-bit precision, the source intensity is added, and
// the result is reduced back to 5 bits. No clamping is performed; the caller
// is presumably expected to supply src/srca pairs whose sum cannot exceed
// 255 -- TODO confirm.
//
// NOTE(review): the row/column loops, the `if (srca[x])` skip test, the outer
// FAST_OSD guard, and the per-row pointer advances were absent from the
// reviewed text and are reconstructed around the surviving statements --
// confirm.
void vo_draw_alpha_rgb15(int w, int h, unsigned char *src, unsigned char *srca,
                         int srcstride, unsigned char *dstbase, int dststride)
{
    int y;

    for (y = 0; y < h; y++) {
        unsigned short *dst = (unsigned short *) dstbase;
        int x;

        for (x = 0; x < w; x++) {
            if (!srca[x])
                continue; // nothing to draw on this pixel
#ifdef FAST_OSD
            // Fast path: overwrite the pixel with the grey level, no blending.
#ifdef FAST_OSD_TABLE
            dst[x] = fast_osd_15bpp_table[src[x]];
#else
            {
                unsigned int a = src[x] >> 3;
                dst[x] = (a << 10) | (a << 5) | a;
            }
#endif
#else
            {
                const unsigned int alpha = srca[x];
                const unsigned int level = src[x];
                const unsigned int pixel = dst[x];
                unsigned int r = pixel & 0x1F;
                unsigned int g = (pixel >> 5) & 0x1F;
                unsigned int b = (pixel >> 10) & 0x1F;

                r = (((r * alpha) >> 5) + level) >> 3;
                g = (((g * alpha) >> 5) + level) >> 3;
                b = (((b * alpha) >> 5) + level) >> 3;
                dst[x] = (b << 10) | (g << 5) | r;
            }
#endif
        }
        src += srcstride;
        srca += srcstride;
        dstbase += dststride;
    }
}
// Alpha-blend a grey OSD bitmap onto a 16 bpp destination (5-bit channel at
// bit 0, 6-bit channel at bit 5, 5-bit channel at bit 11 of each pixel).
//
//   w, h       size of the bitmap in pixels
//   src        bitmap intensity, one byte per pixel, added to every channel
//   srca       per-pixel multiplier applied to the existing destination
//              channel; a value of 0 leaves the destination pixel untouched
//   srcstride  bytes from one row of src/srca to the next
//   dstbase    top-left destination pixel; accessed as unsigned short, so it
//              must be suitably aligned for that type
//   dststride  bytes from one destination row to the next
//
// 5-bit channels: c' = (((c * srca) >> 5) + src) >> 3
// 6-bit channel:  c' = (((c * srca) >> 6) + src) >> 2
// i.e. each channel is scaled in 8-bit precision, the source intensity is
// added, and the result is reduced back to the channel width. No clamping is
// performed; the caller is presumably expected to supply src/srca pairs
// whose sum cannot exceed 255 -- TODO confirm.
//
// NOTE(review): the row/column loops, the `if (srca[x])` skip test, the outer
// FAST_OSD guard, the per-row pointer advances, and the end of the function
// were absent from the reviewed text and are reconstructed around the
// surviving statements -- confirm.
void vo_draw_alpha_rgb16(int w, int h, unsigned char *src, unsigned char *srca,
                         int srcstride, unsigned char *dstbase, int dststride)
{
    int y;

    for (y = 0; y < h; y++) {
        unsigned short *dst = (unsigned short *) dstbase;
        int x;

        for (x = 0; x < w; x++) {
            if (!srca[x])
                continue; // nothing to draw on this pixel
#ifdef FAST_OSD
            // Fast path: overwrite the pixel with the grey level, no blending.
#ifdef FAST_OSD_TABLE
            dst[x] = fast_osd_16bpp_table[src[x]];
#else
            dst[x] = ((src[x] >> 3) << 11) | ((src[x] >> 2) << 5) | (src[x] >> 3);
#endif
#else
            {
                const unsigned int alpha = srca[x];
                const unsigned int level = src[x];
                const unsigned int pixel = dst[x];
                unsigned int r = pixel & 0x1F;
                unsigned int g = (pixel >> 5) & 0x3F;
                unsigned int b = (pixel >> 11) & 0x1F;

                r = (((r * alpha) >> 5) + level) >> 3;
                g = (((g * alpha) >> 6) + level) >> 2;
                b = (((b * alpha) >> 5) + level) >> 3;
                dst[x] = (b << 11) | (g << 5) | r;
            }
#endif
        }
        src += srcstride;
        srca += srcstride;
        dstbase += dststride;
    }
}