2 * generic alpha renderers for all YUV modes and RGB depths
3 * These are "reference implementations", should be optimized later (MMX, etc).
4 * templating code by Michael Niedermayer (michaelni@gmx.at)
6 * This file is part of MPlayer.
8 * MPlayer is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * MPlayer is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
/* When FAST_OSD_TABLE is defined, two 256-entry lookup tables are declared
 * further down and referenced by the 15/16 bpp renderers. Disabled here. */
24 //#define FAST_OSD_TABLE
30 #include "cpudetect.h"
33 #define CAN_COMPILE_X86_ASM
/* 8-byte-aligned 64-bit constants: all bits set, only the top 16 bits set,
 * and only the low 48 bits set.
 * NOTE(review): presumably operands for the MMX code in osd_template.c,
 * which is not visible here -- confirm. */
36 #ifdef CAN_COMPILE_X86_ASM
37 static const uint64_t bFF
__attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL
;
38 static const unsigned long long mask24lh
__attribute__((aligned(8))) = 0xFFFF000000000000ULL
;
39 static const unsigned long long mask24hl
__attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL
;
42 //Note: we have C, X86-nommx, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
/* The conditions below depend on HAVE_MMX, HAVE_MMX2, HAVE_AMD3DNOW and
 * RUNTIME_CPUDETECT.
 * NOTE(review): what each condition enables is not visible in this view. */
44 #if !HAVE_MMX || defined (RUNTIME_CPUDETECT)
48 #ifdef CAN_COMPILE_X86_ASM
50 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
54 #if HAVE_MMX2 || defined (RUNTIME_CPUDETECT)
58 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
61 #endif //CAN_COMPILE_X86_ASM
/* Each RENAME()/#include pair below includes osd_template.c once, with
 * RENAME(a) pasting a different suffix onto its argument
 * (_C, _X86, _MMX, _MMX2, _3DNow). HAVE_AMD3DNOW is 1 only for the _3DNow
 * inclusion.
 * NOTE(review): the template presumably wraps its function names in
 * RENAME(), yielding the vo_draw_alpha_*_<suffix> functions called by the
 * dispatchers in this file -- confirm against osd_template.c. */
68 #define HAVE_AMD3DNOW 0
70 #ifndef CAN_COMPILE_X86_ASM
78 #define HAVE_AMD3DNOW 0
79 #define RENAME(a) a ## _C
80 #include "osd_template.c"
93 #define HAVE_AMD3DNOW 0
94 #define RENAME(a) a ## _X86
95 #include "osd_template.c"
106 #define HAVE_AMD3DNOW 0
107 #define RENAME(a) a ## _MMX
108 #include "osd_template.c"
119 #define HAVE_AMD3DNOW 0
120 #define RENAME(a) a ## _MMX2
121 #include "osd_template.c"
132 #define HAVE_AMD3DNOW 1
133 #define RENAME(a) a ## _3DNow
134 #include "osd_template.c"
137 #endif //CAN_COMPILE_X86_ASM
/*
 * Public entry point for the yv12 alpha renderer.
 *
 * Forwards all seven arguments, unchanged and in the same order, to one of
 * vo_draw_alpha_yv12_MMX2 / _3DNow / _MMX / _X86 / _C.
 *
 * With RUNTIME_CPUDETECT the choice is made at run time from gCpuCaps
 * (has3DNow and hasMMX are tested in the visible branches); without it the
 * choice is made at build time.
 * NOTE(review): the first test of the run-time chain and the preprocessor
 * conditions guarding the build-time calls are not visible in this view.
 *
 * Parameters (meanings are assumptions -- TODO confirm against
 * osd_template.c):
 *   w, h       presumably width and height of the area to draw
 *   src        source bitmap
 *   srca       source alpha map
 *   srcstride  presumably row stride of src/srca
 *   dstbase    destination buffer
 *   dststride  presumably row stride of dstbase
 */
139 void vo_draw_alpha_yv12(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
140 #ifdef RUNTIME_CPUDETECT
141 #ifdef CAN_COMPILE_X86_ASM
142 // ordered by speed / fastest first
144 vo_draw_alpha_yv12_MMX2(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
145 else if(gCpuCaps
.has3DNow
)
146 vo_draw_alpha_yv12_3DNow(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
147 else if(gCpuCaps
.hasMMX
)
148 vo_draw_alpha_yv12_MMX(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
150 vo_draw_alpha_yv12_X86(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
152 vo_draw_alpha_yv12_C(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
// Build-time selection: one call per variant, same argument list as above.
154 #else //RUNTIME_CPUDETECT
156 vo_draw_alpha_yv12_MMX2(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
158 vo_draw_alpha_yv12_3DNow(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
160 vo_draw_alpha_yv12_MMX(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
162 vo_draw_alpha_yv12_X86(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
164 vo_draw_alpha_yv12_C(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
166 #endif //!RUNTIME_CPUDETECT
/*
 * Public entry point for the yuy2 alpha renderer.
 *
 * Forwards all seven arguments, unchanged and in the same order, to one of
 * vo_draw_alpha_yuy2_MMX2 / _3DNow / _MMX / _X86 / _C.
 *
 * With RUNTIME_CPUDETECT the choice is made at run time from gCpuCaps
 * (has3DNow and hasMMX are tested in the visible branches); without it the
 * choice is made at build time.
 * NOTE(review): the first test of the run-time chain and the preprocessor
 * conditions guarding the build-time calls are not visible in this view.
 *
 * Parameters (meanings are assumptions -- TODO confirm against
 * osd_template.c):
 *   w, h       presumably width and height of the area to draw
 *   src        source bitmap
 *   srca       source alpha map
 *   srcstride  presumably row stride of src/srca
 *   dstbase    destination buffer
 *   dststride  presumably row stride of dstbase
 */
169 void vo_draw_alpha_yuy2(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
170 #ifdef RUNTIME_CPUDETECT
171 #ifdef CAN_COMPILE_X86_ASM
172 // ordered by speed / fastest first
174 vo_draw_alpha_yuy2_MMX2(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
175 else if(gCpuCaps
.has3DNow
)
176 vo_draw_alpha_yuy2_3DNow(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
177 else if(gCpuCaps
.hasMMX
)
178 vo_draw_alpha_yuy2_MMX(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
180 vo_draw_alpha_yuy2_X86(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
182 vo_draw_alpha_yuy2_C(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
// Build-time selection: one call per variant, same argument list as above.
184 #else //RUNTIME_CPUDETECT
186 vo_draw_alpha_yuy2_MMX2(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
188 vo_draw_alpha_yuy2_3DNow(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
190 vo_draw_alpha_yuy2_MMX(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
192 vo_draw_alpha_yuy2_X86(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
194 vo_draw_alpha_yuy2_C(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
196 #endif //!RUNTIME_CPUDETECT
/*
 * Public entry point for the uyvy alpha renderer.
 *
 * Forwards all seven arguments, unchanged and in the same order, to one of
 * vo_draw_alpha_uyvy_MMX2 / _3DNow / _MMX / _X86 / _C.
 *
 * With RUNTIME_CPUDETECT the choice is made at run time from gCpuCaps
 * (has3DNow and hasMMX are tested in the visible branches); without it the
 * choice is made at build time.
 * NOTE(review): the first test of the run-time chain and the preprocessor
 * conditions guarding the build-time calls are not visible in this view.
 *
 * Parameters (meanings are assumptions -- TODO confirm against
 * osd_template.c):
 *   w, h       presumably width and height of the area to draw
 *   src        source bitmap
 *   srca       source alpha map
 *   srcstride  presumably row stride of src/srca
 *   dstbase    destination buffer
 *   dststride  presumably row stride of dstbase
 */
199 void vo_draw_alpha_uyvy(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
200 #ifdef RUNTIME_CPUDETECT
201 #ifdef CAN_COMPILE_X86_ASM
202 // ordered by speed / fastest first
204 vo_draw_alpha_uyvy_MMX2(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
205 else if(gCpuCaps
.has3DNow
)
206 vo_draw_alpha_uyvy_3DNow(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
207 else if(gCpuCaps
.hasMMX
)
208 vo_draw_alpha_uyvy_MMX(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
210 vo_draw_alpha_uyvy_X86(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
212 vo_draw_alpha_uyvy_C(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
// Build-time selection: one call per variant, same argument list as above.
214 #else //RUNTIME_CPUDETECT
216 vo_draw_alpha_uyvy_MMX2(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
218 vo_draw_alpha_uyvy_3DNow(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
220 vo_draw_alpha_uyvy_MMX(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
222 vo_draw_alpha_uyvy_X86(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
224 vo_draw_alpha_uyvy_C(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
226 #endif //!RUNTIME_CPUDETECT
/*
 * Public entry point for the rgb24 alpha renderer.
 *
 * Forwards all seven arguments, unchanged and in the same order, to one of
 * vo_draw_alpha_rgb24_MMX2 / _3DNow / _MMX / _X86 / _C.
 *
 * With RUNTIME_CPUDETECT the choice is made at run time from gCpuCaps
 * (has3DNow and hasMMX are tested in the visible branches); without it the
 * choice is made at build time.
 * NOTE(review): the first test of the run-time chain and the preprocessor
 * conditions guarding the build-time calls are not visible in this view.
 *
 * Parameters (meanings are assumptions -- TODO confirm against
 * osd_template.c):
 *   w, h       presumably width and height of the area to draw
 *   src        source bitmap
 *   srca       source alpha map
 *   srcstride  presumably row stride of src/srca
 *   dstbase    destination buffer
 *   dststride  presumably row stride of dstbase
 */
229 void vo_draw_alpha_rgb24(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
230 #ifdef RUNTIME_CPUDETECT
231 #ifdef CAN_COMPILE_X86_ASM
232 // ordered by speed / fastest first
234 vo_draw_alpha_rgb24_MMX2(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
235 else if(gCpuCaps
.has3DNow
)
236 vo_draw_alpha_rgb24_3DNow(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
237 else if(gCpuCaps
.hasMMX
)
238 vo_draw_alpha_rgb24_MMX(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
240 vo_draw_alpha_rgb24_X86(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
242 vo_draw_alpha_rgb24_C(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
// Build-time selection: one call per variant, same argument list as above.
244 #else //RUNTIME_CPUDETECT
246 vo_draw_alpha_rgb24_MMX2(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
248 vo_draw_alpha_rgb24_3DNow(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
250 vo_draw_alpha_rgb24_MMX(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
252 vo_draw_alpha_rgb24_X86(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
254 vo_draw_alpha_rgb24_C(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
256 #endif //!RUNTIME_CPUDETECT
/*
 * Public entry point for the rgb32 alpha renderer.
 *
 * Forwards all seven arguments, unchanged and in the same order, to one of
 * vo_draw_alpha_rgb32_MMX2 / _3DNow / _MMX / _X86 / _C.
 *
 * With RUNTIME_CPUDETECT the choice is made at run time from gCpuCaps
 * (has3DNow and hasMMX are tested in the visible branches); without it the
 * choice is made at build time.
 * NOTE(review): the first test of the run-time chain and the preprocessor
 * conditions guarding the build-time calls are not visible in this view.
 *
 * Parameters (meanings are assumptions -- TODO confirm against
 * osd_template.c):
 *   w, h       presumably width and height of the area to draw
 *   src        source bitmap
 *   srca       source alpha map
 *   srcstride  presumably row stride of src/srca
 *   dstbase    destination buffer
 *   dststride  presumably row stride of dstbase
 */
259 void vo_draw_alpha_rgb32(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
260 #ifdef RUNTIME_CPUDETECT
261 #ifdef CAN_COMPILE_X86_ASM
262 // ordered by speed / fastest first
264 vo_draw_alpha_rgb32_MMX2(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
265 else if(gCpuCaps
.has3DNow
)
266 vo_draw_alpha_rgb32_3DNow(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
267 else if(gCpuCaps
.hasMMX
)
268 vo_draw_alpha_rgb32_MMX(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
270 vo_draw_alpha_rgb32_X86(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
272 vo_draw_alpha_rgb32_C(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
// Build-time selection: one call per variant, same argument list as above.
274 #else //RUNTIME_CPUDETECT
276 vo_draw_alpha_rgb32_MMX2(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
278 vo_draw_alpha_rgb32_3DNow(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
280 vo_draw_alpha_rgb32_MMX(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
282 vo_draw_alpha_rgb32_X86(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
284 vo_draw_alpha_rgb32_C(w
, h
, src
, srca
, srcstride
, dstbase
, dststride
);
286 #endif //!RUNTIME_CPUDETECT
/* Lookup tables with one 16-bit entry per 8-bit source value. They exist
 * only when FAST_OSD_TABLE is defined; vo_draw_alpha_init() fills them and
 * the rgb15/rgb16 renderers index them with src[x]. */
289 #ifdef FAST_OSD_TABLE
290 static unsigned short fast_osd_15bpp_table
[256];
291 static unsigned short fast_osd_16bpp_table
[256];
/*
 * One-time setup for the alpha renderers.
 *
 * 1. When FAST_OSD_TABLE is defined, fills fast_osd_15bpp_table and
 *    fast_osd_16bpp_table.
 * 2. When mp_msg_test(MSGT_OSD, MSGL_V) is true, logs at MSGL_INFO which
 *    implementation (MMX2, 3DNow, MMX, X86 or unoptimized) is in use.
 *
 * NOTE(review): the loop that drives `i`, the first test of the run-time
 * chain and the preprocessor conditions guarding the build-time messages
 * are not visible in this view.
 */
294 void vo_draw_alpha_init(void){
295 #ifdef FAST_OSD_TABLE
// 15 bpp entry: i>>3 (5 bits) copied into bits 10-14, 5-9 and 0-4.
298 fast_osd_15bpp_table
[i
]=((i
>>3)<<10)|((i
>>3)<<5)|(i
>>3);
// 16 bpp entry: i>>3 in bits 11-15 and 0-4, i>>2 (6 bits) in bits 5-10.
299 fast_osd_16bpp_table
[i
]=((i
>>3)<<11)|((i
>>2)<<5)|(i
>>3);
302 //FIXME the optimized stuff is a lie for 15/16bpp as they aren't optimized yet
303 if( mp_msg_test(MSGT_OSD
,MSGL_V
) )
305 #ifdef RUNTIME_CPUDETECT
306 #ifdef CAN_COMPILE_X86_ASM
307 // ordered by speed, fastest first
309 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
310 else if(gCpuCaps
.has3DNow
)
311 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
312 else if(gCpuCaps
.hasMMX
)
313 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX Optimized OnScreenDisplay\n");
315 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using X86 Optimized OnScreenDisplay\n");
317 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using Unoptimized OnScreenDisplay\n");
// Build-time selection: same five messages as the run-time chain above.
319 #else //RUNTIME_CPUDETECT
321 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
323 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
325 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX Optimized OnScreenDisplay\n");
327 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using X86 Optimized OnScreenDisplay\n");
329 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using Unoptimized OnScreenDisplay\n");
331 #endif //!RUNTIME_CPUDETECT
/*
 * Alpha renderer for 15 bpp RGB (three 5-bit channels in bits 0-4, 5-9
 * and 10-14 of each 16-bit pixel). Unlike the yv12/yuy2/uyvy/rgb24/rgb32
 * entry points there is no per-CPU dispatch; the pixel code is written here.
 *
 * dstbase is accessed as an array of unsigned short pixels. Three ways of
 * producing dst[x] are visible:
 *   - table lookup by src[x] (FAST_OSD_TABLE),
 *   - src[x]>>3 replicated into all three channels,
 *   - a blend of the existing pixel with src[x], weighted by srca[x].
 *
 * NOTE(review): the row/column loops, the per-pixel condition and the
 * preprocessor conditions that select between these paths are not visible
 * in this view. w, h, srcstride and dststride are presumably the area size
 * and the row strides -- confirm.
 */
335 void vo_draw_alpha_rgb15(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
338 register unsigned short *dst
= (unsigned short*) dstbase
;
343 #ifdef FAST_OSD_TABLE
// Overwrite the pixel with the table entry for this source value.
344 dst
[x
]=fast_osd_15bpp_table
[src
[x
]];
// Same result without the table: a 5-bit grey level in all three channels.
346 register unsigned int a
=src
[x
]>>3;
347 dst
[x
]=(a
<<10)|(a
<<5)|a
;
// Blend path: unpack the three 5-bit channels of the destination pixel.
350 unsigned char r
=dst
[x
]&0x1F;
351 unsigned char g
=(dst
[x
]>>5)&0x1F;
352 unsigned char b
=(dst
[x
]>>10)&0x1F;
// Per channel: (channel*srca[x])>>5, plus src[x], then >>3 back to 5 bits.
353 r
=(((r
*srca
[x
])>>5)+src
[x
])>>3;
354 g
=(((g
*srca
[x
])>>5)+src
[x
])>>3;
355 b
=(((b
*srca
[x
])>>5)+src
[x
])>>3;
// Repack the blended channels into the 15 bpp pixel.
356 dst
[x
]=(b
<<10)|(g
<<5)|r
;
/*
 * Alpha renderer for 16 bpp RGB (5 bits in bits 0-4, 6 bits in bits 5-10,
 * 5 bits in bits 11-15 of each 16-bit pixel). There is no per-CPU dispatch;
 * the pixel code is written here.
 *
 * dstbase is accessed as an array of unsigned short pixels. Three ways of
 * producing dst[x] are visible:
 *   - table lookup by src[x] (FAST_OSD_TABLE),
 *   - src[x] reduced to 5/6/5 bits and written to all three channels,
 *   - a blend of the existing pixel with src[x], weighted by srca[x].
 *
 * NOTE(review): the row/column loops, the per-pixel condition, the
 * preprocessor conditions that select between these paths and the end of
 * the function are not visible in this view. w, h, srcstride and dststride
 * are presumably the area size and the row strides -- confirm.
 */
367 void vo_draw_alpha_rgb16(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
370 register unsigned short *dst
= (unsigned short*) dstbase
;
375 #ifdef FAST_OSD_TABLE
// Overwrite the pixel with the table entry for this source value.
376 dst
[x
]=fast_osd_16bpp_table
[src
[x
]];
// Same result without the table: src[x] as 5-bit, 6-bit and 5-bit fields.
378 dst
[x
]=((src
[x
]>>3)<<11)|((src
[x
]>>2)<<5)|(src
[x
]>>3);
// Blend path: unpack the 5/6/5-bit channels of the destination pixel.
381 unsigned char r
=dst
[x
]&0x1F;
382 unsigned char g
=(dst
[x
]>>5)&0x3F;
383 unsigned char b
=(dst
[x
]>>11)&0x1F;
// 5-bit channels use >>5 then >>3; the 6-bit middle channel uses >>6 then >>2.
384 r
=(((r
*srca
[x
])>>5)+src
[x
])>>3;
385 g
=(((g
*srca
[x
])>>6)+src
[x
])>>2;
386 b
=(((b
*srca
[x
])>>5)+src
[x
])>>3;
// Repack the blended channels into the 16 bpp pixel.
387 dst
[x
]=(b
<<11)|(g
<<5)|r
;