2 * generic alpha renderers for all YUV modes and RGB depths
3 * These are "reference implementations", should be optimized later (MMX, etc).
4 * templating code by Michael Niedermayer (michaelni@gmx.at)
6 * This file is part of MPlayer.
8 * MPlayer is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * MPlayer is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 //#define FAST_OSD_TABLE
30 #include "cpudetect.h"
/*
 * 64-bit constants, each aligned on an 8-byte boundary.
 * NOTE(review): presumably referenced by the SIMD code pulled in through
 * osd_template.c -- confirm before renaming or removing any of them.
 */
static const uint64_t bFF      __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; /* all 64 bits set */
static const uint64_t mask24lh __attribute__((aligned(8))) = 0xFFFF000000000000ULL; /* upper two bytes set */
static const uint64_t mask24hl __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL; /* lower six bytes set (complement of mask24lh) */
//Note: we have C, X86-nommx, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
40 #if !HAVE_MMX || CONFIG_RUNTIME_CPUDETECT
46 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
50 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
54 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
65 #define HAVE_AMD3DNOW 0
75 #define HAVE_AMD3DNOW 0
76 #define RENAME(a) a ## _C
77 #include "osd_template.c"
90 #define HAVE_AMD3DNOW 0
91 #define RENAME(a) a ## _X86
92 #include "osd_template.c"
103 #define HAVE_AMD3DNOW 0
104 #define RENAME(a) a ## _MMX
105 #include "osd_template.c"
116 #define HAVE_AMD3DNOW 0
117 #define RENAME(a) a ## _MMX2
118 #include "osd_template.c"
129 #define HAVE_AMD3DNOW 1
130 #define RENAME(a) a ## _3DNow
131 #include "osd_template.c"
134 #endif /* ARCH_X86 */
/**
 * Dispatch a yv12 OSD alpha draw to one of the vo_draw_alpha_yv12_* variants.
 *
 * All seven arguments are forwarded unchanged. With
 * CONFIG_RUNTIME_CPUDETECT the variant is picked at run time from gCpuCaps
 * (x86 only; other architectures always use the _C variant). Otherwise the
 * variant is fixed at compile time by HAVE_MMX2 / HAVE_AMD3DNOW / HAVE_MMX /
 * ARCH_X86, tested in that order.
 */
void vo_draw_alpha_yv12(int w, int h, unsigned char* src, unsigned char *srca,
                        int srcstride, unsigned char* dstbase, int dststride){
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
    // ordered by speed / fastest first
    if(gCpuCaps.hasMMX2)
        vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.has3DNow)
        vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.hasMMX)
        vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //CONFIG_RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
/**
 * Dispatch a yuy2 OSD alpha draw to one of the vo_draw_alpha_yuy2_* variants.
 *
 * All seven arguments are forwarded unchanged. With
 * CONFIG_RUNTIME_CPUDETECT the variant is picked at run time from gCpuCaps
 * (x86 only; other architectures always use the _C variant). Otherwise the
 * variant is fixed at compile time by HAVE_MMX2 / HAVE_AMD3DNOW / HAVE_MMX /
 * ARCH_X86, tested in that order.
 */
void vo_draw_alpha_yuy2(int w, int h, unsigned char* src, unsigned char *srca,
                        int srcstride, unsigned char* dstbase, int dststride){
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
    // ordered by speed / fastest first
    if(gCpuCaps.hasMMX2)
        vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.has3DNow)
        vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.hasMMX)
        vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //CONFIG_RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
/**
 * Dispatch a uyvy OSD alpha draw to one of the vo_draw_alpha_uyvy_* variants.
 *
 * All seven arguments are forwarded unchanged. With
 * CONFIG_RUNTIME_CPUDETECT the variant is picked at run time from gCpuCaps
 * (x86 only; other architectures always use the _C variant). Otherwise the
 * variant is fixed at compile time by HAVE_MMX2 / HAVE_AMD3DNOW / HAVE_MMX /
 * ARCH_X86, tested in that order.
 */
void vo_draw_alpha_uyvy(int w, int h, unsigned char* src, unsigned char *srca,
                        int srcstride, unsigned char* dstbase, int dststride){
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
    // ordered by speed / fastest first
    if(gCpuCaps.hasMMX2)
        vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.has3DNow)
        vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.hasMMX)
        vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //CONFIG_RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_uyvy_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_uyvy_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_uyvy_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_uyvy_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_uyvy_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
/**
 * Dispatch an rgb24 OSD alpha draw to one of the vo_draw_alpha_rgb24_* variants.
 *
 * All seven arguments are forwarded unchanged. With
 * CONFIG_RUNTIME_CPUDETECT the variant is picked at run time from gCpuCaps
 * (x86 only; other architectures always use the _C variant). Otherwise the
 * variant is fixed at compile time by HAVE_MMX2 / HAVE_AMD3DNOW / HAVE_MMX /
 * ARCH_X86, tested in that order.
 */
void vo_draw_alpha_rgb24(int w, int h, unsigned char* src, unsigned char *srca,
                         int srcstride, unsigned char* dstbase, int dststride){
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
    // ordered by speed / fastest first
    if(gCpuCaps.hasMMX2)
        vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.has3DNow)
        vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.hasMMX)
        vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //CONFIG_RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
/**
 * Dispatch an rgb32 OSD alpha draw to one of the vo_draw_alpha_rgb32_* variants.
 *
 * All seven arguments are forwarded unchanged. With
 * CONFIG_RUNTIME_CPUDETECT the variant is picked at run time from gCpuCaps
 * (x86 only; other architectures always use the _C variant). Otherwise the
 * variant is fixed at compile time by HAVE_MMX2 / HAVE_AMD3DNOW / HAVE_MMX /
 * ARCH_X86, tested in that order.
 */
void vo_draw_alpha_rgb32(int w, int h, unsigned char* src, unsigned char *srca,
                         int srcstride, unsigned char* dstbase, int dststride){
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
    // ordered by speed / fastest first
    if(gCpuCaps.hasMMX2)
        vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.has3DNow)
        vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if(gCpuCaps.hasMMX)
        vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //CONFIG_RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
286 #ifdef FAST_OSD_TABLE
287 static unsigned short fast_osd_12bpp_table
[256];
288 static unsigned short fast_osd_15bpp_table
[256];
289 static unsigned short fast_osd_16bpp_table
[256];
292 void vo_draw_alpha_init(void){
293 #ifdef FAST_OSD_TABLE
296 fast_osd_12bpp_table
[i
]=((i
>>4)<< 8)|((i
>>4)<<4)|(i
>>4);
297 fast_osd_15bpp_table
[i
]=((i
>>3)<<10)|((i
>>3)<<5)|(i
>>3);
298 fast_osd_16bpp_table
[i
]=((i
>>3)<<11)|((i
>>2)<<5)|(i
>>3);
301 //FIXME the optimized stuff is a lie for 15/16bpp as they aren't optimized yet
302 if( mp_msg_test(MSGT_OSD
,MSGL_V
) )
304 #if CONFIG_RUNTIME_CPUDETECT
306 // ordered per speed fasterst first
308 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
309 else if(gCpuCaps
.has3DNow
)
310 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
311 else if(gCpuCaps
.hasMMX
)
312 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX Optimized OnScreenDisplay\n");
314 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using X86 Optimized OnScreenDisplay\n");
316 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using Unoptimized OnScreenDisplay\n");
318 #else //CONFIG_RUNTIME_CPUDETECT
320 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
322 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
324 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX Optimized OnScreenDisplay\n");
326 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using X86 Optimized OnScreenDisplay\n");
328 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using Unoptimized OnScreenDisplay\n");
330 #endif //!CONFIG_RUNTIME_CPUDETECT
/**
 * Blend an OSD bitmap onto a packed 12 bpp destination (4 bits per channel,
 * stored in the low 12 bits of an unsigned short).
 *
 * @param w          pixels processed per row
 * @param h          number of rows
 * @param src        per-pixel value added to every channel (8 bit)
 * @param srca       per-pixel weight applied to the existing destination;
 *                   a pixel whose srca is 0 is left untouched
 * @param srcstride  bytes added to src and srca after each row
 * @param dstbase    first destination row, accessed as unsigned short pixels
 * @param dststride  bytes added to dstbase after each row
 *
 * Default path, per channel c: c = (((c * srca) >> 4) + src) >> 4, i.e. the
 * channel is widened to 8 bits, weighted by srca/256, src is added and the
 * sum is cut back to 4 bits. No clamping is done: a sum above 255 spills
 * into the neighbouring field.
 * NOTE(review): presumably callers keep src + weighted dst within 8 bits --
 * confirm.
 *
 * With FAST_OSD defined the destination is simply overwritten by src
 * reduced to 4 bits in all three fields (via table when FAST_OSD_TABLE is
 * defined).
 */
void vo_draw_alpha_rgb12(int w, int h, unsigned char* src, unsigned char *srca,
                         int srcstride, unsigned char* dstbase, int dststride) {
    int x, y;

    for (y = 0; y < h; y++) {
        unsigned short *dst = (unsigned short*) dstbase;

        for (x = 0; x < w; x++) {
            if (srca[x]) {
#ifdef FAST_OSD
#ifdef FAST_OSD_TABLE
                dst[x] = fast_osd_12bpp_table[src[x]];
#else
                unsigned int a = src[x] >> 4;
                dst[x] = (a << 8) | (a << 4) | a;
#endif
#else
                unsigned char r = dst[x] & 0x0F;
                unsigned char g = (dst[x] >> 4) & 0x0F;
                unsigned char b = (dst[x] >> 8) & 0x0F;
                r = (((r * srca[x]) >> 4) + src[x]) >> 4;
                g = (((g * srca[x]) >> 4) + src[x]) >> 4;
                b = (((b * srca[x]) >> 4) + src[x]) >> 4;
                dst[x] = (b << 8) | (g << 4) | r;
#endif
            }
        }
        /* step all three buffers to the next row */
        src     += srcstride;
        srca    += srcstride;
        dstbase += dststride;
    }
}
/**
 * Blend an OSD bitmap onto a packed 15 bpp destination (5 bits per channel,
 * stored in the low 15 bits of an unsigned short).
 *
 * @param w          pixels processed per row
 * @param h          number of rows
 * @param src        per-pixel value added to every channel (8 bit)
 * @param srca       per-pixel weight applied to the existing destination;
 *                   a pixel whose srca is 0 is left untouched
 * @param srcstride  bytes added to src and srca after each row
 * @param dstbase    first destination row, accessed as unsigned short pixels
 * @param dststride  bytes added to dstbase after each row
 *
 * Default path, per channel c: c = (((c * srca) >> 5) + src) >> 3, i.e. the
 * channel is widened to 8 bits, weighted by srca/256, src is added and the
 * sum is cut back to 5 bits. No clamping is done: a sum above 255 spills
 * into the neighbouring field.
 * NOTE(review): presumably callers keep src + weighted dst within 8 bits --
 * confirm.
 *
 * With FAST_OSD defined the destination is simply overwritten by src
 * reduced to 5 bits in all three fields (via table when FAST_OSD_TABLE is
 * defined).
 */
void vo_draw_alpha_rgb15(int w, int h, unsigned char* src, unsigned char *srca,
                         int srcstride, unsigned char* dstbase, int dststride){
    int x, y;

    for (y = 0; y < h; y++) {
        unsigned short *dst = (unsigned short*) dstbase;

        for (x = 0; x < w; x++) {
            if (srca[x]) {
#ifdef FAST_OSD
#ifdef FAST_OSD_TABLE
                dst[x] = fast_osd_15bpp_table[src[x]];
#else
                unsigned int a = src[x] >> 3;
                dst[x] = (a << 10) | (a << 5) | a;
#endif
#else
                unsigned char r = dst[x] & 0x1F;
                unsigned char g = (dst[x] >> 5) & 0x1F;
                unsigned char b = (dst[x] >> 10) & 0x1F;
                r = (((r * srca[x]) >> 5) + src[x]) >> 3;
                g = (((g * srca[x]) >> 5) + src[x]) >> 3;
                b = (((b * srca[x]) >> 5) + src[x]) >> 3;
                dst[x] = (b << 10) | (g << 5) | r;
#endif
            }
        }
        /* step all three buffers to the next row */
        src     += srcstride;
        srca    += srcstride;
        dstbase += dststride;
    }
}
/**
 * Blend an OSD bitmap onto a packed 16 bpp destination (5/6/5 bit fields in
 * an unsigned short; the middle field is the 6-bit one).
 *
 * @param w          pixels processed per row
 * @param h          number of rows
 * @param src        per-pixel value added to every channel (8 bit)
 * @param srca       per-pixel weight applied to the existing destination;
 *                   a pixel whose srca is 0 is left untouched
 * @param srcstride  bytes added to src and srca after each row
 * @param dstbase    first destination row, accessed as unsigned short pixels
 * @param dststride  bytes added to dstbase after each row
 *
 * Default path: each 5-bit channel becomes (((c * srca) >> 5) + src) >> 3
 * and the 6-bit channel becomes (((c * srca) >> 6) + src) >> 2, i.e. the
 * channel is widened to 8 bits, weighted by srca/256, src is added and the
 * sum is cut back to the field width. No clamping is done: a sum above 255
 * spills into the neighbouring field.
 * NOTE(review): presumably callers keep src + weighted dst within 8 bits --
 * confirm.
 *
 * With FAST_OSD defined the destination is simply overwritten by src
 * reduced to each field's width (via table when FAST_OSD_TABLE is defined).
 */
void vo_draw_alpha_rgb16(int w, int h, unsigned char* src, unsigned char *srca,
                         int srcstride, unsigned char* dstbase, int dststride){
    int x, y;

    for (y = 0; y < h; y++) {
        unsigned short *dst = (unsigned short*) dstbase;

        for (x = 0; x < w; x++) {
            if (srca[x]) {
#ifdef FAST_OSD
#ifdef FAST_OSD_TABLE
                dst[x] = fast_osd_16bpp_table[src[x]];
#else
                dst[x] = ((src[x] >> 3) << 11) | ((src[x] >> 2) << 5) | (src[x] >> 3);
#endif
#else
                unsigned char r = dst[x] & 0x1F;
                unsigned char g = (dst[x] >> 5) & 0x3F;
                unsigned char b = (dst[x] >> 11) & 0x1F;
                r = (((r * srca[x]) >> 5) + src[x]) >> 3;
                g = (((g * srca[x]) >> 6) + src[x]) >> 2;
                b = (((b * srca[x]) >> 5) + src[x]) >> 3;
                dst[x] = (b << 11) | (g << 5) | r;
#endif
            }
        }
        /* step all three buffers to the next row */
        src     += srcstride;
        srca    += srcstride;
        dstbase += dststride;
    }
}