2 * generic alpha renderers for all YUV modes and RGB depths
3 * These are "reference implementations", should be optimized later (MMX, etc).
4 * templating code by Michael Niedermayer (michaelni@gmx.at)
6 * This file is part of MPlayer.
8 * MPlayer is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * MPlayer is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 #include "cpudetect.h"
/*
 * 64-bit, 8-byte-aligned constants.
 * NOTE(review): nothing in this file references them directly; presumably
 * they are consumed by the code pulled in from osd_template.c - confirm.
 */

/* All 64 bits set. */
static const uint64_t bFF __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL;

/* Upper 16 bits set, lower 48 bits clear. */
static const unsigned long long mask24lh __attribute__((aligned(8))) = 0xFFFF000000000000ULL;

/* Lower 48 bits set, upper 16 bits clear (bitwise complement of mask24lh). */
static const unsigned long long mask24hl __attribute__((aligned(8))) = 0x0000FFFFFFFFFFFFULL;
35 //Note: we have C, X86-noMMX, MMX, MMX2 and 3DNow versions; there is no 3DNow+MMX2 one
/*
 * Build-configuration tests. Each of the four conditions below is also true
 * whenever CONFIG_RUNTIME_CPUDETECT is set.
 * NOTE(review): the lines these tests guard, and their matching #endif
 * lines, are not visible in this excerpt - confirm what each one enables.
 */
37 #if !HAVE_MMX || CONFIG_RUNTIME_CPUDETECT
43 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
47 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
51 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
62 #define HAVE_AMD3DNOW 0
/*
 * Template instantiations. Each RENAME definition appends a variant suffix
 * to a name by token pasting, and the #include that follows it pulls in
 * osd_template.c once more under that suffix (_C, _X86, _MMX, _MMX2, _3DNow).
 * HAVE_AMD3DNOW is defined immediately before each instantiation.
 * NOTE(review): the companion HAVE_MMX / HAVE_MMX2 definitions and the
 * #undef lines that must precede these redefinitions are not visible here.
 */
72 #define HAVE_AMD3DNOW 0
73 #define RENAME(a) a ## _C
74 #include "osd_template.c"
87 #define HAVE_AMD3DNOW 0
88 #define RENAME(a) a ## _X86
89 #include "osd_template.c"
100 #define HAVE_AMD3DNOW 0
101 #define RENAME(a) a ## _MMX
102 #include "osd_template.c"
113 #define HAVE_AMD3DNOW 0
114 #define RENAME(a) a ## _MMX2
115 #include "osd_template.c"
/* The _3DNow instantiation is the only one included with HAVE_AMD3DNOW set to 1. */
126 #define HAVE_AMD3DNOW 1
127 #define RENAME(a) a ## _3DNow
128 #include "osd_template.c"
131 #endif /* ARCH_X86 */
/**
 * Entry point of the yv12 OSD alpha renderer: forwards to exactly one of the
 * per-CPU variants instantiated from osd_template.c.
 *
 * All seven arguments are handed to the selected variant unchanged.
 *
 * With CONFIG_RUNTIME_CPUDETECT on x86 the variant is chosen at run time from
 * gCpuCaps (MMX2 first, then 3DNow, then MMX, then plain x86); non-x86 builds
 * use the C variant. Without run-time detection a single variant is chosen
 * at compile time from the HAVE_* / ARCH_X86 macros.
 *
 * NOTE(review): the leading gCpuCaps.hasMMX2 test and the #if/#elif/#else
 * lines were restored from the call order and the surviving "else if"
 * branches - confirm against cpudetect.h and the build configuration.
 */
void vo_draw_alpha_yv12(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
    // ordered by speed / fastest first
    if (gCpuCaps.hasMMX2)
        vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.has3DNow)
        vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.hasMMX)
        vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //CONFIG_RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_yv12_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_yv12_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_yv12_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_yv12_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yv12_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
/**
 * Entry point of the yuy2 OSD alpha renderer: forwards to exactly one of the
 * per-CPU variants instantiated from osd_template.c.
 *
 * All seven arguments are handed to the selected variant unchanged.
 *
 * With CONFIG_RUNTIME_CPUDETECT on x86 the variant is chosen at run time from
 * gCpuCaps (MMX2 first, then 3DNow, then MMX, then plain x86); non-x86 builds
 * use the C variant. Without run-time detection a single variant is chosen
 * at compile time from the HAVE_* / ARCH_X86 macros.
 *
 * NOTE(review): the leading gCpuCaps.hasMMX2 test and the #if/#elif/#else
 * lines were restored from the call order and the surviving "else if"
 * branches - confirm against cpudetect.h and the build configuration.
 */
void vo_draw_alpha_yuy2(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
    // ordered by speed / fastest first
    if (gCpuCaps.hasMMX2)
        vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.has3DNow)
        vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.hasMMX)
        vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //CONFIG_RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_yuy2_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_yuy2_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_yuy2_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_yuy2_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_yuy2_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
/**
 * Entry point of the rgb24 OSD alpha renderer: forwards to exactly one of the
 * per-CPU variants instantiated from osd_template.c.
 *
 * All seven arguments are handed to the selected variant unchanged.
 *
 * With CONFIG_RUNTIME_CPUDETECT on x86 the variant is chosen at run time from
 * gCpuCaps (MMX2 first, then 3DNow, then MMX, then plain x86); non-x86 builds
 * use the C variant. Without run-time detection a single variant is chosen
 * at compile time from the HAVE_* / ARCH_X86 macros.
 *
 * NOTE(review): the leading gCpuCaps.hasMMX2 test and the #if/#elif/#else
 * lines were restored from the call order and the surviving "else if"
 * branches - confirm against cpudetect.h and the build configuration.
 */
void vo_draw_alpha_rgb24(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
    // ordered by speed / fastest first
    if (gCpuCaps.hasMMX2)
        vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.has3DNow)
        vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.hasMMX)
        vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //CONFIG_RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_rgb24_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_rgb24_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_rgb24_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_rgb24_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb24_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
/**
 * Entry point of the rgb32 OSD alpha renderer: forwards to exactly one of the
 * per-CPU variants instantiated from osd_template.c.
 *
 * All seven arguments are handed to the selected variant unchanged.
 *
 * With CONFIG_RUNTIME_CPUDETECT on x86 the variant is chosen at run time from
 * gCpuCaps (MMX2 first, then 3DNow, then MMX, then plain x86); non-x86 builds
 * use the C variant. Without run-time detection a single variant is chosen
 * at compile time from the HAVE_* / ARCH_X86 macros.
 *
 * NOTE(review): the leading gCpuCaps.hasMMX2 test and the #if/#elif/#else
 * lines were restored from the call order and the surviving "else if"
 * branches - confirm against cpudetect.h and the build configuration.
 */
void vo_draw_alpha_rgb32(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
    // ordered by speed / fastest first
    if (gCpuCaps.hasMMX2)
        vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.has3DNow)
        vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
    else if (gCpuCaps.hasMMX)
        vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
    else
        vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#else //CONFIG_RUNTIME_CPUDETECT
#if HAVE_MMX2
    vo_draw_alpha_rgb32_MMX2(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_AMD3DNOW
    vo_draw_alpha_rgb32_3DNow(w, h, src, srca, srcstride, dstbase, dststride);
#elif HAVE_MMX
    vo_draw_alpha_rgb32_MMX(w, h, src, srca, srcstride, dstbase, dststride);
#elif ARCH_X86
    vo_draw_alpha_rgb32_X86(w, h, src, srca, srcstride, dstbase, dststride);
#else
    vo_draw_alpha_rgb32_C(w, h, src, srca, srcstride, dstbase, dststride);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
253 void vo_draw_alpha_init(void){
254 //FIXME the optimized stuff is a lie for 15/16bpp as they aren't optimized yet
255 if( mp_msg_test(MSGT_OSD
,MSGL_V
) )
257 #if CONFIG_RUNTIME_CPUDETECT
259 // ordered per speed fasterst first
261 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
262 else if(gCpuCaps
.has3DNow
)
263 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
264 else if(gCpuCaps
.hasMMX
)
265 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX Optimized OnScreenDisplay\n");
267 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using X86 Optimized OnScreenDisplay\n");
269 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using Unoptimized OnScreenDisplay\n");
271 #else //CONFIG_RUNTIME_CPUDETECT
273 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit MMX2) Optimized OnScreenDisplay\n");
275 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX (with tiny bit 3DNow) Optimized OnScreenDisplay\n");
277 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using MMX Optimized OnScreenDisplay\n");
279 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using X86 Optimized OnScreenDisplay\n");
281 mp_msg(MSGT_OSD
,MSGL_INFO
,"Using Unoptimized OnScreenDisplay\n");
283 #endif //!CONFIG_RUNTIME_CPUDETECT
/**
 * Blend an 8-bit bitmap onto a 12 bpp destination (4 bits per channel,
 * stored in 16-bit words: bits 0-3, 4-7 and 8-11).
 *
 * For every processed pixel each 4-bit channel c becomes
 *     (((c * srca[x]) >> 4) + src[x]) >> 4
 * and the three results are repacked; bits 12-15 of the word are written
 * as zero.
 *
 * @param w          pixels processed per row
 * @param h          number of rows
 * @param src        per-pixel value added to every channel
 * @param srca       per-pixel multiplier applied to the existing channel
 * @param srcstride  bytes from one row of src/srca to the next
 * @param dstbase    first destination row, accessed as unsigned short
 * @param dststride  bytes from one destination row to the next
 *
 * NOTE(review): pixels whose srca value is 0 are left untouched. That guard
 * and the per-row advance of src/srca were restored here - confirm that
 * they match the other OSD renderers (osd_template.c).
 */
void vo_draw_alpha_rgb12(int w, int h, unsigned char* src, unsigned char *srca,
                         int srcstride, unsigned char* dstbase, int dststride) {
    int x, y;

    for (y = 0; y < h; y++) {
        unsigned short *dst = (unsigned short*) dstbase;

        for (x = 0; x < w; x++) {
            unsigned char r, g, b;

            if (!srca[x])
                continue;

            r = dst[x] & 0x0F;
            g = (dst[x] >> 4) & 0x0F;
            b = (dst[x] >> 8) & 0x0F;
            r = (((r * srca[x]) >> 4) + src[x]) >> 4;
            g = (((g * srca[x]) >> 4) + src[x]) >> 4;
            b = (((b * srca[x]) >> 4) + src[x]) >> 4;
            dst[x] = (b << 8) | (g << 4) | r;
        }

        /* Step all three row pointers; src and srca share one stride. */
        src += srcstride;
        srca += srcstride;
        dstbase += dststride;
    }
}
/**
 * Blend an 8-bit bitmap onto a 15 bpp destination (5 bits per channel,
 * stored in 16-bit words: bits 0-4, 5-9 and 10-14).
 *
 * For every processed pixel each 5-bit channel c becomes
 *     (((c * srca[x]) >> 5) + src[x]) >> 3
 * and the three results are repacked; bit 15 of the word is written as zero.
 *
 * @param w          pixels processed per row
 * @param h          number of rows
 * @param src        per-pixel value added to every channel
 * @param srca       per-pixel multiplier applied to the existing channel
 * @param srcstride  bytes from one row of src/srca to the next
 * @param dstbase    first destination row, accessed as unsigned short
 * @param dststride  bytes from one destination row to the next
 *
 * NOTE(review): the row/column loops, the srca != 0 guard and the per-row
 * pointer advances were restored to mirror vo_draw_alpha_rgb12 - confirm
 * that they match the other OSD renderers (osd_template.c).
 */
void vo_draw_alpha_rgb15(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int x, y;

    for (y = 0; y < h; y++) {
        unsigned short *dst = (unsigned short*) dstbase;

        for (x = 0; x < w; x++) {
            unsigned char r, g, b;

            if (!srca[x])
                continue;

            r = dst[x] & 0x1F;
            g = (dst[x] >> 5) & 0x1F;
            b = (dst[x] >> 10) & 0x1F;
            r = (((r * srca[x]) >> 5) + src[x]) >> 3;
            g = (((g * srca[x]) >> 5) + src[x]) >> 3;
            b = (((b * srca[x]) >> 5) + src[x]) >> 3;
            dst[x] = (b << 10) | (g << 5) | r;
        }

        /* Step all three row pointers; src and srca share one stride. */
        src += srcstride;
        srca += srcstride;
        dstbase += dststride;
    }
}
334 void vo_draw_alpha_rgb16(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
337 register unsigned short *dst
= (unsigned short*) dstbase
;
341 unsigned char r
=dst
[x
]&0x1F;
342 unsigned char g
=(dst
[x
]>>5)&0x3F;
343 unsigned char b
=(dst
[x
]>>11)&0x1F;
344 r
=(((r
*srca
[x
])>>5)+src
[x
])>>3;
345 g
=(((g
*srca
[x
])>>6)+src
[x
])>>2;
346 b
=(((b
*srca
[x
])>>5)+src
[x
])>>3;
347 dst
[x
]=(b
<<11)|(g
<<5)|r
;