2 * generic alpha renderers for all YUV modes and RGB depths
3 * Optimized by Nick and Michael.
5 * This file is part of MPlayer.
7 * MPlayer is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * MPlayer is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28 #define PREFETCH "prefetch"
29 #define PREFETCHW "prefetchw"
30 #define PAVGB "pavgusb"
32 #define PREFETCH "prefetchnta"
33 #define PREFETCHW "prefetcht0"
36 #define PREFETCH " # nop"
37 #define PREFETCHW " # nop"
41 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
/*
 * Alpha-blend an OSD bitmap into an 8-bit luma (YV12) plane.
 * The scalar fallback visible below computes, per pixel:
 *     dst[x] = ((dst[x] * srca[x]) >> 8) + src[x]
 * so srca is a 0..255 multiplier and src holds the pre-multiplied OSD
 * value; srca == 0 leaves the destination pixel untouched.  The MMX
 * path performs the same blend on 8 pixels per iteration using the
 * FF00/00FF byte masks prepared in mm5/mm4.
 * NOTE(review): srcstride/dststride are presumably the byte pitches of
 * the source and destination lines -- confirm against callers.
 */
47 static inline void RENAME(vo_draw_alpha_yv12
)(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
49 #if defined(FAST_OSD) && !HAVE_MMX
/* MMX constant setup: mm4 = 00FF... mask, mm5 = FF00... mask, mm7 = all-ones. */
54 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
55 "movq %%mm5, %%mm4\n\t"
56 "movq %%mm5, %%mm7\n\t"
57 "psllw $8, %%mm5\n\t" //FF00FF00FF00
58 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
68 ::"m"(*dstbase
),"m"(*srca
),"m"(*src
):"memory");
/* MMX inner loop: blend 8 luma bytes at once.  Even/odd bytes are
 * separated with the mm4 mask / psrlw, each half multiplied by the
 * (biased) alpha bytes, then recombined with por. */
77 "movq %0, %%mm0\n\t" // dstbase
78 "movq %%mm0, %%mm1\n\t"
79 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
80 "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y
81 "movq %1, %%mm2\n\t" //srca HGFEDCBA
82 "paddb %%mm7, %%mm2\n\t"
83 "movq %%mm2, %%mm3\n\t"
84 "pand %%mm4, %%mm2\n\t" //0G0E0C0A
85 "psrlw $8, %%mm3\n\t" //0H0F0D0B
86 "pmullw %%mm2, %%mm0\n\t"
87 "pmullw %%mm3, %%mm1\n\t"
89 "pand %%mm5, %%mm1\n\t"
90 "por %%mm1, %%mm0\n\t"
94 :: "m" (dstbase
[x
]), "m" (srca
[x
]), "m" (src
[x
])
/* FAST_OSD scalar path: opaque copy wherever the mask byte is non-zero. */
100 if(srca
[2*x
+0]) dstbase
[2*x
+0]=src
[2*x
+0];
101 if(srca
[2*x
+1]) dstbase
[2*x
+1]=src
[2*x
+1];
/* Generic scalar blend: dst = ((dst * alpha) >> 8) + src. */
103 if(srca
[x
]) dstbase
[x
]=((dstbase
[x
]*srca
[x
])>>8)+src
[x
];
/* Leave the MMX/FPU state clean for subsequent floating-point code. */
112 __asm__
volatile(EMMS:::"memory");
/*
 * Alpha-blend an OSD bitmap into a packed YUY2 (Y U Y V) surface.
 * Only luma is blended toward the OSD value; chroma is pulled toward
 * the neutral value 128 (see the scalar fallback below):
 *     dst[2*x]   = ((dst[2*x] * srca[x]) >> 8) + src[x]
 *     dst[2*x+1] = ((((signed)dst[2*x+1] - 128) * srca[x]) >> 8) + 128
 * The MMX path handles 4 pixels (8 destination bytes) per iteration.
 */
117 static inline void RENAME(vo_draw_alpha_yuy2
)(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
119 #if defined(FAST_OSD) && !HAVE_MMX
/* MMX constants: mm7 = 0, mm6 = all-ones (alpha bias),
 * mm4 = 00FF luma mask, mm5 = FF00 chroma mask. */
124 "pxor %%mm7, %%mm7\n\t"
125 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
126 "movq %%mm5, %%mm6\n\t"
127 "movq %%mm5, %%mm4\n\t"
128 "psllw $8, %%mm5\n\t" //FF00FF00FF00
129 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
139 ::"m"(*dstbase
),"m"(*srca
),"m"(*src
));
/* Tests the 4 alpha bytes held in eax -- presumably followed by a
 * conditional skip of fully transparent groups (jump not visible here). */
143 "orl %%eax, %%eax\n\t"
/* Blend 4 luma bytes; chroma bytes pass through unchanged via mm5. */
148 "movq %0, %%mm0\n\t" // dstbase
149 "movq %%mm0, %%mm1\n\t"
150 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
151 "movd %%eax, %%mm2\n\t" //srca 0000DCBA
152 "paddb %%mm6, %%mm2\n\t"
153 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
154 "pmullw %%mm2, %%mm0\n\t"
155 "psrlw $8, %%mm0\n\t"
156 "pand %%mm5, %%mm1\n\t" //U0V0U0V0
157 "movd %2, %%mm2\n\t" //src 0000DCBA
158 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
159 "por %%mm1, %%mm0\n\t"
160 "paddb %%mm2, %%mm0\n\t"
163 :: "m" (dstbase
[x
*2]), "m" (srca
[x
]), "m" (src
[x
])
/* FAST_OSD scalar path: overwrite luma bytes only where the mask is set. */
169 if(srca
[2*x
+0]) dstbase
[4*x
+0]=src
[2*x
+0];
170 if(srca
[2*x
+1]) dstbase
[4*x
+2]=src
[2*x
+1];
/* Generic scalar blend: fade luma, pull chroma toward neutral 128. */
173 dstbase
[2*x
]=((dstbase
[2*x
]*srca
[x
])>>8)+src
[x
];
174 dstbase
[2*x
+1]=((((signed)dstbase
[2*x
+1]-128)*srca
[x
])>>8)+128;
/* Leave the MMX/FPU state clean for subsequent floating-point code. */
184 __asm__
volatile(EMMS:::"memory");
/*
 * Alpha-blend an OSD bitmap into a packed 24-bit RGB/BGR surface.
 * Scalar fallback (visible below): each of the three colour bytes is
 * blended as dst = ((dst * srca[x]) >> 8) + src[x], i.e. the same
 * greyscale OSD value is applied to all channels.  On x86, a 2-pixel
 * MMX path and a 1-pixel integer-register path are provided.
 */
189 static inline void RENAME(vo_draw_alpha_rgb24
)(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
/* MMX constants: mm7 = 0, mm6 = all-ones (used to bias the alpha bytes). */
193 "pxor %%mm7, %%mm7\n\t"
194 "pcmpeqb %%mm6, %%mm6\n\t" // F..F
198 register unsigned char *dst
= dstbase
;
200 #if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX)
206 ::"m"(*dst
),"m"(*srca
),"m"(*src
):"memory");
/* Only enter the blend when at least one of the two pixels is visible. */
208 if(srca
[x
] || srca
[x
+1])
/* MMX path: expand two alpha bytes across the 6 colour bytes,
 * multiply, then add the expanded src value.  mask24hl/mask24lh
 * (constraint operands below) merge the result back into the
 * packed 24-bit layout. */
213 "movq %0, %%mm0\n\t" // dstbase
214 "movq %%mm0, %%mm1\n\t"
215 "movq %%mm0, %%mm5\n\t"
216 "punpcklbw %%mm7, %%mm0\n\t"
217 "punpckhbw %%mm7, %%mm1\n\t"
218 "movd %1, %%mm2\n\t" // srca ABCD0000
219 "paddb %%mm6, %%mm2\n\t"
220 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
221 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
222 "psrlq $8, %%mm2\n\t" // srca AAABBBB0
223 "movq %%mm2, %%mm3\n\t"
224 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0B
225 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B00
226 "pmullw %%mm2, %%mm0\n\t"
227 "pmullw %%mm3, %%mm1\n\t"
228 "psrlw $8, %%mm0\n\t"
229 "psrlw $8, %%mm1\n\t"
230 "packuswb %%mm1, %%mm0\n\t"
231 "movd %2, %%mm2 \n\t" // src ABCD0000
232 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
233 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
234 "psrlq $8, %%mm2\n\t" // src AAABBBB0
235 "paddb %%mm2, %%mm0\n\t"
238 "por %%mm0, %%mm5\n\t"
240 :: "m" (dst
[0]), "m" (srca
[x
]), "m" (src
[x
]), "m"(mask24hl
), "m"(mask24lh
));
/* Non-MMX x86 path: blend the three colour bytes of one pixel with
 * integer multiplies; the high byte of each 16-bit product (%%ch/%%ah)
 * is the >>8 result, and operand %2 carries src[x]<<8 pre-shifted. */
247 "movzbl (%0), %%ecx\n\t"
248 "movzbl 1(%0), %%eax\n\t"
250 "imull %1, %%ecx\n\t"
251 "imull %1, %%eax\n\t"
256 "movb %%ch, (%0)\n\t"
257 "movb %%ah, 1(%0)\n\t"
259 "movzbl 2(%0), %%eax\n\t"
260 "imull %1, %%eax\n\t"
262 "movb %%ah, 2(%0)\n\t"
265 "r" ((unsigned)srca
[x
]),
266 "r" (((unsigned)src
[x
])<<8)
272 #endif /* !HAVE_MMX */
273 #else /*non x86 arch or x86_64 with MMX disabled */
/* Portable C path: FAST_OSD overwrites, otherwise per-byte blend. */
277 dst
[0]=dst
[1]=dst
[2]=src
[x
];
279 dst
[0]=((dst
[0]*srca
[x
])>>8)+src
[x
];
280 dst
[1]=((dst
[1]*srca
[x
])>>8)+src
[x
];
281 dst
[2]=((dst
[2]*srca
[x
])>>8)+src
[x
];
286 #endif /* arch_x86 */
/* Leave the MMX/FPU state clean for subsequent floating-point code. */
292 __asm__
volatile(EMMS:::"memory");
/*
 * Alpha-blend an OSD bitmap into a packed 32-bit RGB/BGR surface
 * (4 bytes per pixel).  Scalar blend, per colour byte:
 *     dst = ((dst * srca[x]) >> 8) + src[x]
 * The C fallback touches only bytes 0..2 of each pixel; the MMX paths
 * operate on whole quadwords (the 4th byte is processed as well).
 */
297 static inline void RENAME(vo_draw_alpha_rgb32
)(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
/* 3DNow! variant constants: mm7 = 0, mm6 = all-ones (alpha bias). */
305 "pxor %%mm7, %%mm7\n\t"
306 "pcmpeqb %%mm6, %%mm6\n\t" // F..F
308 #else /* HAVE_AMD3DNOW */
/* Plain MMX variant constants: mm7 = 0, mm4 = 00FF mask, mm5 = FF00 mask. */
310 "pxor %%mm7, %%mm7\n\t"
311 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
312 "movq %%mm5, %%mm4\n\t"
313 "psllw $8, %%mm5\n\t" //FF00FF00FF00
314 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
316 #endif /* HAVE_AMD3DNOW */
317 #endif /* HAVE_MMX */
320 #if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX)
327 ::"m"(*dstbase
),"m"(*srca
),"m"(*src
):"memory");
/* Only blend when at least one of the two pixels is visible. */
329 if(srca
[x
] || srca
[x
+1])
/* 3DNow!-oriented MMX path: two 32-bit pixels per iteration; the two
 * alpha bytes are each replicated across a pixel's four bytes. */
334 "movq %0, %%mm0\n\t" // dstbase
335 "movq %%mm0, %%mm1\n\t"
336 "punpcklbw %%mm7, %%mm0\n\t"
337 "punpckhbw %%mm7, %%mm1\n\t"
338 "movd %1, %%mm2\n\t" // srca ABCD0000
339 "paddb %%mm6, %%mm2\n\t"
340 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
341 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
342 "movq %%mm2, %%mm3\n\t"
343 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
344 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
345 "pmullw %%mm2, %%mm0\n\t"
346 "pmullw %%mm3, %%mm1\n\t"
347 "psrlw $8, %%mm0\n\t"
348 "psrlw $8, %%mm1\n\t"
349 "packuswb %%mm1, %%mm0\n\t"
350 "movd %2, %%mm2 \n\t" // src ABCD0000
351 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
352 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
353 "paddb %%mm2, %%mm0\n\t"
355 :: "m" (dstbase
[4*x
]), "m" (srca
[x
]), "m" (src
[x
]));
357 #else // this variant is faster on Intel CPUs
362 ::"m"(*dstbase
),"m"(*srca
),"m"(*src
):"memory");
/* Tests the 4 alpha bytes held in eax -- presumably followed by a
 * conditional skip of fully transparent groups (jump not visible here). */
366 "orl %%eax, %%eax\n\t"
/* Intel-oriented MMX path: four pixels per iteration, processed as two
 * quadwords (offsets %0 and 8%0); bFF biases the alpha bytes. */
371 "movq %0, %%mm0\n\t" // dstbase
372 "movq %%mm0, %%mm1\n\t"
373 "pand %%mm4, %%mm0\n\t" //0R0B0R0B
374 "psrlw $8, %%mm1\n\t" //0?0G0?0G
375 "movd %%eax, %%mm2\n\t" //srca 0000DCBA
376 "paddb %3, %%mm2\n\t"
377 "punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA
378 "movq %%mm2, %%mm3\n\t"
379 "punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A
380 "pmullw %%mm2, %%mm0\n\t"
381 "pmullw %%mm2, %%mm1\n\t"
382 "psrlw $8, %%mm0\n\t"
383 "pand %%mm5, %%mm1\n\t"
384 "por %%mm1, %%mm0\n\t"
385 "movd %2, %%mm2 \n\t" //src 0000DCBA
386 "punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA
387 "movq %%mm2, %%mm6\n\t"
388 "punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA
389 "paddb %%mm2, %%mm0\n\t"
/* Second quadword (pixels C and D) of the 4-pixel group. */
392 "movq 8%0, %%mm0\n\t" // dstbase
393 "movq %%mm0, %%mm1\n\t"
394 "pand %%mm4, %%mm0\n\t" //0R0B0R0B
395 "psrlw $8, %%mm1\n\t" //0?0G0?0G
396 "punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C
397 "pmullw %%mm3, %%mm0\n\t"
398 "pmullw %%mm3, %%mm1\n\t"
399 "psrlw $8, %%mm0\n\t"
400 "pand %%mm5, %%mm1\n\t"
401 "por %%mm1, %%mm0\n\t"
402 "punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC
403 "paddb %%mm6, %%mm0\n\t"
404 "movq %%mm0, 8%0\n\t"
406 :: "m" (dstbase
[4*x
]), "m" (srca
[x
]), "m" (src
[x
]), "m" (bFF
)
/* Non-MMX x86 path: blend the three colour bytes of one pixel with
 * integer multiplies; the high byte of each 16-bit product is the >>8
 * result, and operand %2 carries src[x]<<8 pre-shifted. */
414 "movzbl (%0), %%ecx\n\t"
415 "movzbl 1(%0), %%eax\n\t"
416 "movzbl 2(%0), %%edx\n\t"
418 "imull %1, %%ecx\n\t"
419 "imull %1, %%eax\n\t"
420 "imull %1, %%edx\n\t"
426 "movb %%ch, (%0)\n\t"
427 "movb %%ah, 1(%0)\n\t"
428 "movb %%dh, 2(%0)\n\t"
431 :"r" (&dstbase
[4*x
]),
432 "r" ((unsigned)srca
[x
]),
433 "r" (((unsigned)src
[x
])<<8)
434 :"%eax", "%ecx", "%edx"
438 #endif /* HAVE_MMX */
439 #else /*non x86 arch or x86_64 with MMX disabled */
/* Portable C path: FAST_OSD overwrites, otherwise per-byte blend. */
443 dstbase
[4*x
+0]=dstbase
[4*x
+1]=dstbase
[4*x
+2]=src
[x
];
445 dstbase
[4*x
+0]=((dstbase
[4*x
+0]*srca
[x
])>>8)+src
[x
];
446 dstbase
[4*x
+1]=((dstbase
[4*x
+1]*srca
[x
])>>8)+src
[x
];
447 dstbase
[4*x
+2]=((dstbase
[4*x
+2]*srca
[x
])>>8)+src
[x
];
451 #endif /* arch_x86 */
/* Leave the MMX/FPU state clean for subsequent floating-point code. */
457 __asm__
volatile(EMMS:::"memory");