1 // Generic alpha renderers for all YUV modes and RGB depths.
2 // Optimized by Nick and Michael
3 // Code from Michael Niedermayer (michaelni@gmx.at) is under GPL
11 #define PREFETCH "prefetch"
12 #define PREFETCHW "prefetchw"
13 #define PAVGB "pavgusb"
14 #elif defined ( HAVE_MMX2 )
15 #define PREFETCH "prefetchnta"
16 #define PREFETCHW "prefetcht0"
19 #define PREFETCH "/nop"
20 #define PREFETCHW "/nop"
24 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
30 static inline void RENAME(vo_draw_alpha_yv12
)(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
32 #if defined(FAST_OSD) && !defined(HAVE_MMX)
42 // "pxor %%mm7, %%mm7\n\t"
43 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
44 "movq %%mm5, %%mm4\n\t"
45 "psllw $8, %%mm5\n\t" //FF00FF00FF00
46 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
47 ::"m"(*dstbase
),"m"(*srca
),"m"(*src
):"memory");
56 "movq %0, %%mm0\n\t" // dstbase
57 "movq %%mm0, %%mm1\n\t"
58 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
59 "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y
60 "movq %1, %%mm2\n\t" //srca HGFEDCBA
61 "paddb "MANGLE(bFF
)", %%mm2\n\t"
62 "movq %%mm2, %%mm3\n\t"
63 "pand %%mm4, %%mm2\n\t" //0G0E0C0A
64 "psrlw $8, %%mm3\n\t" //0H0F0D0B
65 "pmullw %%mm2, %%mm0\n\t"
66 "pmullw %%mm3, %%mm1\n\t"
68 "pand %%mm5, %%mm1\n\t"
69 "por %%mm1, %%mm0\n\t"
73 :: "m" (dstbase
[x
]), "m" (srca
[x
]), "m" (src
[x
])
79 if(srca
[2*x
+0]) dstbase
[2*x
+0]=src
[2*x
+0];
80 if(srca
[2*x
+1]) dstbase
[2*x
+1]=src
[2*x
+1];
82 if(srca
[x
]) dstbase
[x
]=((dstbase
[x
]*srca
[x
])>>8)+src
[x
];
91 asm volatile(EMMS:::"memory");
96 static inline void RENAME(vo_draw_alpha_yuy2
)(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
98 #if defined(FAST_OSD) && !defined(HAVE_MMX)
108 "pxor %%mm7, %%mm7\n\t"
109 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
110 "movq %%mm5, %%mm4\n\t"
111 "psllw $8, %%mm5\n\t" //FF00FF00FF00
112 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
113 ::"m"(*dstbase
),"m"(*srca
),"m"(*src
));
117 "orl %%eax, %%eax\n\t"
122 "movq %0, %%mm0\n\t" // dstbase
123 "movq %%mm0, %%mm1\n\t"
124 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
125 "movd %%eax, %%mm2\n\t" //srca 0000DCBA
126 "paddb "MANGLE(bFF
)", %%mm2\n\t"
127 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
128 "pmullw %%mm2, %%mm0\n\t"
129 "psrlw $8, %%mm0\n\t"
130 "pand %%mm5, %%mm1\n\t" //U0V0U0V0
131 "movd %2, %%mm2\n\t" //src 0000DCBA
132 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
133 "por %%mm1, %%mm0\n\t"
134 "paddb %%mm2, %%mm0\n\t"
137 :: "m" (dstbase
[x
*2]), "m" (srca
[x
]), "m" (src
[x
])
143 if(srca
[2*x
+0]) dstbase
[4*x
+0]=src
[2*x
+0];
144 if(srca
[2*x
+1]) dstbase
[4*x
+2]=src
[2*x
+1];
147 dstbase
[2*x
]=((dstbase
[2*x
]*srca
[x
])>>8)+src
[x
];
148 dstbase
[2*x
+1]=((((signed)dstbase
[2*x
+1]-128)*srca
[x
])>>8)+128;
158 asm volatile(EMMS:::"memory");
163 static inline void RENAME(vo_draw_alpha_uyvy
)(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
165 #if defined(FAST_OSD)
172 if(srca
[2*x
+0]) dstbase
[4*x
+2]=src
[2*x
+0];
173 if(srca
[2*x
+1]) dstbase
[4*x
+0]=src
[2*x
+1];
176 dstbase
[2*x
+1]=((dstbase
[2*x
+1]*srca
[x
])>>8)+src
[x
];
177 dstbase
[2*x
]=((((signed)dstbase
[2*x
]-128)*srca
[x
])>>8)+128;
187 static inline void RENAME(vo_draw_alpha_rgb24
)(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
190 register unsigned char *dst
= dstbase
;
192 #if defined(ARCH_X86) || defined(ARCH_X86_64)
198 "pxor %%mm7, %%mm7\n\t"
199 "pcmpeqb %%mm6, %%mm6\n\t" // F..F
200 ::"m"(*dst
),"m"(*srca
),"m"(*src
):"memory");
202 if(srca
[x
] || srca
[x
+1])
207 "movq %0, %%mm0\n\t" // dstbase
208 "movq %%mm0, %%mm1\n\t"
209 "movq %%mm0, %%mm5\n\t"
210 "punpcklbw %%mm7, %%mm0\n\t"
211 "punpckhbw %%mm7, %%mm1\n\t"
212 "movd %1, %%mm2\n\t" // srca ABCD0000
213 "paddb %%mm6, %%mm2\n\t"
214 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
215 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
216 "psrlq $8, %%mm2\n\t" // srca AAABBBB0
217 "movq %%mm2, %%mm3\n\t"
218 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0B
219 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B00
220 "pmullw %%mm2, %%mm0\n\t"
221 "pmullw %%mm3, %%mm1\n\t"
222 "psrlw $8, %%mm0\n\t"
223 "psrlw $8, %%mm1\n\t"
224 "packuswb %%mm1, %%mm0\n\t"
225 "movd %2, %%mm2 \n\t" // src ABCD0000
226 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
227 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
228 "psrlq $8, %%mm2\n\t" // src AAABBBB0
229 "paddb %%mm2, %%mm0\n\t"
232 "por %%mm0, %%mm5\n\t"
234 :: "m" (dst
[0]), "m" (srca
[x
]), "m" (src
[x
]), "m"(mask24hl
), "m"(mask24lh
));
241 "movzbl (%0), %%ecx\n\t"
242 "movzbl 1(%0), %%eax\n\t"
244 "imull %1, %%ecx\n\t"
245 "imull %1, %%eax\n\t"
250 "movb %%ch, (%0)\n\t"
251 "movb %%ah, 1(%0)\n\t"
253 "movzbl 2(%0), %%eax\n\t"
254 "imull %1, %%eax\n\t"
256 "movb %%ah, 2(%0)\n\t"
259 "r" ((unsigned)srca
[x
]),
260 "r" (((unsigned)src
[x
])<<8)
266 #endif /* !HAVE_MMX */
267 #else /*non x86 arch*/
271 dst
[0]=dst
[1]=dst
[2]=src
[x
];
273 dst
[0]=((dst
[0]*srca
[x
])>>8)+src
[x
];
274 dst
[1]=((dst
[1]*srca
[x
])>>8)+src
[x
];
275 dst
[2]=((dst
[2]*srca
[x
])>>8)+src
[x
];
280 #endif /* arch_x86 */
286 asm volatile(EMMS:::"memory");
291 static inline void RENAME(vo_draw_alpha_rgb32
)(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
293 #ifdef WORDS_BIGENDIAN
298 #if defined(ARCH_X86) || defined(ARCH_X86_64)
305 "pxor %%mm7, %%mm7\n\t"
306 "pcmpeqb %%mm6, %%mm6\n\t" // F..F
307 ::"m"(*dstbase
),"m"(*srca
),"m"(*src
):"memory");
309 if(srca
[x
] || srca
[x
+1])
314 "movq %0, %%mm0\n\t" // dstbase
315 "movq %%mm0, %%mm1\n\t"
316 "punpcklbw %%mm7, %%mm0\n\t"
317 "punpckhbw %%mm7, %%mm1\n\t"
318 "movd %1, %%mm2\n\t" // srca ABCD0000
319 "paddb %%mm6, %%mm2\n\t"
320 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
321 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
322 "movq %%mm2, %%mm3\n\t"
323 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
324 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
325 "pmullw %%mm2, %%mm0\n\t"
326 "pmullw %%mm3, %%mm1\n\t"
327 "psrlw $8, %%mm0\n\t"
328 "psrlw $8, %%mm1\n\t"
329 "packuswb %%mm1, %%mm0\n\t"
330 "movd %2, %%mm2 \n\t" // src ABCD0000
331 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
332 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
333 "paddb %%mm2, %%mm0\n\t"
335 :: "m" (dstbase
[4*x
]), "m" (srca
[x
]), "m" (src
[x
]));
337 #else //this is faster for intels crap
342 "pxor %%mm7, %%mm7\n\t"
343 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
344 "movq %%mm5, %%mm4\n\t"
345 "psllw $8, %%mm5\n\t" //FF00FF00FF00
346 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
347 ::"m"(*dstbase
),"m"(*srca
),"m"(*src
):"memory");
351 "orl %%eax, %%eax\n\t"
356 "movq %0, %%mm0\n\t" // dstbase
357 "movq %%mm0, %%mm1\n\t"
358 "pand %%mm4, %%mm0\n\t" //0R0B0R0B
359 "psrlw $8, %%mm1\n\t" //0?0G0?0G
360 "movd %%eax, %%mm2\n\t" //srca 0000DCBA
361 "paddb "MANGLE(bFF
)", %%mm2\n\t"
362 "punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA
363 "movq %%mm2, %%mm3\n\t"
364 "punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A
365 "pmullw %%mm2, %%mm0\n\t"
366 "pmullw %%mm2, %%mm1\n\t"
367 "psrlw $8, %%mm0\n\t"
368 "pand %%mm5, %%mm1\n\t"
369 "por %%mm1, %%mm0\n\t"
370 "movd %2, %%mm2 \n\t" //src 0000DCBA
371 "punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA
372 "movq %%mm2, %%mm6\n\t"
373 "punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA
374 "paddb %%mm2, %%mm0\n\t"
377 "movq 8%0, %%mm0\n\t" // dstbase
378 "movq %%mm0, %%mm1\n\t"
379 "pand %%mm4, %%mm0\n\t" //0R0B0R0B
380 "psrlw $8, %%mm1\n\t" //0?0G0?0G
381 "punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C
382 "pmullw %%mm3, %%mm0\n\t"
383 "pmullw %%mm3, %%mm1\n\t"
384 "psrlw $8, %%mm0\n\t"
385 "pand %%mm5, %%mm1\n\t"
386 "por %%mm1, %%mm0\n\t"
387 "punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC
388 "paddb %%mm6, %%mm0\n\t"
389 "movq %%mm0, 8%0\n\t"
391 :: "m" (dstbase
[4*x
]), "m" (srca
[x
]), "m" (src
[x
])
399 "movzbl (%0), %%ecx\n\t"
400 "movzbl 1(%0), %%eax\n\t"
401 "movzbl 2(%0), %%edx\n\t"
403 "imull %1, %%ecx\n\t"
404 "imull %1, %%eax\n\t"
405 "imull %1, %%edx\n\t"
411 "movb %%ch, (%0)\n\t"
412 "movb %%ah, 1(%0)\n\t"
413 "movb %%dh, 2(%0)\n\t"
416 :"r" (&dstbase
[4*x
]),
417 "r" ((unsigned)srca
[x
]),
418 "r" (((unsigned)src
[x
])<<8)
419 :"%eax", "%ecx", "%edx"
423 #endif /* HAVE_MMX */
424 #else /*non x86 arch*/
428 dstbase
[4*x
+0]=dstbase
[4*x
+1]=dstbase
[4*x
+2]=src
[x
];
430 dstbase
[4*x
+0]=((dstbase
[4*x
+0]*srca
[x
])>>8)+src
[x
];
431 dstbase
[4*x
+1]=((dstbase
[4*x
+1]*srca
[x
])>>8)+src
[x
];
432 dstbase
[4*x
+2]=((dstbase
[4*x
+2]*srca
[x
])>>8)+src
[x
];
436 #endif /* arch_x86 */
442 asm volatile(EMMS:::"memory");