// MPlayer libvo/osd_template.c
// Generic alpha renderers for all YUV modes and RGB depths.
// Optimized by Nick and Michael
// Code from Michael Niedermayer (michaelni@gmx.at) is under GPL
// CPU-specific instruction-name selection.  This template is compiled once
// per CPU flavor (HAVE_3DNOW / HAVE_MMX2 / plain MMX or C), with function
// names disambiguated via RENAME().
#undef PREFETCH
#undef EMMS
#undef PREFETCHW
#undef PAVGB

#ifdef HAVE_3DNOW
#define PREFETCH  "prefetch"
#define PREFETCHW "prefetchw"
#define PAVGB     "pavgusb"
#elif defined ( HAVE_MMX2 )
#define PREFETCH "prefetchnta"
#define PREFETCHW "prefetcht0"
#define PAVGB     "pavgb"
#else
// "/nop" presumably expands to an assembler comment, i.e. no prefetching —
// TODO confirm against the target assembler's comment syntax.
// NOTE(review): PAVGB is deliberately left undefined here; any use of PAVGB
// must stay guarded by HAVE_3DNOW/HAVE_MMX2.
#define PREFETCH "/nop"
#define PREFETCHW "/nop"
#endif

#ifdef HAVE_3DNOW
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
#define EMMS     "femms"
#else
#define EMMS     "emms"
#endif
// Draw OSD onto one 8-bit plane (the Y plane of YV12).
// Per byte: dst = (dst*srca)>>8 + src; srca==0 means "keep destination".
// src/srca are w x h bitmaps with stride srcstride; the destination plane
// has stride dststride (all strides in bytes).
// With FAST_OSD (C path only) w is halved and src is copied through
// unblended, two bytes per step.
static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if defined(FAST_OSD) && !defined(HAVE_MMX)
    w=w>>1;
#endif
    for(y=0;y<h;y++){
	register int x;
#ifdef HAVE_MMX
	/* Per-row constants: mm4 = 00FF... (even-byte mask), mm5 = FF00... */
	asm volatile(
		PREFETCHW" %0\n\t"
		PREFETCH" %1\n\t"
		PREFETCH" %2\n\t"
//		"pxor %%mm7, %%mm7\n\t"
		"pcmpeqb %%mm5, %%mm5\n\t" // F..F
		"movq %%mm5, %%mm4\n\t"
		"psllw $8, %%mm5\n\t" //FF00FF00FF00
		"psrlw $8, %%mm4\n\t" //00FF00FF00FF
		::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
	/* 8 pixels per iteration; the whole group is skipped (jz 1f) when all
	 * 8 alpha bytes are zero.  bFF is an external 64-bit constant of 0xFF
	 * bytes — paddb with it subtracts 1 from each alpha byte (mod 256),
	 * presumably to map alpha 0/255 onto the multiplier range; defined
	 * elsewhere in the project. */
	for(x=0;x<w;x+=8){
		asm volatile(
		"movl %1, %%eax\n\t"
		"orl 4%1, %%eax\n\t"
		" jz 1f\n\t"
		PREFETCHW" 32%0\n\t"
		PREFETCH" 32%1\n\t"
		PREFETCH" 32%2\n\t"
		"movq %0, %%mm0\n\t" // dstbase
		"movq %%mm0, %%mm1\n\t"
		"pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
		"psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y
		"movq %1, %%mm2\n\t" //srca HGFEDCBA
		"paddb "MANGLE(bFF)", %%mm2\n\t"
		"movq %%mm2, %%mm3\n\t"
		"pand %%mm4, %%mm2\n\t" //0G0E0C0A
		"psrlw $8, %%mm3\n\t" //0H0F0D0B
		"pmullw %%mm2, %%mm0\n\t"
		"pmullw %%mm3, %%mm1\n\t"
		"psrlw $8, %%mm0\n\t"
		"pand %%mm5, %%mm1\n\t"
		"por %%mm1, %%mm0\n\t"
		"paddb %2, %%mm0\n\t"
		"movq %%mm0, %0\n\t"
		"1:\n\t"
		:: "m" (dstbase[x]), "m" (srca[x]), "m" (src[x])
		: "%eax");
	}
#else
	for(x=0;x<w;x++){
#ifdef FAST_OSD
	    if(srca[2*x+0]) dstbase[2*x+0]=src[2*x+0];
	    if(srca[2*x+1]) dstbase[2*x+1]=src[2*x+1];
#else
	    if(srca[x]) dstbase[x]=((dstbase[x]*srca[x])>>8)+src[x];
#endif
	}
#endif
	src+=srcstride;
	srca+=srcstride;
	dstbase+=dststride;
    }
#ifdef HAVE_MMX
	asm volatile(EMMS:::"memory");
#endif
    return;
}
// Draw OSD onto a packed YUY2 (Y U Y V) surface.
// C path, per pixel with srca!=0:
//   luma  (even byte):  Y' = (Y*srca)>>8 + src
//   chroma (odd byte):  C' = ((C-128)*srca)>>8 + 128   (fade to neutral gray)
// srca==0 keeps the destination pixel.  w/h are in luma pixels; dststride is
// in bytes (2 bytes per pixel).
static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if defined(FAST_OSD) && !defined(HAVE_MMX)
    w=w>>1;
#endif
    for(y=0;y<h;y++){
	register int x;
#ifdef HAVE_MMX
	/* Per-row constants: mm7 = 0, mm4 = 00FF... (luma mask),
	 * mm5 = FF00... (chroma mask). */
	asm volatile(
		PREFETCHW" %0\n\t"
		PREFETCH" %1\n\t"
		PREFETCH" %2\n\t"
		"pxor %%mm7, %%mm7\n\t"
		"pcmpeqb %%mm5, %%mm5\n\t" // F..F
		"movq %%mm5, %%mm4\n\t"
		"psllw $8, %%mm5\n\t" //FF00FF00FF00
		"psrlw $8, %%mm4\n\t" //00FF00FF00FF
		::"m"(*dstbase),"m"(*srca),"m"(*src));
	/* 4 pixels (8 dst bytes) per iteration; skipped when all 4 alpha
	 * bytes are zero.  NOTE(review): this MMX path passes chroma through
	 * unchanged (pand mm5 / por), unlike the C fallback below which fades
	 * chroma towards 128 — confirm whether the difference is intended. */
	for(x=0;x<w;x+=4){
		asm volatile(
		"movl %1, %%eax\n\t"
		"orl %%eax, %%eax\n\t"
		" jz 1f\n\t"
		PREFETCHW" 32%0\n\t"
		PREFETCH" 32%1\n\t"
		PREFETCH" 32%2\n\t"
		"movq %0, %%mm0\n\t" // dstbase
		"movq %%mm0, %%mm1\n\t"
		"pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
		"movd %%eax, %%mm2\n\t" //srca 0000DCBA
		"paddb "MANGLE(bFF)", %%mm2\n\t"
		"punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
		"pmullw %%mm2, %%mm0\n\t"
		"psrlw $8, %%mm0\n\t"
		"pand %%mm5, %%mm1\n\t" //U0V0U0V0
		"movd %2, %%mm2\n\t" //src 0000DCBA
		"punpcklbw %%mm7, %%mm2\n\t" //src 0D0C0B0A
		"por %%mm1, %%mm0\n\t"
		"paddb %%mm2, %%mm0\n\t"
		"movq %%mm0, %0\n\t"
		"1:\n\t"
		:: "m" (dstbase[x*2]), "m" (srca[x]), "m" (src[x])
		: "%eax");
	}
#else
	for(x=0;x<w;x++){
#ifdef FAST_OSD
	    if(srca[2*x+0]) dstbase[4*x+0]=src[2*x+0];
	    if(srca[2*x+1]) dstbase[4*x+2]=src[2*x+1];
#else
	    if(srca[x]) {
		dstbase[2*x]=((dstbase[2*x]*srca[x])>>8)+src[x];
		dstbase[2*x+1]=((((signed)dstbase[2*x+1]-128)*srca[x])>>8)+128;
	    }
#endif
	}
#endif
	src+=srcstride;
	srca+=srcstride;
	dstbase+=dststride;
    }
#ifdef HAVE_MMX
	asm volatile(EMMS:::"memory");
#endif
    return;
}
// Draw OSD onto a packed UYVY (U Y V Y) surface — same blend as the YUY2
// renderer but with luma on the odd bytes and chroma on the even bytes:
//   luma:   Y' = (Y*srca)>>8 + src
//   chroma: C' = ((C-128)*srca)>>8 + 128   (fade to neutral gray)
// srca==0 keeps the destination pixel.  C-only; no MMX path exists for this
// format.  w/h are in luma pixels; dststride is in bytes (2 per pixel).
static inline void RENAME(vo_draw_alpha_uyvy)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#if defined(FAST_OSD)
    w=w>>1;
#endif
    for(y=0;y<h;y++){
	register int x;
	for(x=0;x<w;x++){
#ifdef FAST_OSD
	    if(srca[2*x+0]) dstbase[4*x+2]=src[2*x+0];
	    if(srca[2*x+1]) dstbase[4*x+0]=src[2*x+1];
#else
	    if(srca[x]) {
		dstbase[2*x+1]=((dstbase[2*x+1]*srca[x])>>8)+src[x];
		dstbase[2*x]=((((signed)dstbase[2*x]-128)*srca[x])>>8)+128;
	    }
#endif
	}
	src+=srcstride;
	srca+=srcstride;
	dstbase+=dststride;
    }
}
// Draw OSD onto a packed 24bpp RGB/BGR surface.
// All three color channels get the same blend: c' = (c*srca)>>8 + src;
// srca==0 keeps the destination pixel.  dststride is in bytes.
static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
    for(y=0;y<h;y++){
	register unsigned char *dst = dstbase;
	register int x;
#if defined(ARCH_X86) || defined(ARCH_X86_64)
#ifdef HAVE_MMX
	asm volatile(
		PREFETCHW" %0\n\t"
		PREFETCH" %1\n\t"
		PREFETCH" %2\n\t"
		"pxor %%mm7, %%mm7\n\t"
		"pcmpeqb %%mm6, %%mm6\n\t" // F..F
		::"m"(*dst),"m"(*srca),"m"(*src):"memory");
	/* Two pixels (6 bytes) per iteration.  The store is a full 8-byte
	 * movq, so mask24hl/mask24lh (external constants) splice the blended
	 * 6 bytes with the 2 untouched trailing bytes of dst. */
	for(x=0;x<w;x+=2){
	    if(srca[x] || srca[x+1])
		asm volatile(
		PREFETCHW" 32%0\n\t"
		PREFETCH" 32%1\n\t"
		PREFETCH" 32%2\n\t"
		"movq %0, %%mm0\n\t" // dstbase
		"movq %%mm0, %%mm1\n\t"
		"movq %%mm0, %%mm5\n\t"
		"punpcklbw %%mm7, %%mm0\n\t"
		"punpckhbw %%mm7, %%mm1\n\t"
		"movd %1, %%mm2\n\t" // srca ABCD0000
		"paddb %%mm6, %%mm2\n\t"
		"punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
		"punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
		"psrlq $8, %%mm2\n\t" // srca AAABBBB0
		"movq %%mm2, %%mm3\n\t"
		"punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0B
		"punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B00
		"pmullw %%mm2, %%mm0\n\t"
		"pmullw %%mm3, %%mm1\n\t"
		"psrlw $8, %%mm0\n\t"
		"psrlw $8, %%mm1\n\t"
		"packuswb %%mm1, %%mm0\n\t"
		"movd %2, %%mm2 \n\t" // src ABCD0000
		"punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
		"punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
		"psrlq $8, %%mm2\n\t" // src AAABBBB0
		"paddb %%mm2, %%mm0\n\t"
		"pand %4, %%mm5\n\t"
		"pand %3, %%mm0\n\t"
		"por %%mm0, %%mm5\n\t"
		"movq %%mm5, %0\n\t"
		:: "m" (dst[0]), "m" (srca[x]), "m" (src[x]), "m"(mask24hl), "m"(mask24lh));
	    dst += 6;
	}
#else /* HAVE_MMX */
	for(x=0;x<w;x++){
	    if(srca[x]){
		/* Scalar x86 blend: the >>8 of each product is taken by
		 * reading the high byte registers %ah/%ch.
		 * NOTE(review): the asm stores through (dst) but declares no
		 * "memory" clobber — relies on the surrounding code never
		 * caching *dst across this statement; confirm. */
		asm volatile(
			"movzbl (%0), %%ecx\n\t"
			"movzbl 1(%0), %%eax\n\t"

			"imull %1, %%ecx\n\t"
			"imull %1, %%eax\n\t"

			"addl %2, %%ecx\n\t"
			"addl %2, %%eax\n\t"

			"movb %%ch, (%0)\n\t"
			"movb %%ah, 1(%0)\n\t"

			"movzbl 2(%0), %%eax\n\t"
			"imull %1, %%eax\n\t"
			"addl %2, %%eax\n\t"
			"movb %%ah, 2(%0)\n\t"

			:
			:"D" (dst),
			 "r" ((unsigned)srca[x]),
			 "r" (((unsigned)src[x])<<8)
			:"%eax", "%ecx"
			);
	    }
	    dst += 3;
	}
#endif /* !HAVE_MMX */
#else /*non x86 arch*/
	for(x=0;x<w;x++){
	    if(srca[x]){
#ifdef FAST_OSD
		dst[0]=dst[1]=dst[2]=src[x];
#else
		dst[0]=((dst[0]*srca[x])>>8)+src[x];
		dst[1]=((dst[1]*srca[x])>>8)+src[x];
		dst[2]=((dst[2]*srca[x])>>8)+src[x];
#endif
	    }
	    dst+=3; // 24bpp
	}
#endif /* arch_x86 */
	src+=srcstride;
	srca+=srcstride;
	dstbase+=dststride;
    }
#ifdef HAVE_MMX
	asm volatile(EMMS:::"memory");
#endif
    return;
}
// Draw OSD onto a packed 32bpp RGB surface (4 bytes per pixel; the 4th byte
// is skipped).  Channels 0..2 of each pixel get c' = (c*srca)>>8 + src;
// srca==0 keeps the destination pixel.  On big-endian layouts the color
// bytes start one byte in, hence the dstbase++ below.
static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
    int y;
#ifdef WORDS_BIGENDIAN
    dstbase++;
#endif
    for(y=0;y<h;y++){
	register int x;
#if defined(ARCH_X86) || defined(ARCH_X86_64)
#ifdef HAVE_MMX
#ifdef HAVE_3DNOW
	asm volatile(
		PREFETCHW" %0\n\t"
		PREFETCH" %1\n\t"
		PREFETCH" %2\n\t"
		"pxor %%mm7, %%mm7\n\t"
		"pcmpeqb %%mm6, %%mm6\n\t" // F..F
		::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
	/* Two pixels (8 bytes) per iteration; skipped when both alphas are 0. */
	for(x=0;x<w;x+=2){
	    if(srca[x] || srca[x+1])
		asm volatile(
		PREFETCHW" 32%0\n\t"
		PREFETCH" 32%1\n\t"
		PREFETCH" 32%2\n\t"
		"movq %0, %%mm0\n\t" // dstbase
		"movq %%mm0, %%mm1\n\t"
		"punpcklbw %%mm7, %%mm0\n\t"
		"punpckhbw %%mm7, %%mm1\n\t"
		"movd %1, %%mm2\n\t" // srca ABCD0000
		"paddb %%mm6, %%mm2\n\t"
		"punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
		"punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
		"movq %%mm2, %%mm3\n\t"
		"punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
		"punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
		"pmullw %%mm2, %%mm0\n\t"
		"pmullw %%mm3, %%mm1\n\t"
		"psrlw $8, %%mm0\n\t"
		"psrlw $8, %%mm1\n\t"
		"packuswb %%mm1, %%mm0\n\t"
		"movd %2, %%mm2 \n\t" // src ABCD0000
		"punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
		"punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
		"paddb %%mm2, %%mm0\n\t"
		"movq %%mm0, %0\n\t"
		:: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]));
	}
#else //this is faster for intels crap
	/* Per-row constants: mm7 = 0, mm4 = 00FF..., mm5 = FF00... */
	asm volatile(
		PREFETCHW" %0\n\t"
		PREFETCH" %1\n\t"
		PREFETCH" %2\n\t"
		"pxor %%mm7, %%mm7\n\t"
		"pcmpeqb %%mm5, %%mm5\n\t" // F..F
		"movq %%mm5, %%mm4\n\t"
		"psllw $8, %%mm5\n\t" //FF00FF00FF00
		"psrlw $8, %%mm4\n\t" //00FF00FF00FF
		::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
	/* Four pixels (16 dst bytes) per iteration; skipped (jz 1f) when all
	 * four alpha bytes are zero. */
	for(x=0;x<w;x+=4){
		asm volatile(
		"movl %1, %%eax\n\t"
		"orl %%eax, %%eax\n\t"
		" jz 1f\n\t"
		PREFETCHW" 32%0\n\t"
		PREFETCH" 32%1\n\t"
		PREFETCH" 32%2\n\t"
		"movq %0, %%mm0\n\t" // dstbase
		"movq %%mm0, %%mm1\n\t"
		"pand %%mm4, %%mm0\n\t" //0R0B0R0B
		"psrlw $8, %%mm1\n\t" //0?0G0?0G
		"movd %%eax, %%mm2\n\t" //srca 0000DCBA
		"paddb "MANGLE(bFF)", %%mm2\n\t"
		"punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA
		"movq %%mm2, %%mm3\n\t"
		"punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A
		"pmullw %%mm2, %%mm0\n\t"
		"pmullw %%mm2, %%mm1\n\t"
		"psrlw $8, %%mm0\n\t"
		"pand %%mm5, %%mm1\n\t"
		"por %%mm1, %%mm0\n\t"
		"movd %2, %%mm2 \n\t" //src 0000DCBA
		"punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA
		"movq %%mm2, %%mm6\n\t"
		"punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA
		"paddb %%mm2, %%mm0\n\t"
		"movq %%mm0, %0\n\t"

		"movq 8%0, %%mm0\n\t" // dstbase
		"movq %%mm0, %%mm1\n\t"
		"pand %%mm4, %%mm0\n\t" //0R0B0R0B
		"psrlw $8, %%mm1\n\t" //0?0G0?0G
		"punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C
		"pmullw %%mm3, %%mm0\n\t"
		"pmullw %%mm3, %%mm1\n\t"
		"psrlw $8, %%mm0\n\t"
		"pand %%mm5, %%mm1\n\t"
		"por %%mm1, %%mm0\n\t"
		"punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC
		"paddb %%mm6, %%mm0\n\t"
		"movq %%mm0, 8%0\n\t"
		"1:\n\t"
		:: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])
		: "%eax");
	}
#endif
#else /* HAVE_MMX */
	for(x=0;x<w;x++){
	    if(srca[x]){
		/* Scalar x86 blend of 3 channels; >>8 taken via %ah/%ch/%dh.
		 * NOTE(review): stores through the pointer without a "memory"
		 * clobber — see the rgb24 scalar path; confirm. */
		asm volatile(
			"movzbl (%0), %%ecx\n\t"
			"movzbl 1(%0), %%eax\n\t"
			"movzbl 2(%0), %%edx\n\t"

			"imull %1, %%ecx\n\t"
			"imull %1, %%eax\n\t"
			"imull %1, %%edx\n\t"

			"addl %2, %%ecx\n\t"
			"addl %2, %%eax\n\t"
			"addl %2, %%edx\n\t"

			"movb %%ch, (%0)\n\t"
			"movb %%ah, 1(%0)\n\t"
			"movb %%dh, 2(%0)\n\t"

			:
			:"r" (&dstbase[4*x]),
			 "r" ((unsigned)srca[x]),
			 "r" (((unsigned)src[x])<<8)
			:"%eax", "%ecx", "%edx"
			);
	    }
	}
#endif /* HAVE_MMX */
#else /*non x86 arch*/
	for(x=0;x<w;x++){
	    if(srca[x]){
#ifdef FAST_OSD
		dstbase[4*x+0]=dstbase[4*x+1]=dstbase[4*x+2]=src[x];
#else
		dstbase[4*x+0]=((dstbase[4*x+0]*srca[x])>>8)+src[x];
		dstbase[4*x+1]=((dstbase[4*x+1]*srca[x])>>8)+src[x];
		dstbase[4*x+2]=((dstbase[4*x+2]*srca[x])>>8)+src[x];
#endif
	    }
	}
#endif /* arch_x86 */
	src+=srcstride;
	srca+=srcstride;
	dstbase+=dststride;
    }
#ifdef HAVE_MMX
	asm volatile(EMMS:::"memory");
#endif
    return;
}