2 * software RGB to RGB converter
3 * plus software PAL8 to RGB converter
4 * software YUV to YUV converter
5 * software YUV to RGB converter
6 * Written by Nick Kurshev.
7 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
8 * lots of big-endian byte-order fixes by Alex Beregszaszi
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * The C code (not assembly, MMX, ...) of this file can be used
27 * under the LGPL license.
46 #define PREFETCH "prefetch"
47 #define PAVGB "pavgusb"
49 #define PREFETCH "prefetchnta"
52 #define PREFETCH " # nop"
56 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
63 #define MOVNTQ "movntq"
64 #define SFENCE "sfence"
67 #define SFENCE " # nop"
70 static inline void RENAME(rgb24tobgr32
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
73 const uint8_t *s
= src
;
76 const uint8_t *mm_end
;
80 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
82 __asm__
volatile("movq %0, %%mm7"::"m"(mask32a
):"memory");
87 "punpckldq 3%1, %%mm0 \n\t"
88 "movd 6%1, %%mm1 \n\t"
89 "punpckldq 9%1, %%mm1 \n\t"
90 "movd 12%1, %%mm2 \n\t"
91 "punpckldq 15%1, %%mm2 \n\t"
92 "movd 18%1, %%mm3 \n\t"
93 "punpckldq 21%1, %%mm3 \n\t"
94 "por %%mm7, %%mm0 \n\t"
95 "por %%mm7, %%mm1 \n\t"
96 "por %%mm7, %%mm2 \n\t"
97 "por %%mm7, %%mm3 \n\t"
98 MOVNTQ
" %%mm0, %0 \n\t"
99 MOVNTQ
" %%mm1, 8%0 \n\t"
100 MOVNTQ
" %%mm2, 16%0 \n\t"
108 __asm__
volatile(SFENCE:::"memory");
109 __asm__
volatile(EMMS:::"memory");
113 /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
128 static inline void RENAME(rgb32tobgr24
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
131 const uint8_t *s
= src
;
134 const uint8_t *mm_end
;
138 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
143 "movq %1, %%mm0 \n\t"
144 "movq 8%1, %%mm1 \n\t"
145 "movq 16%1, %%mm4 \n\t"
146 "movq 24%1, %%mm5 \n\t"
147 "movq %%mm0, %%mm2 \n\t"
148 "movq %%mm1, %%mm3 \n\t"
149 "movq %%mm4, %%mm6 \n\t"
150 "movq %%mm5, %%mm7 \n\t"
151 "psrlq $8, %%mm2 \n\t"
152 "psrlq $8, %%mm3 \n\t"
153 "psrlq $8, %%mm6 \n\t"
154 "psrlq $8, %%mm7 \n\t"
155 "pand %2, %%mm0 \n\t"
156 "pand %2, %%mm1 \n\t"
157 "pand %2, %%mm4 \n\t"
158 "pand %2, %%mm5 \n\t"
159 "pand %3, %%mm2 \n\t"
160 "pand %3, %%mm3 \n\t"
161 "pand %3, %%mm6 \n\t"
162 "pand %3, %%mm7 \n\t"
163 "por %%mm2, %%mm0 \n\t"
164 "por %%mm3, %%mm1 \n\t"
165 "por %%mm6, %%mm4 \n\t"
166 "por %%mm7, %%mm5 \n\t"
168 "movq %%mm1, %%mm2 \n\t"
169 "movq %%mm4, %%mm3 \n\t"
170 "psllq $48, %%mm2 \n\t"
171 "psllq $32, %%mm3 \n\t"
172 "pand %4, %%mm2 \n\t"
173 "pand %5, %%mm3 \n\t"
174 "por %%mm2, %%mm0 \n\t"
175 "psrlq $16, %%mm1 \n\t"
176 "psrlq $32, %%mm4 \n\t"
177 "psllq $16, %%mm5 \n\t"
178 "por %%mm3, %%mm1 \n\t"
179 "pand %6, %%mm5 \n\t"
180 "por %%mm5, %%mm4 \n\t"
182 MOVNTQ
" %%mm0, %0 \n\t"
183 MOVNTQ
" %%mm1, 8%0 \n\t"
186 :"m"(*s
),"m"(mask24l
),
187 "m"(mask24h
),"m"(mask24hh
),"m"(mask24hhh
),"m"(mask24hhhh
)
192 __asm__
volatile(SFENCE:::"memory");
193 __asm__
volatile(EMMS:::"memory");
197 /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
213 original by Strepto/Astral
214 ported to gcc & bugfixed: A'rpi
215 MMX2, 3DNOW optimization by Nick Kurshev
216 32-bit C version, and and&add trick by Michael Niedermayer
218 static inline void RENAME(rgb15to16
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
220 register const uint8_t* s
=src
;
221 register uint8_t* d
=dst
;
222 register const uint8_t *end
;
223 const uint8_t *mm_end
;
226 __asm__
volatile(PREFETCH
" %0"::"m"(*s
));
227 __asm__
volatile("movq %0, %%mm4"::"m"(mask15s
));
232 "movq %1, %%mm0 \n\t"
233 "movq 8%1, %%mm2 \n\t"
234 "movq %%mm0, %%mm1 \n\t"
235 "movq %%mm2, %%mm3 \n\t"
236 "pand %%mm4, %%mm0 \n\t"
237 "pand %%mm4, %%mm2 \n\t"
238 "paddw %%mm1, %%mm0 \n\t"
239 "paddw %%mm3, %%mm2 \n\t"
240 MOVNTQ
" %%mm0, %0 \n\t"
248 __asm__
volatile(SFENCE:::"memory");
249 __asm__
volatile(EMMS:::"memory");
253 register unsigned x
= *((const uint32_t *)s
);
254 *((uint32_t *)d
) = (x
&0x7FFF7FFF) + (x
&0x7FE07FE0);
259 register unsigned short x
= *((const uint16_t *)s
);
260 *((uint16_t *)d
) = (x
&0x7FFF) + (x
&0x7FE0);
264 static inline void RENAME(rgb16to15
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
266 register const uint8_t* s
=src
;
267 register uint8_t* d
=dst
;
268 register const uint8_t *end
;
269 const uint8_t *mm_end
;
272 __asm__
volatile(PREFETCH
" %0"::"m"(*s
));
273 __asm__
volatile("movq %0, %%mm7"::"m"(mask15rg
));
274 __asm__
volatile("movq %0, %%mm6"::"m"(mask15b
));
279 "movq %1, %%mm0 \n\t"
280 "movq 8%1, %%mm2 \n\t"
281 "movq %%mm0, %%mm1 \n\t"
282 "movq %%mm2, %%mm3 \n\t"
283 "psrlq $1, %%mm0 \n\t"
284 "psrlq $1, %%mm2 \n\t"
285 "pand %%mm7, %%mm0 \n\t"
286 "pand %%mm7, %%mm2 \n\t"
287 "pand %%mm6, %%mm1 \n\t"
288 "pand %%mm6, %%mm3 \n\t"
289 "por %%mm1, %%mm0 \n\t"
290 "por %%mm3, %%mm2 \n\t"
291 MOVNTQ
" %%mm0, %0 \n\t"
299 __asm__
volatile(SFENCE:::"memory");
300 __asm__
volatile(EMMS:::"memory");
304 register uint32_t x
= *((const uint32_t*)s
);
305 *((uint32_t *)d
) = ((x
>>1)&0x7FE07FE0) | (x
&0x001F001F);
310 register uint16_t x
= *((const uint16_t*)s
);
311 *((uint16_t *)d
) = ((x
>>1)&0x7FE0) | (x
&0x001F);
315 static inline void RENAME(rgb32to16
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
317 const uint8_t *s
= src
;
320 const uint8_t *mm_end
;
322 uint16_t *d
= (uint16_t *)dst
;
326 #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
328 "movq %3, %%mm5 \n\t"
329 "movq %4, %%mm6 \n\t"
330 "movq %5, %%mm7 \n\t"
334 PREFETCH
" 32(%1) \n\t"
335 "movd (%1), %%mm0 \n\t"
336 "movd 4(%1), %%mm3 \n\t"
337 "punpckldq 8(%1), %%mm0 \n\t"
338 "punpckldq 12(%1), %%mm3 \n\t"
339 "movq %%mm0, %%mm1 \n\t"
340 "movq %%mm3, %%mm4 \n\t"
341 "pand %%mm6, %%mm0 \n\t"
342 "pand %%mm6, %%mm3 \n\t"
343 "pmaddwd %%mm7, %%mm0 \n\t"
344 "pmaddwd %%mm7, %%mm3 \n\t"
345 "pand %%mm5, %%mm1 \n\t"
346 "pand %%mm5, %%mm4 \n\t"
347 "por %%mm1, %%mm0 \n\t"
348 "por %%mm4, %%mm3 \n\t"
349 "psrld $5, %%mm0 \n\t"
350 "pslld $11, %%mm3 \n\t"
351 "por %%mm3, %%mm0 \n\t"
352 MOVNTQ
" %%mm0, (%0) \n\t"
359 : "r" (mm_end
), "m" (mask3216g
), "m" (mask3216br
), "m" (mul3216
)
362 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
364 "movq %0, %%mm7 \n\t"
365 "movq %1, %%mm6 \n\t"
366 ::"m"(red_16mask
),"m"(green_16mask
));
370 "movd %1, %%mm0 \n\t"
371 "movd 4%1, %%mm3 \n\t"
372 "punpckldq 8%1, %%mm0 \n\t"
373 "punpckldq 12%1, %%mm3 \n\t"
374 "movq %%mm0, %%mm1 \n\t"
375 "movq %%mm0, %%mm2 \n\t"
376 "movq %%mm3, %%mm4 \n\t"
377 "movq %%mm3, %%mm5 \n\t"
378 "psrlq $3, %%mm0 \n\t"
379 "psrlq $3, %%mm3 \n\t"
380 "pand %2, %%mm0 \n\t"
381 "pand %2, %%mm3 \n\t"
382 "psrlq $5, %%mm1 \n\t"
383 "psrlq $5, %%mm4 \n\t"
384 "pand %%mm6, %%mm1 \n\t"
385 "pand %%mm6, %%mm4 \n\t"
386 "psrlq $8, %%mm2 \n\t"
387 "psrlq $8, %%mm5 \n\t"
388 "pand %%mm7, %%mm2 \n\t"
389 "pand %%mm7, %%mm5 \n\t"
390 "por %%mm1, %%mm0 \n\t"
391 "por %%mm4, %%mm3 \n\t"
392 "por %%mm2, %%mm0 \n\t"
393 "por %%mm5, %%mm3 \n\t"
394 "psllq $16, %%mm3 \n\t"
395 "por %%mm3, %%mm0 \n\t"
396 MOVNTQ
" %%mm0, %0 \n\t"
397 :"=m"(*d
):"m"(*s
),"m"(blue_16mask
):"memory");
402 __asm__
volatile(SFENCE:::"memory");
403 __asm__
volatile(EMMS:::"memory");
406 register int rgb
= *(const uint32_t*)s
; s
+= 4;
407 *d
++ = ((rgb
&0xFF)>>3) + ((rgb
&0xFC00)>>5) + ((rgb
&0xF80000)>>8);
411 static inline void RENAME(rgb32tobgr16
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
413 const uint8_t *s
= src
;
416 const uint8_t *mm_end
;
418 uint16_t *d
= (uint16_t *)dst
;
421 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
423 "movq %0, %%mm7 \n\t"
424 "movq %1, %%mm6 \n\t"
425 ::"m"(red_16mask
),"m"(green_16mask
));
430 "movd %1, %%mm0 \n\t"
431 "movd 4%1, %%mm3 \n\t"
432 "punpckldq 8%1, %%mm0 \n\t"
433 "punpckldq 12%1, %%mm3 \n\t"
434 "movq %%mm0, %%mm1 \n\t"
435 "movq %%mm0, %%mm2 \n\t"
436 "movq %%mm3, %%mm4 \n\t"
437 "movq %%mm3, %%mm5 \n\t"
438 "psllq $8, %%mm0 \n\t"
439 "psllq $8, %%mm3 \n\t"
440 "pand %%mm7, %%mm0 \n\t"
441 "pand %%mm7, %%mm3 \n\t"
442 "psrlq $5, %%mm1 \n\t"
443 "psrlq $5, %%mm4 \n\t"
444 "pand %%mm6, %%mm1 \n\t"
445 "pand %%mm6, %%mm4 \n\t"
446 "psrlq $19, %%mm2 \n\t"
447 "psrlq $19, %%mm5 \n\t"
448 "pand %2, %%mm2 \n\t"
449 "pand %2, %%mm5 \n\t"
450 "por %%mm1, %%mm0 \n\t"
451 "por %%mm4, %%mm3 \n\t"
452 "por %%mm2, %%mm0 \n\t"
453 "por %%mm5, %%mm3 \n\t"
454 "psllq $16, %%mm3 \n\t"
455 "por %%mm3, %%mm0 \n\t"
456 MOVNTQ
" %%mm0, %0 \n\t"
457 :"=m"(*d
):"m"(*s
),"m"(blue_16mask
):"memory");
461 __asm__
volatile(SFENCE:::"memory");
462 __asm__
volatile(EMMS:::"memory");
465 register int rgb
= *(const uint32_t*)s
; s
+= 4;
466 *d
++ = ((rgb
&0xF8)<<8) + ((rgb
&0xFC00)>>5) + ((rgb
&0xF80000)>>19);
470 static inline void RENAME(rgb32to15
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
472 const uint8_t *s
= src
;
475 const uint8_t *mm_end
;
477 uint16_t *d
= (uint16_t *)dst
;
481 #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
483 "movq %3, %%mm5 \n\t"
484 "movq %4, %%mm6 \n\t"
485 "movq %5, %%mm7 \n\t"
489 PREFETCH
" 32(%1) \n\t"
490 "movd (%1), %%mm0 \n\t"
491 "movd 4(%1), %%mm3 \n\t"
492 "punpckldq 8(%1), %%mm0 \n\t"
493 "punpckldq 12(%1), %%mm3 \n\t"
494 "movq %%mm0, %%mm1 \n\t"
495 "movq %%mm3, %%mm4 \n\t"
496 "pand %%mm6, %%mm0 \n\t"
497 "pand %%mm6, %%mm3 \n\t"
498 "pmaddwd %%mm7, %%mm0 \n\t"
499 "pmaddwd %%mm7, %%mm3 \n\t"
500 "pand %%mm5, %%mm1 \n\t"
501 "pand %%mm5, %%mm4 \n\t"
502 "por %%mm1, %%mm0 \n\t"
503 "por %%mm4, %%mm3 \n\t"
504 "psrld $6, %%mm0 \n\t"
505 "pslld $10, %%mm3 \n\t"
506 "por %%mm3, %%mm0 \n\t"
507 MOVNTQ
" %%mm0, (%0) \n\t"
514 : "r" (mm_end
), "m" (mask3215g
), "m" (mask3216br
), "m" (mul3215
)
517 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
519 "movq %0, %%mm7 \n\t"
520 "movq %1, %%mm6 \n\t"
521 ::"m"(red_15mask
),"m"(green_15mask
));
525 "movd %1, %%mm0 \n\t"
526 "movd 4%1, %%mm3 \n\t"
527 "punpckldq 8%1, %%mm0 \n\t"
528 "punpckldq 12%1, %%mm3 \n\t"
529 "movq %%mm0, %%mm1 \n\t"
530 "movq %%mm0, %%mm2 \n\t"
531 "movq %%mm3, %%mm4 \n\t"
532 "movq %%mm3, %%mm5 \n\t"
533 "psrlq $3, %%mm0 \n\t"
534 "psrlq $3, %%mm3 \n\t"
535 "pand %2, %%mm0 \n\t"
536 "pand %2, %%mm3 \n\t"
537 "psrlq $6, %%mm1 \n\t"
538 "psrlq $6, %%mm4 \n\t"
539 "pand %%mm6, %%mm1 \n\t"
540 "pand %%mm6, %%mm4 \n\t"
541 "psrlq $9, %%mm2 \n\t"
542 "psrlq $9, %%mm5 \n\t"
543 "pand %%mm7, %%mm2 \n\t"
544 "pand %%mm7, %%mm5 \n\t"
545 "por %%mm1, %%mm0 \n\t"
546 "por %%mm4, %%mm3 \n\t"
547 "por %%mm2, %%mm0 \n\t"
548 "por %%mm5, %%mm3 \n\t"
549 "psllq $16, %%mm3 \n\t"
550 "por %%mm3, %%mm0 \n\t"
551 MOVNTQ
" %%mm0, %0 \n\t"
552 :"=m"(*d
):"m"(*s
),"m"(blue_15mask
):"memory");
557 __asm__
volatile(SFENCE:::"memory");
558 __asm__
volatile(EMMS:::"memory");
561 register int rgb
= *(const uint32_t*)s
; s
+= 4;
562 *d
++ = ((rgb
&0xFF)>>3) + ((rgb
&0xF800)>>6) + ((rgb
&0xF80000)>>9);
566 static inline void RENAME(rgb32tobgr15
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
568 const uint8_t *s
= src
;
571 const uint8_t *mm_end
;
573 uint16_t *d
= (uint16_t *)dst
;
576 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
578 "movq %0, %%mm7 \n\t"
579 "movq %1, %%mm6 \n\t"
580 ::"m"(red_15mask
),"m"(green_15mask
));
585 "movd %1, %%mm0 \n\t"
586 "movd 4%1, %%mm3 \n\t"
587 "punpckldq 8%1, %%mm0 \n\t"
588 "punpckldq 12%1, %%mm3 \n\t"
589 "movq %%mm0, %%mm1 \n\t"
590 "movq %%mm0, %%mm2 \n\t"
591 "movq %%mm3, %%mm4 \n\t"
592 "movq %%mm3, %%mm5 \n\t"
593 "psllq $7, %%mm0 \n\t"
594 "psllq $7, %%mm3 \n\t"
595 "pand %%mm7, %%mm0 \n\t"
596 "pand %%mm7, %%mm3 \n\t"
597 "psrlq $6, %%mm1 \n\t"
598 "psrlq $6, %%mm4 \n\t"
599 "pand %%mm6, %%mm1 \n\t"
600 "pand %%mm6, %%mm4 \n\t"
601 "psrlq $19, %%mm2 \n\t"
602 "psrlq $19, %%mm5 \n\t"
603 "pand %2, %%mm2 \n\t"
604 "pand %2, %%mm5 \n\t"
605 "por %%mm1, %%mm0 \n\t"
606 "por %%mm4, %%mm3 \n\t"
607 "por %%mm2, %%mm0 \n\t"
608 "por %%mm5, %%mm3 \n\t"
609 "psllq $16, %%mm3 \n\t"
610 "por %%mm3, %%mm0 \n\t"
611 MOVNTQ
" %%mm0, %0 \n\t"
612 :"=m"(*d
):"m"(*s
),"m"(blue_15mask
):"memory");
616 __asm__
volatile(SFENCE:::"memory");
617 __asm__
volatile(EMMS:::"memory");
620 register int rgb
= *(const uint32_t*)s
; s
+= 4;
621 *d
++ = ((rgb
&0xF8)<<7) + ((rgb
&0xF800)>>6) + ((rgb
&0xF80000)>>19);
625 static inline void RENAME(rgb24tobgr16
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
627 const uint8_t *s
= src
;
630 const uint8_t *mm_end
;
632 uint16_t *d
= (uint16_t *)dst
;
635 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
637 "movq %0, %%mm7 \n\t"
638 "movq %1, %%mm6 \n\t"
639 ::"m"(red_16mask
),"m"(green_16mask
));
644 "movd %1, %%mm0 \n\t"
645 "movd 3%1, %%mm3 \n\t"
646 "punpckldq 6%1, %%mm0 \n\t"
647 "punpckldq 9%1, %%mm3 \n\t"
648 "movq %%mm0, %%mm1 \n\t"
649 "movq %%mm0, %%mm2 \n\t"
650 "movq %%mm3, %%mm4 \n\t"
651 "movq %%mm3, %%mm5 \n\t"
652 "psrlq $3, %%mm0 \n\t"
653 "psrlq $3, %%mm3 \n\t"
654 "pand %2, %%mm0 \n\t"
655 "pand %2, %%mm3 \n\t"
656 "psrlq $5, %%mm1 \n\t"
657 "psrlq $5, %%mm4 \n\t"
658 "pand %%mm6, %%mm1 \n\t"
659 "pand %%mm6, %%mm4 \n\t"
660 "psrlq $8, %%mm2 \n\t"
661 "psrlq $8, %%mm5 \n\t"
662 "pand %%mm7, %%mm2 \n\t"
663 "pand %%mm7, %%mm5 \n\t"
664 "por %%mm1, %%mm0 \n\t"
665 "por %%mm4, %%mm3 \n\t"
666 "por %%mm2, %%mm0 \n\t"
667 "por %%mm5, %%mm3 \n\t"
668 "psllq $16, %%mm3 \n\t"
669 "por %%mm3, %%mm0 \n\t"
670 MOVNTQ
" %%mm0, %0 \n\t"
671 :"=m"(*d
):"m"(*s
),"m"(blue_16mask
):"memory");
675 __asm__
volatile(SFENCE:::"memory");
676 __asm__
volatile(EMMS:::"memory");
682 *d
++ = (b
>>3) | ((g
&0xFC)<<3) | ((r
&0xF8)<<8);
686 static inline void RENAME(rgb24to16
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
688 const uint8_t *s
= src
;
691 const uint8_t *mm_end
;
693 uint16_t *d
= (uint16_t *)dst
;
696 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
698 "movq %0, %%mm7 \n\t"
699 "movq %1, %%mm6 \n\t"
700 ::"m"(red_16mask
),"m"(green_16mask
));
705 "movd %1, %%mm0 \n\t"
706 "movd 3%1, %%mm3 \n\t"
707 "punpckldq 6%1, %%mm0 \n\t"
708 "punpckldq 9%1, %%mm3 \n\t"
709 "movq %%mm0, %%mm1 \n\t"
710 "movq %%mm0, %%mm2 \n\t"
711 "movq %%mm3, %%mm4 \n\t"
712 "movq %%mm3, %%mm5 \n\t"
713 "psllq $8, %%mm0 \n\t"
714 "psllq $8, %%mm3 \n\t"
715 "pand %%mm7, %%mm0 \n\t"
716 "pand %%mm7, %%mm3 \n\t"
717 "psrlq $5, %%mm1 \n\t"
718 "psrlq $5, %%mm4 \n\t"
719 "pand %%mm6, %%mm1 \n\t"
720 "pand %%mm6, %%mm4 \n\t"
721 "psrlq $19, %%mm2 \n\t"
722 "psrlq $19, %%mm5 \n\t"
723 "pand %2, %%mm2 \n\t"
724 "pand %2, %%mm5 \n\t"
725 "por %%mm1, %%mm0 \n\t"
726 "por %%mm4, %%mm3 \n\t"
727 "por %%mm2, %%mm0 \n\t"
728 "por %%mm5, %%mm3 \n\t"
729 "psllq $16, %%mm3 \n\t"
730 "por %%mm3, %%mm0 \n\t"
731 MOVNTQ
" %%mm0, %0 \n\t"
732 :"=m"(*d
):"m"(*s
),"m"(blue_16mask
):"memory");
736 __asm__
volatile(SFENCE:::"memory");
737 __asm__
volatile(EMMS:::"memory");
743 *d
++ = (b
>>3) | ((g
&0xFC)<<3) | ((r
&0xF8)<<8);
747 static inline void RENAME(rgb24tobgr15
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
749 const uint8_t *s
= src
;
752 const uint8_t *mm_end
;
754 uint16_t *d
= (uint16_t *)dst
;
757 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
759 "movq %0, %%mm7 \n\t"
760 "movq %1, %%mm6 \n\t"
761 ::"m"(red_15mask
),"m"(green_15mask
));
766 "movd %1, %%mm0 \n\t"
767 "movd 3%1, %%mm3 \n\t"
768 "punpckldq 6%1, %%mm0 \n\t"
769 "punpckldq 9%1, %%mm3 \n\t"
770 "movq %%mm0, %%mm1 \n\t"
771 "movq %%mm0, %%mm2 \n\t"
772 "movq %%mm3, %%mm4 \n\t"
773 "movq %%mm3, %%mm5 \n\t"
774 "psrlq $3, %%mm0 \n\t"
775 "psrlq $3, %%mm3 \n\t"
776 "pand %2, %%mm0 \n\t"
777 "pand %2, %%mm3 \n\t"
778 "psrlq $6, %%mm1 \n\t"
779 "psrlq $6, %%mm4 \n\t"
780 "pand %%mm6, %%mm1 \n\t"
781 "pand %%mm6, %%mm4 \n\t"
782 "psrlq $9, %%mm2 \n\t"
783 "psrlq $9, %%mm5 \n\t"
784 "pand %%mm7, %%mm2 \n\t"
785 "pand %%mm7, %%mm5 \n\t"
786 "por %%mm1, %%mm0 \n\t"
787 "por %%mm4, %%mm3 \n\t"
788 "por %%mm2, %%mm0 \n\t"
789 "por %%mm5, %%mm3 \n\t"
790 "psllq $16, %%mm3 \n\t"
791 "por %%mm3, %%mm0 \n\t"
792 MOVNTQ
" %%mm0, %0 \n\t"
793 :"=m"(*d
):"m"(*s
),"m"(blue_15mask
):"memory");
797 __asm__
volatile(SFENCE:::"memory");
798 __asm__
volatile(EMMS:::"memory");
804 *d
++ = (b
>>3) | ((g
&0xF8)<<2) | ((r
&0xF8)<<7);
808 static inline void RENAME(rgb24to15
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
810 const uint8_t *s
= src
;
813 const uint8_t *mm_end
;
815 uint16_t *d
= (uint16_t *)dst
;
818 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
820 "movq %0, %%mm7 \n\t"
821 "movq %1, %%mm6 \n\t"
822 ::"m"(red_15mask
),"m"(green_15mask
));
827 "movd %1, %%mm0 \n\t"
828 "movd 3%1, %%mm3 \n\t"
829 "punpckldq 6%1, %%mm0 \n\t"
830 "punpckldq 9%1, %%mm3 \n\t"
831 "movq %%mm0, %%mm1 \n\t"
832 "movq %%mm0, %%mm2 \n\t"
833 "movq %%mm3, %%mm4 \n\t"
834 "movq %%mm3, %%mm5 \n\t"
835 "psllq $7, %%mm0 \n\t"
836 "psllq $7, %%mm3 \n\t"
837 "pand %%mm7, %%mm0 \n\t"
838 "pand %%mm7, %%mm3 \n\t"
839 "psrlq $6, %%mm1 \n\t"
840 "psrlq $6, %%mm4 \n\t"
841 "pand %%mm6, %%mm1 \n\t"
842 "pand %%mm6, %%mm4 \n\t"
843 "psrlq $19, %%mm2 \n\t"
844 "psrlq $19, %%mm5 \n\t"
845 "pand %2, %%mm2 \n\t"
846 "pand %2, %%mm5 \n\t"
847 "por %%mm1, %%mm0 \n\t"
848 "por %%mm4, %%mm3 \n\t"
849 "por %%mm2, %%mm0 \n\t"
850 "por %%mm5, %%mm3 \n\t"
851 "psllq $16, %%mm3 \n\t"
852 "por %%mm3, %%mm0 \n\t"
853 MOVNTQ
" %%mm0, %0 \n\t"
854 :"=m"(*d
):"m"(*s
),"m"(blue_15mask
):"memory");
858 __asm__
volatile(SFENCE:::"memory");
859 __asm__
volatile(EMMS:::"memory");
865 *d
++ = (b
>>3) | ((g
&0xF8)<<2) | ((r
&0xF8)<<7);
870 I use less accurate approximation here by simply left-shifting the input
871 value and filling the low order bits with zeroes. This method improves PNG
872 compression but this scheme cannot reproduce white exactly, since it does
873 * not generate an all-ones maximum value; the net effect is to darken the image slightly.
876 The better method should be "left bit replication":
886 | leftmost bits repeated to fill open bits
890 static inline void RENAME(rgb15tobgr24
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
894 const uint16_t *mm_end
;
897 const uint16_t *s
= (const uint16_t*)src
;
898 end
= s
+ src_size
/2;
900 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
905 "movq %1, %%mm0 \n\t"
906 "movq %1, %%mm1 \n\t"
907 "movq %1, %%mm2 \n\t"
908 "pand %2, %%mm0 \n\t"
909 "pand %3, %%mm1 \n\t"
910 "pand %4, %%mm2 \n\t"
911 "psllq $3, %%mm0 \n\t"
912 "psrlq $2, %%mm1 \n\t"
913 "psrlq $7, %%mm2 \n\t"
914 "movq %%mm0, %%mm3 \n\t"
915 "movq %%mm1, %%mm4 \n\t"
916 "movq %%mm2, %%mm5 \n\t"
917 "punpcklwd %5, %%mm0 \n\t"
918 "punpcklwd %5, %%mm1 \n\t"
919 "punpcklwd %5, %%mm2 \n\t"
920 "punpckhwd %5, %%mm3 \n\t"
921 "punpckhwd %5, %%mm4 \n\t"
922 "punpckhwd %5, %%mm5 \n\t"
923 "psllq $8, %%mm1 \n\t"
924 "psllq $16, %%mm2 \n\t"
925 "por %%mm1, %%mm0 \n\t"
926 "por %%mm2, %%mm0 \n\t"
927 "psllq $8, %%mm4 \n\t"
928 "psllq $16, %%mm5 \n\t"
929 "por %%mm4, %%mm3 \n\t"
930 "por %%mm5, %%mm3 \n\t"
932 "movq %%mm0, %%mm6 \n\t"
933 "movq %%mm3, %%mm7 \n\t"
935 "movq 8%1, %%mm0 \n\t"
936 "movq 8%1, %%mm1 \n\t"
937 "movq 8%1, %%mm2 \n\t"
938 "pand %2, %%mm0 \n\t"
939 "pand %3, %%mm1 \n\t"
940 "pand %4, %%mm2 \n\t"
941 "psllq $3, %%mm0 \n\t"
942 "psrlq $2, %%mm1 \n\t"
943 "psrlq $7, %%mm2 \n\t"
944 "movq %%mm0, %%mm3 \n\t"
945 "movq %%mm1, %%mm4 \n\t"
946 "movq %%mm2, %%mm5 \n\t"
947 "punpcklwd %5, %%mm0 \n\t"
948 "punpcklwd %5, %%mm1 \n\t"
949 "punpcklwd %5, %%mm2 \n\t"
950 "punpckhwd %5, %%mm3 \n\t"
951 "punpckhwd %5, %%mm4 \n\t"
952 "punpckhwd %5, %%mm5 \n\t"
953 "psllq $8, %%mm1 \n\t"
954 "psllq $16, %%mm2 \n\t"
955 "por %%mm1, %%mm0 \n\t"
956 "por %%mm2, %%mm0 \n\t"
957 "psllq $8, %%mm4 \n\t"
958 "psllq $16, %%mm5 \n\t"
959 "por %%mm4, %%mm3 \n\t"
960 "por %%mm5, %%mm3 \n\t"
963 :"m"(*s
),"m"(mask15b
),"m"(mask15g
),"m"(mask15r
), "m"(mmx_null
)
965 /* borrowed 32 to 24 */
967 "movq %%mm0, %%mm4 \n\t"
968 "movq %%mm3, %%mm5 \n\t"
969 "movq %%mm6, %%mm0 \n\t"
970 "movq %%mm7, %%mm1 \n\t"
972 "movq %%mm4, %%mm6 \n\t"
973 "movq %%mm5, %%mm7 \n\t"
974 "movq %%mm0, %%mm2 \n\t"
975 "movq %%mm1, %%mm3 \n\t"
977 "psrlq $8, %%mm2 \n\t"
978 "psrlq $8, %%mm3 \n\t"
979 "psrlq $8, %%mm6 \n\t"
980 "psrlq $8, %%mm7 \n\t"
981 "pand %2, %%mm0 \n\t"
982 "pand %2, %%mm1 \n\t"
983 "pand %2, %%mm4 \n\t"
984 "pand %2, %%mm5 \n\t"
985 "pand %3, %%mm2 \n\t"
986 "pand %3, %%mm3 \n\t"
987 "pand %3, %%mm6 \n\t"
988 "pand %3, %%mm7 \n\t"
989 "por %%mm2, %%mm0 \n\t"
990 "por %%mm3, %%mm1 \n\t"
991 "por %%mm6, %%mm4 \n\t"
992 "por %%mm7, %%mm5 \n\t"
994 "movq %%mm1, %%mm2 \n\t"
995 "movq %%mm4, %%mm3 \n\t"
996 "psllq $48, %%mm2 \n\t"
997 "psllq $32, %%mm3 \n\t"
998 "pand %4, %%mm2 \n\t"
999 "pand %5, %%mm3 \n\t"
1000 "por %%mm2, %%mm0 \n\t"
1001 "psrlq $16, %%mm1 \n\t"
1002 "psrlq $32, %%mm4 \n\t"
1003 "psllq $16, %%mm5 \n\t"
1004 "por %%mm3, %%mm1 \n\t"
1005 "pand %6, %%mm5 \n\t"
1006 "por %%mm5, %%mm4 \n\t"
1008 MOVNTQ
" %%mm0, %0 \n\t"
1009 MOVNTQ
" %%mm1, 8%0 \n\t"
1010 MOVNTQ
" %%mm4, 16%0"
1013 :"m"(*s
),"m"(mask24l
),"m"(mask24h
),"m"(mask24hh
),"m"(mask24hhh
),"m"(mask24hhhh
)
1018 __asm__
volatile(SFENCE:::"memory");
1019 __asm__
volatile(EMMS:::"memory");
1022 register uint16_t bgr
;
1024 *d
++ = (bgr
&0x1F)<<3;
1025 *d
++ = (bgr
&0x3E0)>>2;
1026 *d
++ = (bgr
&0x7C00)>>7;
1030 static inline void RENAME(rgb16tobgr24
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
1032 const uint16_t *end
;
1034 const uint16_t *mm_end
;
1036 uint8_t *d
= (uint8_t *)dst
;
1037 const uint16_t *s
= (const uint16_t *)src
;
1038 end
= s
+ src_size
/2;
1040 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
1042 while (s
< mm_end
) {
1044 PREFETCH
" 32%1 \n\t"
1045 "movq %1, %%mm0 \n\t"
1046 "movq %1, %%mm1 \n\t"
1047 "movq %1, %%mm2 \n\t"
1048 "pand %2, %%mm0 \n\t"
1049 "pand %3, %%mm1 \n\t"
1050 "pand %4, %%mm2 \n\t"
1051 "psllq $3, %%mm0 \n\t"
1052 "psrlq $3, %%mm1 \n\t"
1053 "psrlq $8, %%mm2 \n\t"
1054 "movq %%mm0, %%mm3 \n\t"
1055 "movq %%mm1, %%mm4 \n\t"
1056 "movq %%mm2, %%mm5 \n\t"
1057 "punpcklwd %5, %%mm0 \n\t"
1058 "punpcklwd %5, %%mm1 \n\t"
1059 "punpcklwd %5, %%mm2 \n\t"
1060 "punpckhwd %5, %%mm3 \n\t"
1061 "punpckhwd %5, %%mm4 \n\t"
1062 "punpckhwd %5, %%mm5 \n\t"
1063 "psllq $8, %%mm1 \n\t"
1064 "psllq $16, %%mm2 \n\t"
1065 "por %%mm1, %%mm0 \n\t"
1066 "por %%mm2, %%mm0 \n\t"
1067 "psllq $8, %%mm4 \n\t"
1068 "psllq $16, %%mm5 \n\t"
1069 "por %%mm4, %%mm3 \n\t"
1070 "por %%mm5, %%mm3 \n\t"
1072 "movq %%mm0, %%mm6 \n\t"
1073 "movq %%mm3, %%mm7 \n\t"
1075 "movq 8%1, %%mm0 \n\t"
1076 "movq 8%1, %%mm1 \n\t"
1077 "movq 8%1, %%mm2 \n\t"
1078 "pand %2, %%mm0 \n\t"
1079 "pand %3, %%mm1 \n\t"
1080 "pand %4, %%mm2 \n\t"
1081 "psllq $3, %%mm0 \n\t"
1082 "psrlq $3, %%mm1 \n\t"
1083 "psrlq $8, %%mm2 \n\t"
1084 "movq %%mm0, %%mm3 \n\t"
1085 "movq %%mm1, %%mm4 \n\t"
1086 "movq %%mm2, %%mm5 \n\t"
1087 "punpcklwd %5, %%mm0 \n\t"
1088 "punpcklwd %5, %%mm1 \n\t"
1089 "punpcklwd %5, %%mm2 \n\t"
1090 "punpckhwd %5, %%mm3 \n\t"
1091 "punpckhwd %5, %%mm4 \n\t"
1092 "punpckhwd %5, %%mm5 \n\t"
1093 "psllq $8, %%mm1 \n\t"
1094 "psllq $16, %%mm2 \n\t"
1095 "por %%mm1, %%mm0 \n\t"
1096 "por %%mm2, %%mm0 \n\t"
1097 "psllq $8, %%mm4 \n\t"
1098 "psllq $16, %%mm5 \n\t"
1099 "por %%mm4, %%mm3 \n\t"
1100 "por %%mm5, %%mm3 \n\t"
1102 :"m"(*s
),"m"(mask16b
),"m"(mask16g
),"m"(mask16r
),"m"(mmx_null
)
1104 /* borrowed 32 to 24 */
1106 "movq %%mm0, %%mm4 \n\t"
1107 "movq %%mm3, %%mm5 \n\t"
1108 "movq %%mm6, %%mm0 \n\t"
1109 "movq %%mm7, %%mm1 \n\t"
1111 "movq %%mm4, %%mm6 \n\t"
1112 "movq %%mm5, %%mm7 \n\t"
1113 "movq %%mm0, %%mm2 \n\t"
1114 "movq %%mm1, %%mm3 \n\t"
1116 "psrlq $8, %%mm2 \n\t"
1117 "psrlq $8, %%mm3 \n\t"
1118 "psrlq $8, %%mm6 \n\t"
1119 "psrlq $8, %%mm7 \n\t"
1120 "pand %2, %%mm0 \n\t"
1121 "pand %2, %%mm1 \n\t"
1122 "pand %2, %%mm4 \n\t"
1123 "pand %2, %%mm5 \n\t"
1124 "pand %3, %%mm2 \n\t"
1125 "pand %3, %%mm3 \n\t"
1126 "pand %3, %%mm6 \n\t"
1127 "pand %3, %%mm7 \n\t"
1128 "por %%mm2, %%mm0 \n\t"
1129 "por %%mm3, %%mm1 \n\t"
1130 "por %%mm6, %%mm4 \n\t"
1131 "por %%mm7, %%mm5 \n\t"
1133 "movq %%mm1, %%mm2 \n\t"
1134 "movq %%mm4, %%mm3 \n\t"
1135 "psllq $48, %%mm2 \n\t"
1136 "psllq $32, %%mm3 \n\t"
1137 "pand %4, %%mm2 \n\t"
1138 "pand %5, %%mm3 \n\t"
1139 "por %%mm2, %%mm0 \n\t"
1140 "psrlq $16, %%mm1 \n\t"
1141 "psrlq $32, %%mm4 \n\t"
1142 "psllq $16, %%mm5 \n\t"
1143 "por %%mm3, %%mm1 \n\t"
1144 "pand %6, %%mm5 \n\t"
1145 "por %%mm5, %%mm4 \n\t"
1147 MOVNTQ
" %%mm0, %0 \n\t"
1148 MOVNTQ
" %%mm1, 8%0 \n\t"
1149 MOVNTQ
" %%mm4, 16%0"
1152 :"m"(*s
),"m"(mask24l
),"m"(mask24h
),"m"(mask24hh
),"m"(mask24hhh
),"m"(mask24hhhh
)
1157 __asm__
volatile(SFENCE:::"memory");
1158 __asm__
volatile(EMMS:::"memory");
1161 register uint16_t bgr
;
1163 *d
++ = (bgr
&0x1F)<<3;
1164 *d
++ = (bgr
&0x7E0)>>3;
1165 *d
++ = (bgr
&0xF800)>>8;
1170 * mm0 = 00 B3 00 B2 00 B1 00 B0
1171 * mm1 = 00 G3 00 G2 00 G1 00 G0
1172 * mm2 = 00 R3 00 R2 00 R1 00 R0
1173 * mm6 = FF FF FF FF FF FF FF FF
1174 * mm7 = 00 00 00 00 00 00 00 00
1176 #define PACK_RGB32 \
1177 "packuswb %%mm7, %%mm0 \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \
1178 "packuswb %%mm7, %%mm1 \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \
1179 "packuswb %%mm7, %%mm2 \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \
1180 "punpcklbw %%mm1, %%mm0 \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
1181 "punpcklbw %%mm6, %%mm2 \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \
1182 "movq %%mm0, %%mm3 \n\t" \
1183 "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
1184 "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
1185 MOVNTQ" %%mm0, %0 \n\t" \
1186 MOVNTQ" %%mm3, 8%0 \n\t" \
1188 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
1190 const uint16_t *end
;
1192 const uint16_t *mm_end
;
1195 const uint16_t *s
= (const uint16_t *)src
;
1196 end
= s
+ src_size
/2;
1198 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
1199 __asm__
volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
1200 __asm__
volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
1202 while (s
< mm_end
) {
1204 PREFETCH
" 32%1 \n\t"
1205 "movq %1, %%mm0 \n\t"
1206 "movq %1, %%mm1 \n\t"
1207 "movq %1, %%mm2 \n\t"
1208 "pand %2, %%mm0 \n\t"
1209 "pand %3, %%mm1 \n\t"
1210 "pand %4, %%mm2 \n\t"
1211 "psllq $3, %%mm0 \n\t"
1212 "psrlq $2, %%mm1 \n\t"
1213 "psrlq $7, %%mm2 \n\t"
1216 :"m"(*s
),"m"(mask15b
),"m"(mask15g
),"m"(mask15r
)
1221 __asm__
volatile(SFENCE:::"memory");
1222 __asm__
volatile(EMMS:::"memory");
1225 register uint16_t bgr
;
1229 *d
++ = (bgr
&0x7C00)>>7;
1230 *d
++ = (bgr
&0x3E0)>>2;
1231 *d
++ = (bgr
&0x1F)<<3;
1233 *d
++ = (bgr
&0x1F)<<3;
1234 *d
++ = (bgr
&0x3E0)>>2;
1235 *d
++ = (bgr
&0x7C00)>>7;
1241 static inline void RENAME(rgb16to32
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
1243 const uint16_t *end
;
1245 const uint16_t *mm_end
;
1248 const uint16_t *s
= (const uint16_t*)src
;
1249 end
= s
+ src_size
/2;
1251 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
1252 __asm__
volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
1253 __asm__
volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
1255 while (s
< mm_end
) {
1257 PREFETCH
" 32%1 \n\t"
1258 "movq %1, %%mm0 \n\t"
1259 "movq %1, %%mm1 \n\t"
1260 "movq %1, %%mm2 \n\t"
1261 "pand %2, %%mm0 \n\t"
1262 "pand %3, %%mm1 \n\t"
1263 "pand %4, %%mm2 \n\t"
1264 "psllq $3, %%mm0 \n\t"
1265 "psrlq $3, %%mm1 \n\t"
1266 "psrlq $8, %%mm2 \n\t"
1269 :"m"(*s
),"m"(mask16b
),"m"(mask16g
),"m"(mask16r
)
1274 __asm__
volatile(SFENCE:::"memory");
1275 __asm__
volatile(EMMS:::"memory");
1278 register uint16_t bgr
;
1282 *d
++ = (bgr
&0xF800)>>8;
1283 *d
++ = (bgr
&0x7E0)>>3;
1284 *d
++ = (bgr
&0x1F)<<3;
1286 *d
++ = (bgr
&0x1F)<<3;
1287 *d
++ = (bgr
&0x7E0)>>3;
1288 *d
++ = (bgr
&0xF800)>>8;
1294 static inline void RENAME(rgb32tobgr32
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
1296 x86_reg idx
= 15 - src_size
;
1297 const uint8_t *s
= src
-idx
;
1298 uint8_t *d
= dst
-idx
;
1303 PREFETCH
" (%1, %0) \n\t"
1304 "movq %3, %%mm7 \n\t"
1305 "pxor %4, %%mm7 \n\t"
1306 "movq %%mm7, %%mm6 \n\t"
1307 "pxor %5, %%mm7 \n\t"
1310 PREFETCH
" 32(%1, %0) \n\t"
1311 "movq (%1, %0), %%mm0 \n\t"
1312 "movq 8(%1, %0), %%mm1 \n\t"
1314 "pshufw $177, %%mm0, %%mm3 \n\t"
1315 "pshufw $177, %%mm1, %%mm5 \n\t"
1316 "pand %%mm7, %%mm0 \n\t"
1317 "pand %%mm6, %%mm3 \n\t"
1318 "pand %%mm7, %%mm1 \n\t"
1319 "pand %%mm6, %%mm5 \n\t"
1320 "por %%mm3, %%mm0 \n\t"
1321 "por %%mm5, %%mm1 \n\t"
1323 "movq %%mm0, %%mm2 \n\t"
1324 "movq %%mm1, %%mm4 \n\t"
1325 "pand %%mm7, %%mm0 \n\t"
1326 "pand %%mm6, %%mm2 \n\t"
1327 "pand %%mm7, %%mm1 \n\t"
1328 "pand %%mm6, %%mm4 \n\t"
1329 "movq %%mm2, %%mm3 \n\t"
1330 "movq %%mm4, %%mm5 \n\t"
1331 "pslld $16, %%mm2 \n\t"
1332 "psrld $16, %%mm3 \n\t"
1333 "pslld $16, %%mm4 \n\t"
1334 "psrld $16, %%mm5 \n\t"
1335 "por %%mm2, %%mm0 \n\t"
1336 "por %%mm4, %%mm1 \n\t"
1337 "por %%mm3, %%mm0 \n\t"
1338 "por %%mm5, %%mm1 \n\t"
1340 MOVNTQ
" %%mm0, (%2, %0) \n\t"
1341 MOVNTQ
" %%mm1, 8(%2, %0) \n\t"
1348 : "r" (s
), "r" (d
), "m" (mask32b
), "m" (mask32r
), "m" (mmx_one
)
1351 for (; idx
<15; idx
+=4) {
1352 register int v
= *(const uint32_t *)&s
[idx
], g
= v
& 0xff00ff00;
1354 *(uint32_t *)&d
[idx
] = (v
>>16) + g
+ (v
<<16);
1358 static inline void RENAME(rgb24tobgr24
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
1362 x86_reg mmx_size
= 23 - src_size
;
1364 "test %%"REG_a
", %%"REG_a
" \n\t"
1366 "movq "MANGLE(mask24r
)", %%mm5 \n\t"
1367 "movq "MANGLE(mask24g
)", %%mm6 \n\t"
1368 "movq "MANGLE(mask24b
)", %%mm7 \n\t"
1371 PREFETCH
" 32(%1, %%"REG_a
") \n\t"
1372 "movq (%1, %%"REG_a
"), %%mm0 \n\t" // BGR BGR BG
1373 "movq (%1, %%"REG_a
"), %%mm1 \n\t" // BGR BGR BG
1374 "movq 2(%1, %%"REG_a
"), %%mm2 \n\t" // R BGR BGR B
1375 "psllq $16, %%mm0 \n\t" // 00 BGR BGR
1376 "pand %%mm5, %%mm0 \n\t"
1377 "pand %%mm6, %%mm1 \n\t"
1378 "pand %%mm7, %%mm2 \n\t"
1379 "por %%mm0, %%mm1 \n\t"
1380 "por %%mm2, %%mm1 \n\t"
1381 "movq 6(%1, %%"REG_a
"), %%mm0 \n\t" // BGR BGR BG
1382 MOVNTQ
" %%mm1, (%2, %%"REG_a
") \n\t" // RGB RGB RG
1383 "movq 8(%1, %%"REG_a
"), %%mm1 \n\t" // R BGR BGR B
1384 "movq 10(%1, %%"REG_a
"), %%mm2 \n\t" // GR BGR BGR
1385 "pand %%mm7, %%mm0 \n\t"
1386 "pand %%mm5, %%mm1 \n\t"
1387 "pand %%mm6, %%mm2 \n\t"
1388 "por %%mm0, %%mm1 \n\t"
1389 "por %%mm2, %%mm1 \n\t"
1390 "movq 14(%1, %%"REG_a
"), %%mm0 \n\t" // R BGR BGR B
1391 MOVNTQ
" %%mm1, 8(%2, %%"REG_a
") \n\t" // B RGB RGB R
1392 "movq 16(%1, %%"REG_a
"), %%mm1 \n\t" // GR BGR BGR
1393 "movq 18(%1, %%"REG_a
"), %%mm2 \n\t" // BGR BGR BG
1394 "pand %%mm6, %%mm0 \n\t"
1395 "pand %%mm7, %%mm1 \n\t"
1396 "pand %%mm5, %%mm2 \n\t"
1397 "por %%mm0, %%mm1 \n\t"
1398 "por %%mm2, %%mm1 \n\t"
1399 MOVNTQ
" %%mm1, 16(%2, %%"REG_a
") \n\t"
1400 "add $24, %%"REG_a
" \n\t"
1404 : "r" (src
-mmx_size
), "r"(dst
-mmx_size
)
1407 __asm__
volatile(SFENCE:::"memory");
1408 __asm__
volatile(EMMS:::"memory");
1410 if (mmx_size
==23) return; //finished, was multiple of 8
1414 src_size
= 23-mmx_size
;
1418 for (i
=0; i
<src_size
; i
+=3) {
1421 dst
[i
+ 1] = src
[i
+ 1];
1422 dst
[i
+ 2] = src
[i
+ 0];
1427 static inline void RENAME(yuvPlanartoyuy2
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1428 long width
, long height
,
1429 long lumStride
, long chromStride
, long dstStride
, long vertLumPerChroma
)
1432 const x86_reg chromWidth
= width
>>1;
1433 for (y
=0; y
<height
; y
++) {
1435 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
1437 "xor %%"REG_a
", %%"REG_a
" \n\t"
1440 PREFETCH
" 32(%1, %%"REG_a
", 2) \n\t"
1441 PREFETCH
" 32(%2, %%"REG_a
") \n\t"
1442 PREFETCH
" 32(%3, %%"REG_a
") \n\t"
1443 "movq (%2, %%"REG_a
"), %%mm0 \n\t" // U(0)
1444 "movq %%mm0, %%mm2 \n\t" // U(0)
1445 "movq (%3, %%"REG_a
"), %%mm1 \n\t" // V(0)
1446 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
1447 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
1449 "movq (%1, %%"REG_a
",2), %%mm3 \n\t" // Y(0)
1450 "movq 8(%1, %%"REG_a
",2), %%mm5 \n\t" // Y(8)
1451 "movq %%mm3, %%mm4 \n\t" // Y(0)
1452 "movq %%mm5, %%mm6 \n\t" // Y(8)
1453 "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0)
1454 "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4)
1455 "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8)
1456 "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12)
1458 MOVNTQ
" %%mm3, (%0, %%"REG_a
", 4) \n\t"
1459 MOVNTQ
" %%mm4, 8(%0, %%"REG_a
", 4) \n\t"
1460 MOVNTQ
" %%mm5, 16(%0, %%"REG_a
", 4) \n\t"
1461 MOVNTQ
" %%mm6, 24(%0, %%"REG_a
", 4) \n\t"
1463 "add $8, %%"REG_a
" \n\t"
1464 "cmp %4, %%"REG_a
" \n\t"
1466 ::"r"(dst
), "r"(ysrc
), "r"(usrc
), "r"(vsrc
), "g" (chromWidth
)
1471 #if ARCH_ALPHA && HAVE_MVI
1472 #define pl2yuy2(n) \
1477 __asm__("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \
1478 __asm__("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \
1479 __asm__("unpkbl %1, %0" : "=r"(u) : "r"(u)); \
1480 __asm__("unpkbl %1, %0" : "=r"(v) : "r"(v)); \
1481 yuv1 = (u << 8) + (v << 24); \
1488 uint64_t *qdst
= (uint64_t *) dst
;
1489 uint64_t *qdst2
= (uint64_t *) (dst
+ dstStride
);
1490 const uint32_t *yc
= (uint32_t *) ysrc
;
1491 const uint32_t *yc2
= (uint32_t *) (ysrc
+ lumStride
);
1492 const uint16_t *uc
= (uint16_t*) usrc
, *vc
= (uint16_t*) vsrc
;
1493 for (i
= 0; i
< chromWidth
; i
+= 8) {
1494 uint64_t y1
, y2
, yuv1
, yuv2
;
1497 __asm__("ldq $31,64(%0)" :: "r"(yc
));
1498 __asm__("ldq $31,64(%0)" :: "r"(yc2
));
1499 __asm__("ldq $31,64(%0)" :: "r"(uc
));
1500 __asm__("ldq $31,64(%0)" :: "r"(vc
));
1518 #elif HAVE_FAST_64BIT
1520 uint64_t *ldst
= (uint64_t *) dst
;
1521 const uint8_t *yc
= ysrc
, *uc
= usrc
, *vc
= vsrc
;
1522 for (i
= 0; i
< chromWidth
; i
+= 2) {
1524 k
= yc
[0] + (uc
[0] << 8) +
1525 (yc
[1] << 16) + (vc
[0] << 24);
1526 l
= yc
[2] + (uc
[1] << 8) +
1527 (yc
[3] << 16) + (vc
[1] << 24);
1528 *ldst
++ = k
+ (l
<< 32);
1535 int i
, *idst
= (int32_t *) dst
;
1536 const uint8_t *yc
= ysrc
, *uc
= usrc
, *vc
= vsrc
;
1537 for (i
= 0; i
< chromWidth
; i
++) {
1539 *idst
++ = (yc
[0] << 24)+ (uc
[0] << 16) +
1540 (yc
[1] << 8) + (vc
[0] << 0);
1542 *idst
++ = yc
[0] + (uc
[0] << 8) +
1543 (yc
[1] << 16) + (vc
[0] << 24);
1551 if ((y
&(vertLumPerChroma
-1)) == vertLumPerChroma
-1) {
1552 usrc
+= chromStride
;
1553 vsrc
+= chromStride
;
1566 * Height should be a multiple of 2 and width should be a multiple of 16.
1567 * (If this is a problem for anyone then tell me, and I will fix it.)
1569 static inline void RENAME(yv12toyuy2
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1570 long width
, long height
,
1571 long lumStride
, long chromStride
, long dstStride
)
1573 //FIXME interpolate chroma
1574 RENAME(yuvPlanartoyuy2
)(ysrc
, usrc
, vsrc
, dst
, width
, height
, lumStride
, chromStride
, dstStride
, 2);
1577 static inline void RENAME(yuvPlanartouyvy
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1578 long width
, long height
,
1579 long lumStride
, long chromStride
, long dstStride
, long vertLumPerChroma
)
1582 const x86_reg chromWidth
= width
>>1;
1583 for (y
=0; y
<height
; y
++) {
1585 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
1587 "xor %%"REG_a
", %%"REG_a
" \n\t"
1590 PREFETCH
" 32(%1, %%"REG_a
", 2) \n\t"
1591 PREFETCH
" 32(%2, %%"REG_a
") \n\t"
1592 PREFETCH
" 32(%3, %%"REG_a
") \n\t"
1593 "movq (%2, %%"REG_a
"), %%mm0 \n\t" // U(0)
1594 "movq %%mm0, %%mm2 \n\t" // U(0)
1595 "movq (%3, %%"REG_a
"), %%mm1 \n\t" // V(0)
1596 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
1597 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
1599 "movq (%1, %%"REG_a
",2), %%mm3 \n\t" // Y(0)
1600 "movq 8(%1, %%"REG_a
",2), %%mm5 \n\t" // Y(8)
1601 "movq %%mm0, %%mm4 \n\t" // Y(0)
1602 "movq %%mm2, %%mm6 \n\t" // Y(8)
1603 "punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0)
1604 "punpckhbw %%mm3, %%mm4 \n\t" // YUYV YUYV(4)
1605 "punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8)
1606 "punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12)
1608 MOVNTQ
" %%mm0, (%0, %%"REG_a
", 4) \n\t"
1609 MOVNTQ
" %%mm4, 8(%0, %%"REG_a
", 4) \n\t"
1610 MOVNTQ
" %%mm2, 16(%0, %%"REG_a
", 4) \n\t"
1611 MOVNTQ
" %%mm6, 24(%0, %%"REG_a
", 4) \n\t"
1613 "add $8, %%"REG_a
" \n\t"
1614 "cmp %4, %%"REG_a
" \n\t"
1616 ::"r"(dst
), "r"(ysrc
), "r"(usrc
), "r"(vsrc
), "g" (chromWidth
)
1620 //FIXME adapt the Alpha ASM code from yv12->yuy2
1624 uint64_t *ldst
= (uint64_t *) dst
;
1625 const uint8_t *yc
= ysrc
, *uc
= usrc
, *vc
= vsrc
;
1626 for (i
= 0; i
< chromWidth
; i
+= 2) {
1628 k
= uc
[0] + (yc
[0] << 8) +
1629 (vc
[0] << 16) + (yc
[1] << 24);
1630 l
= uc
[1] + (yc
[2] << 8) +
1631 (vc
[1] << 16) + (yc
[3] << 24);
1632 *ldst
++ = k
+ (l
<< 32);
1639 int i
, *idst
= (int32_t *) dst
;
1640 const uint8_t *yc
= ysrc
, *uc
= usrc
, *vc
= vsrc
;
1641 for (i
= 0; i
< chromWidth
; i
++) {
1643 *idst
++ = (uc
[0] << 24)+ (yc
[0] << 16) +
1644 (vc
[0] << 8) + (yc
[1] << 0);
1646 *idst
++ = uc
[0] + (yc
[0] << 8) +
1647 (vc
[0] << 16) + (yc
[1] << 24);
1655 if ((y
&(vertLumPerChroma
-1)) == vertLumPerChroma
-1) {
1656 usrc
+= chromStride
;
1657 vsrc
+= chromStride
;
1670 * Height should be a multiple of 2 and width should be a multiple of 16
1671 * (If this is a problem for anyone then tell me, and I will fix it.)
1673 static inline void RENAME(yv12touyvy
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1674 long width
, long height
,
1675 long lumStride
, long chromStride
, long dstStride
)
1677 //FIXME interpolate chroma
1678 RENAME(yuvPlanartouyvy
)(ysrc
, usrc
, vsrc
, dst
, width
, height
, lumStride
, chromStride
, dstStride
, 2);
1682 * Width should be a multiple of 16.
1684 static inline void RENAME(yuv422ptouyvy
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1685 long width
, long height
,
1686 long lumStride
, long chromStride
, long dstStride
)
1688 RENAME(yuvPlanartouyvy
)(ysrc
, usrc
, vsrc
, dst
, width
, height
, lumStride
, chromStride
, dstStride
, 1);
1692 * Width should be a multiple of 16.
1694 static inline void RENAME(yuv422ptoyuy2
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1695 long width
, long height
,
1696 long lumStride
, long chromStride
, long dstStride
)
1698 RENAME(yuvPlanartoyuy2
)(ysrc
, usrc
, vsrc
, dst
, width
, height
, lumStride
, chromStride
, dstStride
, 1);
1702 * Height should be a multiple of 2 and width should be a multiple of 16.
1703 * (If this is a problem for anyone then tell me, and I will fix it.)
1705 static inline void RENAME(yuy2toyv12
)(const uint8_t *src
, uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
,
1706 long width
, long height
,
1707 long lumStride
, long chromStride
, long srcStride
)
1710 const x86_reg chromWidth
= width
>>1;
1711 for (y
=0; y
<height
; y
+=2) {
1714 "xor %%"REG_a
", %%"REG_a
" \n\t"
1715 "pcmpeqw %%mm7, %%mm7 \n\t"
1716 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
1719 PREFETCH
" 64(%0, %%"REG_a
", 4) \n\t"
1720 "movq (%0, %%"REG_a
", 4), %%mm0 \n\t" // YUYV YUYV(0)
1721 "movq 8(%0, %%"REG_a
", 4), %%mm1 \n\t" // YUYV YUYV(4)
1722 "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0)
1723 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4)
1724 "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0)
1725 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4)
1726 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
1727 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
1728 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
1729 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
1731 MOVNTQ
" %%mm2, (%1, %%"REG_a
", 2) \n\t"
1733 "movq 16(%0, %%"REG_a
", 4), %%mm1 \n\t" // YUYV YUYV(8)
1734 "movq 24(%0, %%"REG_a
", 4), %%mm2 \n\t" // YUYV YUYV(12)
1735 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8)
1736 "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12)
1737 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8)
1738 "psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12)
1739 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
1740 "pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
1741 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
1742 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
1744 MOVNTQ
" %%mm3, 8(%1, %%"REG_a
", 2) \n\t"
1746 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
1747 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
1748 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
1749 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
1750 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
1751 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
1752 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
1753 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
1755 MOVNTQ
" %%mm0, (%3, %%"REG_a
") \n\t"
1756 MOVNTQ
" %%mm2, (%2, %%"REG_a
") \n\t"
1758 "add $8, %%"REG_a
" \n\t"
1759 "cmp %4, %%"REG_a
" \n\t"
1761 ::"r"(src
), "r"(ydst
), "r"(udst
), "r"(vdst
), "g" (chromWidth
)
1762 : "memory", "%"REG_a
1769 "xor %%"REG_a
", %%"REG_a
" \n\t"
1772 PREFETCH
" 64(%0, %%"REG_a
", 4) \n\t"
1773 "movq (%0, %%"REG_a
", 4), %%mm0 \n\t" // YUYV YUYV(0)
1774 "movq 8(%0, %%"REG_a
", 4), %%mm1 \n\t" // YUYV YUYV(4)
1775 "movq 16(%0, %%"REG_a
", 4), %%mm2 \n\t" // YUYV YUYV(8)
1776 "movq 24(%0, %%"REG_a
", 4), %%mm3 \n\t" // YUYV YUYV(12)
1777 "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
1778 "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
1779 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
1780 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
1781 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
1782 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
1784 MOVNTQ
" %%mm0, (%1, %%"REG_a
", 2) \n\t"
1785 MOVNTQ
" %%mm2, 8(%1, %%"REG_a
", 2) \n\t"
1787 "add $8, %%"REG_a
" \n\t"
1788 "cmp %4, %%"REG_a
" \n\t"
1791 ::"r"(src
), "r"(ydst
), "r"(udst
), "r"(vdst
), "g" (chromWidth
)
1792 : "memory", "%"REG_a
1796 for (i
=0; i
<chromWidth
; i
++) {
1797 ydst
[2*i
+0] = src
[4*i
+0];
1798 udst
[i
] = src
[4*i
+1];
1799 ydst
[2*i
+1] = src
[4*i
+2];
1800 vdst
[i
] = src
[4*i
+3];
1805 for (i
=0; i
<chromWidth
; i
++) {
1806 ydst
[2*i
+0] = src
[4*i
+0];
1807 ydst
[2*i
+1] = src
[4*i
+2];
1810 udst
+= chromStride
;
1811 vdst
+= chromStride
;
1816 __asm__
volatile(EMMS
" \n\t"
1822 static inline void RENAME(yvu9toyv12
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
,
1823 uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
,
1824 long width
, long height
, long lumStride
, long chromStride
)
1827 memcpy(ydst
, ysrc
, width
*height
);
1829 /* XXX: implement upscaling for U,V */
1832 static inline void RENAME(planar2x
)(const uint8_t *src
, uint8_t *dst
, long srcWidth
, long srcHeight
, long srcStride
, long dstStride
)
1839 for (x
=0; x
<srcWidth
-1; x
++) {
1840 dst
[2*x
+1]= (3*src
[x
] + src
[x
+1])>>2;
1841 dst
[2*x
+2]= ( src
[x
] + 3*src
[x
+1])>>2;
1843 dst
[2*srcWidth
-1]= src
[srcWidth
-1];
1847 for (y
=1; y
<srcHeight
; y
++) {
1848 #if HAVE_MMX2 || HAVE_AMD3DNOW
1849 const x86_reg mmxSize
= srcWidth
&~15;
1851 "mov %4, %%"REG_a
" \n\t"
1853 "movq (%0, %%"REG_a
"), %%mm0 \n\t"
1854 "movq (%1, %%"REG_a
"), %%mm1 \n\t"
1855 "movq 1(%0, %%"REG_a
"), %%mm2 \n\t"
1856 "movq 1(%1, %%"REG_a
"), %%mm3 \n\t"
1857 "movq -1(%0, %%"REG_a
"), %%mm4 \n\t"
1858 "movq -1(%1, %%"REG_a
"), %%mm5 \n\t"
1859 PAVGB
" %%mm0, %%mm5 \n\t"
1860 PAVGB
" %%mm0, %%mm3 \n\t"
1861 PAVGB
" %%mm0, %%mm5 \n\t"
1862 PAVGB
" %%mm0, %%mm3 \n\t"
1863 PAVGB
" %%mm1, %%mm4 \n\t"
1864 PAVGB
" %%mm1, %%mm2 \n\t"
1865 PAVGB
" %%mm1, %%mm4 \n\t"
1866 PAVGB
" %%mm1, %%mm2 \n\t"
1867 "movq %%mm5, %%mm7 \n\t"
1868 "movq %%mm4, %%mm6 \n\t"
1869 "punpcklbw %%mm3, %%mm5 \n\t"
1870 "punpckhbw %%mm3, %%mm7 \n\t"
1871 "punpcklbw %%mm2, %%mm4 \n\t"
1872 "punpckhbw %%mm2, %%mm6 \n\t"
1874 MOVNTQ
" %%mm5, (%2, %%"REG_a
", 2) \n\t"
1875 MOVNTQ
" %%mm7, 8(%2, %%"REG_a
", 2) \n\t"
1876 MOVNTQ
" %%mm4, (%3, %%"REG_a
", 2) \n\t"
1877 MOVNTQ
" %%mm6, 8(%3, %%"REG_a
", 2) \n\t"
1879 "movq %%mm5, (%2, %%"REG_a
", 2) \n\t"
1880 "movq %%mm7, 8(%2, %%"REG_a
", 2) \n\t"
1881 "movq %%mm4, (%3, %%"REG_a
", 2) \n\t"
1882 "movq %%mm6, 8(%3, %%"REG_a
", 2) \n\t"
1884 "add $8, %%"REG_a
" \n\t"
1886 :: "r" (src
+ mmxSize
), "r" (src
+ srcStride
+ mmxSize
),
1887 "r" (dst
+ mmxSize
*2), "r" (dst
+ dstStride
+ mmxSize
*2),
1893 const x86_reg mmxSize
=1;
1895 dst
[0 ]= (3*src
[0] + src
[srcStride
])>>2;
1896 dst
[dstStride
]= ( src
[0] + 3*src
[srcStride
])>>2;
1898 for (x
=mmxSize
-1; x
<srcWidth
-1; x
++) {
1899 dst
[2*x
+1]= (3*src
[x
+0] + src
[x
+srcStride
+1])>>2;
1900 dst
[2*x
+dstStride
+2]= ( src
[x
+0] + 3*src
[x
+srcStride
+1])>>2;
1901 dst
[2*x
+dstStride
+1]= ( src
[x
+1] + 3*src
[x
+srcStride
])>>2;
1902 dst
[2*x
+2]= (3*src
[x
+1] + src
[x
+srcStride
])>>2;
1904 dst
[srcWidth
*2 -1 ]= (3*src
[srcWidth
-1] + src
[srcWidth
-1 + srcStride
])>>2;
1905 dst
[srcWidth
*2 -1 + dstStride
]= ( src
[srcWidth
-1] + 3*src
[srcWidth
-1 + srcStride
])>>2;
1915 for (x
=0; x
<srcWidth
-1; x
++) {
1916 dst
[2*x
+1]= (3*src
[x
] + src
[x
+1])>>2;
1917 dst
[2*x
+2]= ( src
[x
] + 3*src
[x
+1])>>2;
1919 dst
[2*srcWidth
-1]= src
[srcWidth
-1];
1921 for (x
=0; x
<srcWidth
; x
++) {
1928 __asm__
volatile(EMMS
" \n\t"
1935 * Height should be a multiple of 2 and width should be a multiple of 16.
1936 * (If this is a problem for anyone then tell me, and I will fix it.)
1937 * Chrominance data is only taken from every second line, others are ignored.
1938 * FIXME: Write HQ version.
1940 static inline void RENAME(uyvytoyv12
)(const uint8_t *src
, uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
,
1941 long width
, long height
,
1942 long lumStride
, long chromStride
, long srcStride
)
1945 const x86_reg chromWidth
= width
>>1;
1946 for (y
=0; y
<height
; y
+=2) {
1949 "xor %%"REG_a
", %%"REG_a
" \n\t"
1950 "pcmpeqw %%mm7, %%mm7 \n\t"
1951 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
1954 PREFETCH
" 64(%0, %%"REG_a
", 4) \n\t"
1955 "movq (%0, %%"REG_a
", 4), %%mm0 \n\t" // UYVY UYVY(0)
1956 "movq 8(%0, %%"REG_a
", 4), %%mm1 \n\t" // UYVY UYVY(4)
1957 "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0)
1958 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4)
1959 "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0)
1960 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4)
1961 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
1962 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
1963 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
1964 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
1966 MOVNTQ
" %%mm2, (%1, %%"REG_a
", 2) \n\t"
1968 "movq 16(%0, %%"REG_a
", 4), %%mm1 \n\t" // UYVY UYVY(8)
1969 "movq 24(%0, %%"REG_a
", 4), %%mm2 \n\t" // UYVY UYVY(12)
1970 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8)
1971 "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12)
1972 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8)
1973 "pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12)
1974 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
1975 "psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
1976 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
1977 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
1979 MOVNTQ
" %%mm3, 8(%1, %%"REG_a
", 2) \n\t"
1981 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
1982 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
1983 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
1984 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
1985 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
1986 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
1987 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
1988 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
1990 MOVNTQ
" %%mm0, (%3, %%"REG_a
") \n\t"
1991 MOVNTQ
" %%mm2, (%2, %%"REG_a
") \n\t"
1993 "add $8, %%"REG_a
" \n\t"
1994 "cmp %4, %%"REG_a
" \n\t"
1996 ::"r"(src
), "r"(ydst
), "r"(udst
), "r"(vdst
), "g" (chromWidth
)
1997 : "memory", "%"REG_a
2004 "xor %%"REG_a
", %%"REG_a
" \n\t"
2007 PREFETCH
" 64(%0, %%"REG_a
", 4) \n\t"
2008 "movq (%0, %%"REG_a
", 4), %%mm0 \n\t" // YUYV YUYV(0)
2009 "movq 8(%0, %%"REG_a
", 4), %%mm1 \n\t" // YUYV YUYV(4)
2010 "movq 16(%0, %%"REG_a
", 4), %%mm2 \n\t" // YUYV YUYV(8)
2011 "movq 24(%0, %%"REG_a
", 4), %%mm3 \n\t" // YUYV YUYV(12)
2012 "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
2013 "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
2014 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
2015 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
2016 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
2017 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
2019 MOVNTQ
" %%mm0, (%1, %%"REG_a
", 2) \n\t"
2020 MOVNTQ
" %%mm2, 8(%1, %%"REG_a
", 2) \n\t"
2022 "add $8, %%"REG_a
" \n\t"
2023 "cmp %4, %%"REG_a
" \n\t"
2026 ::"r"(src
), "r"(ydst
), "r"(udst
), "r"(vdst
), "g" (chromWidth
)
2027 : "memory", "%"REG_a
2031 for (i
=0; i
<chromWidth
; i
++) {
2032 udst
[i
] = src
[4*i
+0];
2033 ydst
[2*i
+0] = src
[4*i
+1];
2034 vdst
[i
] = src
[4*i
+2];
2035 ydst
[2*i
+1] = src
[4*i
+3];
2040 for (i
=0; i
<chromWidth
; i
++) {
2041 ydst
[2*i
+0] = src
[4*i
+1];
2042 ydst
[2*i
+1] = src
[4*i
+3];
2045 udst
+= chromStride
;
2046 vdst
+= chromStride
;
2051 __asm__
volatile(EMMS
" \n\t"
2058 * Height should be a multiple of 2 and width should be a multiple of 2.
2059 * (If this is a problem for anyone then tell me, and I will fix it.)
2060 * Chrominance data is only taken from every second line,
2061 * others are ignored in the C version.
2062 * FIXME: Write HQ version.
2064 static inline void RENAME(rgb24toyv12
)(const uint8_t *src
, uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
,
2065 long width
, long height
,
2066 long lumStride
, long chromStride
, long srcStride
)
2069 const x86_reg chromWidth
= width
>>1;
2071 for (y
=0; y
<height
-2; y
+=2) {
2073 for (i
=0; i
<2; i
++) {
2075 "mov %2, %%"REG_a
" \n\t"
2076 "movq "MANGLE(ff_bgr2YCoeff
)", %%mm6 \n\t"
2077 "movq "MANGLE(ff_w1111
)", %%mm5 \n\t"
2078 "pxor %%mm7, %%mm7 \n\t"
2079 "lea (%%"REG_a
", %%"REG_a
", 2), %%"REG_d
" \n\t"
2082 PREFETCH
" 64(%0, %%"REG_d
") \n\t"
2083 "movd (%0, %%"REG_d
"), %%mm0 \n\t"
2084 "movd 3(%0, %%"REG_d
"), %%mm1 \n\t"
2085 "punpcklbw %%mm7, %%mm0 \n\t"
2086 "punpcklbw %%mm7, %%mm1 \n\t"
2087 "movd 6(%0, %%"REG_d
"), %%mm2 \n\t"
2088 "movd 9(%0, %%"REG_d
"), %%mm3 \n\t"
2089 "punpcklbw %%mm7, %%mm2 \n\t"
2090 "punpcklbw %%mm7, %%mm3 \n\t"
2091 "pmaddwd %%mm6, %%mm0 \n\t"
2092 "pmaddwd %%mm6, %%mm1 \n\t"
2093 "pmaddwd %%mm6, %%mm2 \n\t"
2094 "pmaddwd %%mm6, %%mm3 \n\t"
2095 #ifndef FAST_BGR2YV12
2096 "psrad $8, %%mm0 \n\t"
2097 "psrad $8, %%mm1 \n\t"
2098 "psrad $8, %%mm2 \n\t"
2099 "psrad $8, %%mm3 \n\t"
2101 "packssdw %%mm1, %%mm0 \n\t"
2102 "packssdw %%mm3, %%mm2 \n\t"
2103 "pmaddwd %%mm5, %%mm0 \n\t"
2104 "pmaddwd %%mm5, %%mm2 \n\t"
2105 "packssdw %%mm2, %%mm0 \n\t"
2106 "psraw $7, %%mm0 \n\t"
2108 "movd 12(%0, %%"REG_d
"), %%mm4 \n\t"
2109 "movd 15(%0, %%"REG_d
"), %%mm1 \n\t"
2110 "punpcklbw %%mm7, %%mm4 \n\t"
2111 "punpcklbw %%mm7, %%mm1 \n\t"
2112 "movd 18(%0, %%"REG_d
"), %%mm2 \n\t"
2113 "movd 21(%0, %%"REG_d
"), %%mm3 \n\t"
2114 "punpcklbw %%mm7, %%mm2 \n\t"
2115 "punpcklbw %%mm7, %%mm3 \n\t"
2116 "pmaddwd %%mm6, %%mm4 \n\t"
2117 "pmaddwd %%mm6, %%mm1 \n\t"
2118 "pmaddwd %%mm6, %%mm2 \n\t"
2119 "pmaddwd %%mm6, %%mm3 \n\t"
2120 #ifndef FAST_BGR2YV12
2121 "psrad $8, %%mm4 \n\t"
2122 "psrad $8, %%mm1 \n\t"
2123 "psrad $8, %%mm2 \n\t"
2124 "psrad $8, %%mm3 \n\t"
2126 "packssdw %%mm1, %%mm4 \n\t"
2127 "packssdw %%mm3, %%mm2 \n\t"
2128 "pmaddwd %%mm5, %%mm4 \n\t"
2129 "pmaddwd %%mm5, %%mm2 \n\t"
2130 "add $24, %%"REG_d
" \n\t"
2131 "packssdw %%mm2, %%mm4 \n\t"
2132 "psraw $7, %%mm4 \n\t"
2134 "packuswb %%mm4, %%mm0 \n\t"
2135 "paddusb "MANGLE(ff_bgr2YOffset
)", %%mm0 \n\t"
2137 MOVNTQ
" %%mm0, (%1, %%"REG_a
") \n\t"
2138 "add $8, %%"REG_a
" \n\t"
2140 : : "r" (src
+width
*3), "r" (ydst
+width
), "g" ((x86_reg
)-width
)
2141 : "%"REG_a
, "%"REG_d
2148 "mov %4, %%"REG_a
" \n\t"
2149 "movq "MANGLE(ff_w1111
)", %%mm5 \n\t"
2150 "movq "MANGLE(ff_bgr2UCoeff
)", %%mm6 \n\t"
2151 "pxor %%mm7, %%mm7 \n\t"
2152 "lea (%%"REG_a
", %%"REG_a
", 2), %%"REG_d
" \n\t"
2153 "add %%"REG_d
", %%"REG_d
" \n\t"
2156 PREFETCH
" 64(%0, %%"REG_d
") \n\t"
2157 PREFETCH
" 64(%1, %%"REG_d
") \n\t"
2158 #if HAVE_MMX2 || HAVE_AMD3DNOW
2159 "movq (%0, %%"REG_d
"), %%mm0 \n\t"
2160 "movq (%1, %%"REG_d
"), %%mm1 \n\t"
2161 "movq 6(%0, %%"REG_d
"), %%mm2 \n\t"
2162 "movq 6(%1, %%"REG_d
"), %%mm3 \n\t"
2163 PAVGB
" %%mm1, %%mm0 \n\t"
2164 PAVGB
" %%mm3, %%mm2 \n\t"
2165 "movq %%mm0, %%mm1 \n\t"
2166 "movq %%mm2, %%mm3 \n\t"
2167 "psrlq $24, %%mm0 \n\t"
2168 "psrlq $24, %%mm2 \n\t"
2169 PAVGB
" %%mm1, %%mm0 \n\t"
2170 PAVGB
" %%mm3, %%mm2 \n\t"
2171 "punpcklbw %%mm7, %%mm0 \n\t"
2172 "punpcklbw %%mm7, %%mm2 \n\t"
2174 "movd (%0, %%"REG_d
"), %%mm0 \n\t"
2175 "movd (%1, %%"REG_d
"), %%mm1 \n\t"
2176 "movd 3(%0, %%"REG_d
"), %%mm2 \n\t"
2177 "movd 3(%1, %%"REG_d
"), %%mm3 \n\t"
2178 "punpcklbw %%mm7, %%mm0 \n\t"
2179 "punpcklbw %%mm7, %%mm1 \n\t"
2180 "punpcklbw %%mm7, %%mm2 \n\t"
2181 "punpcklbw %%mm7, %%mm3 \n\t"
2182 "paddw %%mm1, %%mm0 \n\t"
2183 "paddw %%mm3, %%mm2 \n\t"
2184 "paddw %%mm2, %%mm0 \n\t"
2185 "movd 6(%0, %%"REG_d
"), %%mm4 \n\t"
2186 "movd 6(%1, %%"REG_d
"), %%mm1 \n\t"
2187 "movd 9(%0, %%"REG_d
"), %%mm2 \n\t"
2188 "movd 9(%1, %%"REG_d
"), %%mm3 \n\t"
2189 "punpcklbw %%mm7, %%mm4 \n\t"
2190 "punpcklbw %%mm7, %%mm1 \n\t"
2191 "punpcklbw %%mm7, %%mm2 \n\t"
2192 "punpcklbw %%mm7, %%mm3 \n\t"
2193 "paddw %%mm1, %%mm4 \n\t"
2194 "paddw %%mm3, %%mm2 \n\t"
2195 "paddw %%mm4, %%mm2 \n\t"
2196 "psrlw $2, %%mm0 \n\t"
2197 "psrlw $2, %%mm2 \n\t"
2199 "movq "MANGLE(ff_bgr2VCoeff
)", %%mm1 \n\t"
2200 "movq "MANGLE(ff_bgr2VCoeff
)", %%mm3 \n\t"
2202 "pmaddwd %%mm0, %%mm1 \n\t"
2203 "pmaddwd %%mm2, %%mm3 \n\t"
2204 "pmaddwd %%mm6, %%mm0 \n\t"
2205 "pmaddwd %%mm6, %%mm2 \n\t"
2206 #ifndef FAST_BGR2YV12
2207 "psrad $8, %%mm0 \n\t"
2208 "psrad $8, %%mm1 \n\t"
2209 "psrad $8, %%mm2 \n\t"
2210 "psrad $8, %%mm3 \n\t"
2212 "packssdw %%mm2, %%mm0 \n\t"
2213 "packssdw %%mm3, %%mm1 \n\t"
2214 "pmaddwd %%mm5, %%mm0 \n\t"
2215 "pmaddwd %%mm5, %%mm1 \n\t"
2216 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0
2217 "psraw $7, %%mm0 \n\t"
2219 #if HAVE_MMX2 || HAVE_AMD3DNOW
2220 "movq 12(%0, %%"REG_d
"), %%mm4 \n\t"
2221 "movq 12(%1, %%"REG_d
"), %%mm1 \n\t"
2222 "movq 18(%0, %%"REG_d
"), %%mm2 \n\t"
2223 "movq 18(%1, %%"REG_d
"), %%mm3 \n\t"
2224 PAVGB
" %%mm1, %%mm4 \n\t"
2225 PAVGB
" %%mm3, %%mm2 \n\t"
2226 "movq %%mm4, %%mm1 \n\t"
2227 "movq %%mm2, %%mm3 \n\t"
2228 "psrlq $24, %%mm4 \n\t"
2229 "psrlq $24, %%mm2 \n\t"
2230 PAVGB
" %%mm1, %%mm4 \n\t"
2231 PAVGB
" %%mm3, %%mm2 \n\t"
2232 "punpcklbw %%mm7, %%mm4 \n\t"
2233 "punpcklbw %%mm7, %%mm2 \n\t"
2235 "movd 12(%0, %%"REG_d
"), %%mm4 \n\t"
2236 "movd 12(%1, %%"REG_d
"), %%mm1 \n\t"
2237 "movd 15(%0, %%"REG_d
"), %%mm2 \n\t"
2238 "movd 15(%1, %%"REG_d
"), %%mm3 \n\t"
2239 "punpcklbw %%mm7, %%mm4 \n\t"
2240 "punpcklbw %%mm7, %%mm1 \n\t"
2241 "punpcklbw %%mm7, %%mm2 \n\t"
2242 "punpcklbw %%mm7, %%mm3 \n\t"
2243 "paddw %%mm1, %%mm4 \n\t"
2244 "paddw %%mm3, %%mm2 \n\t"
2245 "paddw %%mm2, %%mm4 \n\t"
2246 "movd 18(%0, %%"REG_d
"), %%mm5 \n\t"
2247 "movd 18(%1, %%"REG_d
"), %%mm1 \n\t"
2248 "movd 21(%0, %%"REG_d
"), %%mm2 \n\t"
2249 "movd 21(%1, %%"REG_d
"), %%mm3 \n\t"
2250 "punpcklbw %%mm7, %%mm5 \n\t"
2251 "punpcklbw %%mm7, %%mm1 \n\t"
2252 "punpcklbw %%mm7, %%mm2 \n\t"
2253 "punpcklbw %%mm7, %%mm3 \n\t"
2254 "paddw %%mm1, %%mm5 \n\t"
2255 "paddw %%mm3, %%mm2 \n\t"
2256 "paddw %%mm5, %%mm2 \n\t"
2257 "movq "MANGLE(ff_w1111
)", %%mm5 \n\t"
2258 "psrlw $2, %%mm4 \n\t"
2259 "psrlw $2, %%mm2 \n\t"
2261 "movq "MANGLE(ff_bgr2VCoeff
)", %%mm1 \n\t"
2262 "movq "MANGLE(ff_bgr2VCoeff
)", %%mm3 \n\t"
2264 "pmaddwd %%mm4, %%mm1 \n\t"
2265 "pmaddwd %%mm2, %%mm3 \n\t"
2266 "pmaddwd %%mm6, %%mm4 \n\t"
2267 "pmaddwd %%mm6, %%mm2 \n\t"
2268 #ifndef FAST_BGR2YV12
2269 "psrad $8, %%mm4 \n\t"
2270 "psrad $8, %%mm1 \n\t"
2271 "psrad $8, %%mm2 \n\t"
2272 "psrad $8, %%mm3 \n\t"
2274 "packssdw %%mm2, %%mm4 \n\t"
2275 "packssdw %%mm3, %%mm1 \n\t"
2276 "pmaddwd %%mm5, %%mm4 \n\t"
2277 "pmaddwd %%mm5, %%mm1 \n\t"
2278 "add $24, %%"REG_d
" \n\t"
2279 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2
2280 "psraw $7, %%mm4 \n\t"
2282 "movq %%mm0, %%mm1 \n\t"
2283 "punpckldq %%mm4, %%mm0 \n\t"
2284 "punpckhdq %%mm4, %%mm1 \n\t"
2285 "packsswb %%mm1, %%mm0 \n\t"
2286 "paddb "MANGLE(ff_bgr2UVOffset
)", %%mm0 \n\t"
2287 "movd %%mm0, (%2, %%"REG_a
") \n\t"
2288 "punpckhdq %%mm0, %%mm0 \n\t"
2289 "movd %%mm0, (%3, %%"REG_a
") \n\t"
2290 "add $4, %%"REG_a
" \n\t"
2292 : : "r" (src
+chromWidth
*6), "r" (src
+srcStride
+chromWidth
*6), "r" (udst
+chromWidth
), "r" (vdst
+chromWidth
), "g" (-chromWidth
)
2293 : "%"REG_a
, "%"REG_d
2296 udst
+= chromStride
;
2297 vdst
+= chromStride
;
2301 __asm__
volatile(EMMS
" \n\t"
2307 for (; y
<height
; y
+=2) {
2309 for (i
=0; i
<chromWidth
; i
++) {
2310 unsigned int b
= src
[6*i
+0];
2311 unsigned int g
= src
[6*i
+1];
2312 unsigned int r
= src
[6*i
+2];
2314 unsigned int Y
= ((RY
*r
+ GY
*g
+ BY
*b
)>>RGB2YUV_SHIFT
) + 16;
2315 unsigned int V
= ((RV
*r
+ GV
*g
+ BV
*b
)>>RGB2YUV_SHIFT
) + 128;
2316 unsigned int U
= ((RU
*r
+ GU
*g
+ BU
*b
)>>RGB2YUV_SHIFT
) + 128;
2326 Y
= ((RY
*r
+ GY
*g
+ BY
*b
)>>RGB2YUV_SHIFT
) + 16;
2332 for (i
=0; i
<chromWidth
; i
++) {
2333 unsigned int b
= src
[6*i
+0];
2334 unsigned int g
= src
[6*i
+1];
2335 unsigned int r
= src
[6*i
+2];
2337 unsigned int Y
= ((RY
*r
+ GY
*g
+ BY
*b
)>>RGB2YUV_SHIFT
) + 16;
2345 Y
= ((RY
*r
+ GY
*g
+ BY
*b
)>>RGB2YUV_SHIFT
) + 16;
2348 udst
+= chromStride
;
2349 vdst
+= chromStride
;
2355 static void RENAME(interleaveBytes
)(const uint8_t *src1
, const uint8_t *src2
, uint8_t *dest
,
2356 long width
, long height
, long src1Stride
,
2357 long src2Stride
, long dstStride
)
2361 for (h
=0; h
< height
; h
++) {
2367 "xor %%"REG_a
", %%"REG_a
" \n\t"
2369 PREFETCH
" 64(%1, %%"REG_a
") \n\t"
2370 PREFETCH
" 64(%2, %%"REG_a
") \n\t"
2371 "movdqa (%1, %%"REG_a
"), %%xmm0 \n\t"
2372 "movdqa (%1, %%"REG_a
"), %%xmm1 \n\t"
2373 "movdqa (%2, %%"REG_a
"), %%xmm2 \n\t"
2374 "punpcklbw %%xmm2, %%xmm0 \n\t"
2375 "punpckhbw %%xmm2, %%xmm1 \n\t"
2376 "movntdq %%xmm0, (%0, %%"REG_a
", 2) \n\t"
2377 "movntdq %%xmm1, 16(%0, %%"REG_a
", 2) \n\t"
2378 "add $16, %%"REG_a
" \n\t"
2379 "cmp %3, %%"REG_a
" \n\t"
2381 ::"r"(dest
), "r"(src1
), "r"(src2
), "r" ((x86_reg
)width
-15)
2382 : "memory", "%"REG_a
""
2386 "xor %%"REG_a
", %%"REG_a
" \n\t"
2388 PREFETCH
" 64(%1, %%"REG_a
") \n\t"
2389 PREFETCH
" 64(%2, %%"REG_a
") \n\t"
2390 "movq (%1, %%"REG_a
"), %%mm0 \n\t"
2391 "movq 8(%1, %%"REG_a
"), %%mm2 \n\t"
2392 "movq %%mm0, %%mm1 \n\t"
2393 "movq %%mm2, %%mm3 \n\t"
2394 "movq (%2, %%"REG_a
"), %%mm4 \n\t"
2395 "movq 8(%2, %%"REG_a
"), %%mm5 \n\t"
2396 "punpcklbw %%mm4, %%mm0 \n\t"
2397 "punpckhbw %%mm4, %%mm1 \n\t"
2398 "punpcklbw %%mm5, %%mm2 \n\t"
2399 "punpckhbw %%mm5, %%mm3 \n\t"
2400 MOVNTQ
" %%mm0, (%0, %%"REG_a
", 2) \n\t"
2401 MOVNTQ
" %%mm1, 8(%0, %%"REG_a
", 2) \n\t"
2402 MOVNTQ
" %%mm2, 16(%0, %%"REG_a
", 2) \n\t"
2403 MOVNTQ
" %%mm3, 24(%0, %%"REG_a
", 2) \n\t"
2404 "add $16, %%"REG_a
" \n\t"
2405 "cmp %3, %%"REG_a
" \n\t"
2407 ::"r"(dest
), "r"(src1
), "r"(src2
), "r" ((x86_reg
)width
-15)
2408 : "memory", "%"REG_a
2411 for (w
= (width
&(~15)); w
< width
; w
++) {
2412 dest
[2*w
+0] = src1
[w
];
2413 dest
[2*w
+1] = src2
[w
];
2416 for (w
=0; w
< width
; w
++) {
2417 dest
[2*w
+0] = src1
[w
];
2418 dest
[2*w
+1] = src2
[w
];
2434 static inline void RENAME(vu9_to_vu12
)(const uint8_t *src1
, const uint8_t *src2
,
2435 uint8_t *dst1
, uint8_t *dst2
,
2436 long width
, long height
,
2437 long srcStride1
, long srcStride2
,
2438 long dstStride1
, long dstStride2
)
2442 w
=width
/2; h
=height
/2;
2447 ::"m"(*(src1
+srcStride1
)),"m"(*(src2
+srcStride2
)):"memory");
2450 const uint8_t* s1
=src1
+srcStride1
*(y
>>1);
2451 uint8_t* d
=dst1
+dstStride1
*y
;
2454 for (;x
<w
-31;x
+=32) {
2456 PREFETCH
" 32%1 \n\t"
2457 "movq %1, %%mm0 \n\t"
2458 "movq 8%1, %%mm2 \n\t"
2459 "movq 16%1, %%mm4 \n\t"
2460 "movq 24%1, %%mm6 \n\t"
2461 "movq %%mm0, %%mm1 \n\t"
2462 "movq %%mm2, %%mm3 \n\t"
2463 "movq %%mm4, %%mm5 \n\t"
2464 "movq %%mm6, %%mm7 \n\t"
2465 "punpcklbw %%mm0, %%mm0 \n\t"
2466 "punpckhbw %%mm1, %%mm1 \n\t"
2467 "punpcklbw %%mm2, %%mm2 \n\t"
2468 "punpckhbw %%mm3, %%mm3 \n\t"
2469 "punpcklbw %%mm4, %%mm4 \n\t"
2470 "punpckhbw %%mm5, %%mm5 \n\t"
2471 "punpcklbw %%mm6, %%mm6 \n\t"
2472 "punpckhbw %%mm7, %%mm7 \n\t"
2473 MOVNTQ
" %%mm0, %0 \n\t"
2474 MOVNTQ
" %%mm1, 8%0 \n\t"
2475 MOVNTQ
" %%mm2, 16%0 \n\t"
2476 MOVNTQ
" %%mm3, 24%0 \n\t"
2477 MOVNTQ
" %%mm4, 32%0 \n\t"
2478 MOVNTQ
" %%mm5, 40%0 \n\t"
2479 MOVNTQ
" %%mm6, 48%0 \n\t"
2480 MOVNTQ
" %%mm7, 56%0"
2486 for (;x
<w
;x
++) d
[2*x
]=d
[2*x
+1]=s1
[x
];
2489 const uint8_t* s2
=src2
+srcStride2
*(y
>>1);
2490 uint8_t* d
=dst2
+dstStride2
*y
;
2493 for (;x
<w
-31;x
+=32) {
2495 PREFETCH
" 32%1 \n\t"
2496 "movq %1, %%mm0 \n\t"
2497 "movq 8%1, %%mm2 \n\t"
2498 "movq 16%1, %%mm4 \n\t"
2499 "movq 24%1, %%mm6 \n\t"
2500 "movq %%mm0, %%mm1 \n\t"
2501 "movq %%mm2, %%mm3 \n\t"
2502 "movq %%mm4, %%mm5 \n\t"
2503 "movq %%mm6, %%mm7 \n\t"
2504 "punpcklbw %%mm0, %%mm0 \n\t"
2505 "punpckhbw %%mm1, %%mm1 \n\t"
2506 "punpcklbw %%mm2, %%mm2 \n\t"
2507 "punpckhbw %%mm3, %%mm3 \n\t"
2508 "punpcklbw %%mm4, %%mm4 \n\t"
2509 "punpckhbw %%mm5, %%mm5 \n\t"
2510 "punpcklbw %%mm6, %%mm6 \n\t"
2511 "punpckhbw %%mm7, %%mm7 \n\t"
2512 MOVNTQ
" %%mm0, %0 \n\t"
2513 MOVNTQ
" %%mm1, 8%0 \n\t"
2514 MOVNTQ
" %%mm2, 16%0 \n\t"
2515 MOVNTQ
" %%mm3, 24%0 \n\t"
2516 MOVNTQ
" %%mm4, 32%0 \n\t"
2517 MOVNTQ
" %%mm5, 40%0 \n\t"
2518 MOVNTQ
" %%mm6, 48%0 \n\t"
2519 MOVNTQ
" %%mm7, 56%0"
2525 for (;x
<w
;x
++) d
[2*x
]=d
[2*x
+1]=s2
[x
];
2536 static inline void RENAME(yvu9_to_yuy2
)(const uint8_t *src1
, const uint8_t *src2
, const uint8_t *src3
,
2538 long width
, long height
,
2539 long srcStride1
, long srcStride2
,
2540 long srcStride3
, long dstStride
)
2544 w
=width
/2; h
=height
;
2546 const uint8_t* yp
=src1
+srcStride1
*y
;
2547 const uint8_t* up
=src2
+srcStride2
*(y
>>2);
2548 const uint8_t* vp
=src3
+srcStride3
*(y
>>2);
2549 uint8_t* d
=dst
+dstStride
*y
;
2554 PREFETCH
" 32(%1, %0) \n\t"
2555 PREFETCH
" 32(%2, %0) \n\t"
2556 PREFETCH
" 32(%3, %0) \n\t"
2557 "movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
2558 "movq (%2, %0), %%mm1 \n\t" /* U0U1U2U3U4U5U6U7 */
2559 "movq (%3, %0), %%mm2 \n\t" /* V0V1V2V3V4V5V6V7 */
2560 "movq %%mm0, %%mm3 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
2561 "movq %%mm1, %%mm4 \n\t" /* U0U1U2U3U4U5U6U7 */
2562 "movq %%mm2, %%mm5 \n\t" /* V0V1V2V3V4V5V6V7 */
2563 "punpcklbw %%mm1, %%mm1 \n\t" /* U0U0 U1U1 U2U2 U3U3 */
2564 "punpcklbw %%mm2, %%mm2 \n\t" /* V0V0 V1V1 V2V2 V3V3 */
2565 "punpckhbw %%mm4, %%mm4 \n\t" /* U4U4 U5U5 U6U6 U7U7 */
2566 "punpckhbw %%mm5, %%mm5 \n\t" /* V4V4 V5V5 V6V6 V7V7 */
2568 "movq %%mm1, %%mm6 \n\t"
2569 "punpcklbw %%mm2, %%mm1 \n\t" /* U0V0 U0V0 U1V1 U1V1*/
2570 "punpcklbw %%mm1, %%mm0 \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/
2571 "punpckhbw %%mm1, %%mm3 \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/
2572 MOVNTQ
" %%mm0, (%4, %0, 8) \n\t"
2573 MOVNTQ
" %%mm3, 8(%4, %0, 8) \n\t"
2575 "punpckhbw %%mm2, %%mm6 \n\t" /* U2V2 U2V2 U3V3 U3V3*/
2576 "movq 8(%1, %0, 4), %%mm0 \n\t"
2577 "movq %%mm0, %%mm3 \n\t"
2578 "punpcklbw %%mm6, %%mm0 \n\t" /* Y U2 Y V2 Y U2 Y V2*/
2579 "punpckhbw %%mm6, %%mm3 \n\t" /* Y U3 Y V3 Y U3 Y V3*/
2580 MOVNTQ
" %%mm0, 16(%4, %0, 8) \n\t"
2581 MOVNTQ
" %%mm3, 24(%4, %0, 8) \n\t"
2583 "movq %%mm4, %%mm6 \n\t"
2584 "movq 16(%1, %0, 4), %%mm0 \n\t"
2585 "movq %%mm0, %%mm3 \n\t"
2586 "punpcklbw %%mm5, %%mm4 \n\t"
2587 "punpcklbw %%mm4, %%mm0 \n\t" /* Y U4 Y V4 Y U4 Y V4*/
2588 "punpckhbw %%mm4, %%mm3 \n\t" /* Y U5 Y V5 Y U5 Y V5*/
2589 MOVNTQ
" %%mm0, 32(%4, %0, 8) \n\t"
2590 MOVNTQ
" %%mm3, 40(%4, %0, 8) \n\t"
2592 "punpckhbw %%mm5, %%mm6 \n\t"
2593 "movq 24(%1, %0, 4), %%mm0 \n\t"
2594 "movq %%mm0, %%mm3 \n\t"
2595 "punpcklbw %%mm6, %%mm0 \n\t" /* Y U6 Y V6 Y U6 Y V6*/
2596 "punpckhbw %%mm6, %%mm3 \n\t" /* Y U7 Y V7 Y U7 Y V7*/
2597 MOVNTQ
" %%mm0, 48(%4, %0, 8) \n\t"
2598 MOVNTQ
" %%mm3, 56(%4, %0, 8) \n\t"
2601 : "r"(yp
), "r" (up
), "r"(vp
), "r"(d
)
2606 const long x2
= x
<<2;
2609 d
[8*x
+2] = yp
[x2
+1];
2611 d
[8*x
+4] = yp
[x2
+2];
2613 d
[8*x
+6] = yp
[x2
+3];
2626 static void RENAME(extract_even
)(const uint8_t *src
, uint8_t *dst
, x86_reg count
)
2636 "pcmpeqw %%mm7, %%mm7 \n\t"
2637 "psrlw $8, %%mm7 \n\t"
2639 "movq -30(%1, %0, 2), %%mm0 \n\t"
2640 "movq -22(%1, %0, 2), %%mm1 \n\t"
2641 "movq -14(%1, %0, 2), %%mm2 \n\t"
2642 "movq -6(%1, %0, 2), %%mm3 \n\t"
2643 "pand %%mm7, %%mm0 \n\t"
2644 "pand %%mm7, %%mm1 \n\t"
2645 "pand %%mm7, %%mm2 \n\t"
2646 "pand %%mm7, %%mm3 \n\t"
2647 "packuswb %%mm1, %%mm0 \n\t"
2648 "packuswb %%mm3, %%mm2 \n\t"
2649 MOVNTQ
" %%mm0,-15(%2, %0) \n\t"
2650 MOVNTQ
" %%mm2,- 7(%2, %0) \n\t"
2654 : "r"(src
), "r"(dst
)
2660 dst
[count
]= src
[2*count
];
2665 static void RENAME(extract_even2
)(const uint8_t *src
, uint8_t *dst0
, uint8_t *dst1
, x86_reg count
)
2675 "pcmpeqw %%mm7, %%mm7 \n\t"
2676 "psrlw $8, %%mm7 \n\t"
2678 "movq -28(%1, %0, 4), %%mm0 \n\t"
2679 "movq -20(%1, %0, 4), %%mm1 \n\t"
2680 "movq -12(%1, %0, 4), %%mm2 \n\t"
2681 "movq -4(%1, %0, 4), %%mm3 \n\t"
2682 "pand %%mm7, %%mm0 \n\t"
2683 "pand %%mm7, %%mm1 \n\t"
2684 "pand %%mm7, %%mm2 \n\t"
2685 "pand %%mm7, %%mm3 \n\t"
2686 "packuswb %%mm1, %%mm0 \n\t"
2687 "packuswb %%mm3, %%mm2 \n\t"
2688 "movq %%mm0, %%mm1 \n\t"
2689 "movq %%mm2, %%mm3 \n\t"
2690 "psrlw $8, %%mm0 \n\t"
2691 "psrlw $8, %%mm2 \n\t"
2692 "pand %%mm7, %%mm1 \n\t"
2693 "pand %%mm7, %%mm3 \n\t"
2694 "packuswb %%mm2, %%mm0 \n\t"
2695 "packuswb %%mm3, %%mm1 \n\t"
2696 MOVNTQ
" %%mm0,- 7(%3, %0) \n\t"
2697 MOVNTQ
" %%mm1,- 7(%2, %0) \n\t"
2701 : "r"(src
), "r"(dst0
), "r"(dst1
)
2707 dst0
[count
]= src
[4*count
+0];
2708 dst1
[count
]= src
[4*count
+2];
2713 static void RENAME(extract_even2avg
)(const uint8_t *src0
, const uint8_t *src1
, uint8_t *dst0
, uint8_t *dst1
, x86_reg count
)
2724 "pcmpeqw %%mm7, %%mm7 \n\t"
2725 "psrlw $8, %%mm7 \n\t"
2727 "movq -28(%1, %0, 4), %%mm0 \n\t"
2728 "movq -20(%1, %0, 4), %%mm1 \n\t"
2729 "movq -12(%1, %0, 4), %%mm2 \n\t"
2730 "movq -4(%1, %0, 4), %%mm3 \n\t"
2731 PAVGB
" -28(%2, %0, 4), %%mm0 \n\t"
2732 PAVGB
" -20(%2, %0, 4), %%mm1 \n\t"
2733 PAVGB
" -12(%2, %0, 4), %%mm2 \n\t"
2734 PAVGB
" - 4(%2, %0, 4), %%mm3 \n\t"
2735 "pand %%mm7, %%mm0 \n\t"
2736 "pand %%mm7, %%mm1 \n\t"
2737 "pand %%mm7, %%mm2 \n\t"
2738 "pand %%mm7, %%mm3 \n\t"
2739 "packuswb %%mm1, %%mm0 \n\t"
2740 "packuswb %%mm3, %%mm2 \n\t"
2741 "movq %%mm0, %%mm1 \n\t"
2742 "movq %%mm2, %%mm3 \n\t"
2743 "psrlw $8, %%mm0 \n\t"
2744 "psrlw $8, %%mm2 \n\t"
2745 "pand %%mm7, %%mm1 \n\t"
2746 "pand %%mm7, %%mm3 \n\t"
2747 "packuswb %%mm2, %%mm0 \n\t"
2748 "packuswb %%mm3, %%mm1 \n\t"
2749 MOVNTQ
" %%mm0,- 7(%4, %0) \n\t"
2750 MOVNTQ
" %%mm1,- 7(%3, %0) \n\t"
2754 : "r"(src0
), "r"(src1
), "r"(dst0
), "r"(dst1
)
2760 dst0
[count
]= (src0
[4*count
+0]+src1
[4*count
+0])>>1;
2761 dst1
[count
]= (src0
[4*count
+2]+src1
[4*count
+2])>>1;
2766 static void RENAME(extract_odd2
)(const uint8_t *src
, uint8_t *dst0
, uint8_t *dst1
, x86_reg count
)
2776 "pcmpeqw %%mm7, %%mm7 \n\t"
2777 "psrlw $8, %%mm7 \n\t"
2779 "movq -28(%1, %0, 4), %%mm0 \n\t"
2780 "movq -20(%1, %0, 4), %%mm1 \n\t"
2781 "movq -12(%1, %0, 4), %%mm2 \n\t"
2782 "movq -4(%1, %0, 4), %%mm3 \n\t"
2783 "psrlw $8, %%mm0 \n\t"
2784 "psrlw $8, %%mm1 \n\t"
2785 "psrlw $8, %%mm2 \n\t"
2786 "psrlw $8, %%mm3 \n\t"
2787 "packuswb %%mm1, %%mm0 \n\t"
2788 "packuswb %%mm3, %%mm2 \n\t"
2789 "movq %%mm0, %%mm1 \n\t"
2790 "movq %%mm2, %%mm3 \n\t"
2791 "psrlw $8, %%mm0 \n\t"
2792 "psrlw $8, %%mm2 \n\t"
2793 "pand %%mm7, %%mm1 \n\t"
2794 "pand %%mm7, %%mm3 \n\t"
2795 "packuswb %%mm2, %%mm0 \n\t"
2796 "packuswb %%mm3, %%mm1 \n\t"
2797 MOVNTQ
" %%mm0,- 7(%3, %0) \n\t"
2798 MOVNTQ
" %%mm1,- 7(%2, %0) \n\t"
2802 : "r"(src
), "r"(dst0
), "r"(dst1
)
2809 dst0
[count
]= src
[4*count
+0];
2810 dst1
[count
]= src
[4*count
+2];
2815 static void RENAME(extract_odd2avg
)(const uint8_t *src0
, const uint8_t *src1
, uint8_t *dst0
, uint8_t *dst1
, x86_reg count
)
2826 "pcmpeqw %%mm7, %%mm7 \n\t"
2827 "psrlw $8, %%mm7 \n\t"
2829 "movq -28(%1, %0, 4), %%mm0 \n\t"
2830 "movq -20(%1, %0, 4), %%mm1 \n\t"
2831 "movq -12(%1, %0, 4), %%mm2 \n\t"
2832 "movq -4(%1, %0, 4), %%mm3 \n\t"
2833 PAVGB
" -28(%2, %0, 4), %%mm0 \n\t"
2834 PAVGB
" -20(%2, %0, 4), %%mm1 \n\t"
2835 PAVGB
" -12(%2, %0, 4), %%mm2 \n\t"
2836 PAVGB
" - 4(%2, %0, 4), %%mm3 \n\t"
2837 "psrlw $8, %%mm0 \n\t"
2838 "psrlw $8, %%mm1 \n\t"
2839 "psrlw $8, %%mm2 \n\t"
2840 "psrlw $8, %%mm3 \n\t"
2841 "packuswb %%mm1, %%mm0 \n\t"
2842 "packuswb %%mm3, %%mm2 \n\t"
2843 "movq %%mm0, %%mm1 \n\t"
2844 "movq %%mm2, %%mm3 \n\t"
2845 "psrlw $8, %%mm0 \n\t"
2846 "psrlw $8, %%mm2 \n\t"
2847 "pand %%mm7, %%mm1 \n\t"
2848 "pand %%mm7, %%mm3 \n\t"
2849 "packuswb %%mm2, %%mm0 \n\t"
2850 "packuswb %%mm3, %%mm1 \n\t"
2851 MOVNTQ
" %%mm0,- 7(%4, %0) \n\t"
2852 MOVNTQ
" %%mm1,- 7(%3, %0) \n\t"
2856 : "r"(src0
), "r"(src1
), "r"(dst0
), "r"(dst1
)
2864 dst0
[count
]= (src0
[4*count
+0]+src1
[4*count
+0])>>1;
2865 dst1
[count
]= (src0
[4*count
+2]+src1
[4*count
+2])>>1;
2870 static void RENAME(yuyvtoyuv420
)(uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
, const uint8_t *src
,
2871 long width
, long height
,
2872 long lumStride
, long chromStride
, long srcStride
)
2875 const long chromWidth
= -((-width
)>>1);
2877 for (y
=0; y
<height
; y
++) {
2878 RENAME(extract_even
)(src
, ydst
, width
);
2880 RENAME(extract_odd2avg
)(src
-srcStride
, src
, udst
, vdst
, chromWidth
);
2897 static void RENAME(yuyvtoyuv422
)(uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
, const uint8_t *src
,
2898 long width
, long height
,
2899 long lumStride
, long chromStride
, long srcStride
)
2902 const long chromWidth
= -((-width
)>>1);
2904 for (y
=0; y
<height
; y
++) {
2905 RENAME(extract_even
)(src
, ydst
, width
);
2906 RENAME(extract_odd2
)(src
, udst
, vdst
, chromWidth
);
2922 static void RENAME(uyvytoyuv420
)(uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
, const uint8_t *src
,
2923 long width
, long height
,
2924 long lumStride
, long chromStride
, long srcStride
)
2927 const long chromWidth
= -((-width
)>>1);
2929 for (y
=0; y
<height
; y
++) {
2930 RENAME(extract_even
)(src
+1, ydst
, width
);
2932 RENAME(extract_even2avg
)(src
-srcStride
, src
, udst
, vdst
, chromWidth
);
2949 static void RENAME(uyvytoyuv422
)(uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
, const uint8_t *src
,
2950 long width
, long height
,
2951 long lumStride
, long chromStride
, long srcStride
)
2954 const long chromWidth
= -((-width
)>>1);
2956 for (y
=0; y
<height
; y
++) {
2957 RENAME(extract_even
)(src
+1, ydst
, width
);
2958 RENAME(extract_even2
)(src
, udst
, vdst
, chromWidth
);
2974 static inline void RENAME(rgb2rgb_init
)(void)
2976 rgb15to16
= RENAME(rgb15to16
);
2977 rgb15tobgr24
= RENAME(rgb15tobgr24
);
2978 rgb15to32
= RENAME(rgb15to32
);
2979 rgb16tobgr24
= RENAME(rgb16tobgr24
);
2980 rgb16to32
= RENAME(rgb16to32
);
2981 rgb16to15
= RENAME(rgb16to15
);
2982 rgb24tobgr16
= RENAME(rgb24tobgr16
);
2983 rgb24tobgr15
= RENAME(rgb24tobgr15
);
2984 rgb24tobgr32
= RENAME(rgb24tobgr32
);
2985 rgb32to16
= RENAME(rgb32to16
);
2986 rgb32to15
= RENAME(rgb32to15
);
2987 rgb32tobgr24
= RENAME(rgb32tobgr24
);
2988 rgb24to15
= RENAME(rgb24to15
);
2989 rgb24to16
= RENAME(rgb24to16
);
2990 rgb24tobgr24
= RENAME(rgb24tobgr24
);
2991 rgb32tobgr32
= RENAME(rgb32tobgr32
);
2992 rgb32tobgr16
= RENAME(rgb32tobgr16
);
2993 rgb32tobgr15
= RENAME(rgb32tobgr15
);
2994 yv12toyuy2
= RENAME(yv12toyuy2
);
2995 yv12touyvy
= RENAME(yv12touyvy
);
2996 yuv422ptoyuy2
= RENAME(yuv422ptoyuy2
);
2997 yuv422ptouyvy
= RENAME(yuv422ptouyvy
);
2998 yuy2toyv12
= RENAME(yuy2toyv12
);
2999 // yvu9toyv12 = RENAME(yvu9toyv12);
3000 planar2x
= RENAME(planar2x
);
3001 rgb24toyv12
= RENAME(rgb24toyv12
);
3002 interleaveBytes
= RENAME(interleaveBytes
);
3003 vu9_to_vu12
= RENAME(vu9_to_vu12
);
3004 yvu9_to_yuy2
= RENAME(yvu9_to_yuy2
);
3006 uyvytoyuv420
= RENAME(uyvytoyuv420
);
3007 uyvytoyuv422
= RENAME(uyvytoyuv422
);
3008 yuyvtoyuv420
= RENAME(yuyvtoyuv420
);
3009 yuyvtoyuv422
= RENAME(yuyvtoyuv422
);