2 * software RGB to RGB converter
3 * pluralize by software PAL8 to RGB converter
4 * software YUV to YUV converter
5 * software YUV to RGB converter
6 * Written by Nick Kurshev.
7 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
8 * lot of big-endian byte order fixes by Alex Beregszaszi
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * The C code (not assembly, MMX, ...) of this file can be used
27 * under the LGPL license.
47 #define PREFETCH "prefetch"
48 #define PREFETCHW "prefetchw"
49 #define PAVGB "pavgusb"
51 #define PREFETCH "prefetchnta"
52 #define PREFETCHW "prefetcht0"
55 #define PREFETCH " # nop"
56 #define PREFETCHW " # nop"
60 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
67 #define MOVNTQ "movntq"
68 #define SFENCE "sfence"
71 #define SFENCE " # nop"
74 static inline void RENAME(rgb24tobgr32
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
77 const uint8_t *s
= src
;
80 const uint8_t *mm_end
;
84 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
86 __asm__
volatile("movq %0, %%mm7"::"m"(mask32a
):"memory");
91 "punpckldq 3%1, %%mm0 \n\t"
92 "movd 6%1, %%mm1 \n\t"
93 "punpckldq 9%1, %%mm1 \n\t"
94 "movd 12%1, %%mm2 \n\t"
95 "punpckldq 15%1, %%mm2 \n\t"
96 "movd 18%1, %%mm3 \n\t"
97 "punpckldq 21%1, %%mm3 \n\t"
98 "por %%mm7, %%mm0 \n\t"
99 "por %%mm7, %%mm1 \n\t"
100 "por %%mm7, %%mm2 \n\t"
101 "por %%mm7, %%mm3 \n\t"
102 MOVNTQ
" %%mm0, %0 \n\t"
103 MOVNTQ
" %%mm1, 8%0 \n\t"
104 MOVNTQ
" %%mm2, 16%0 \n\t"
112 __asm__
volatile(SFENCE:::"memory");
113 __asm__
volatile(EMMS:::"memory");
117 /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
132 static inline void RENAME(rgb32tobgr24
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
135 const uint8_t *s
= src
;
138 const uint8_t *mm_end
;
142 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
147 "movq %1, %%mm0 \n\t"
148 "movq 8%1, %%mm1 \n\t"
149 "movq 16%1, %%mm4 \n\t"
150 "movq 24%1, %%mm5 \n\t"
151 "movq %%mm0, %%mm2 \n\t"
152 "movq %%mm1, %%mm3 \n\t"
153 "movq %%mm4, %%mm6 \n\t"
154 "movq %%mm5, %%mm7 \n\t"
155 "psrlq $8, %%mm2 \n\t"
156 "psrlq $8, %%mm3 \n\t"
157 "psrlq $8, %%mm6 \n\t"
158 "psrlq $8, %%mm7 \n\t"
159 "pand %2, %%mm0 \n\t"
160 "pand %2, %%mm1 \n\t"
161 "pand %2, %%mm4 \n\t"
162 "pand %2, %%mm5 \n\t"
163 "pand %3, %%mm2 \n\t"
164 "pand %3, %%mm3 \n\t"
165 "pand %3, %%mm6 \n\t"
166 "pand %3, %%mm7 \n\t"
167 "por %%mm2, %%mm0 \n\t"
168 "por %%mm3, %%mm1 \n\t"
169 "por %%mm6, %%mm4 \n\t"
170 "por %%mm7, %%mm5 \n\t"
172 "movq %%mm1, %%mm2 \n\t"
173 "movq %%mm4, %%mm3 \n\t"
174 "psllq $48, %%mm2 \n\t"
175 "psllq $32, %%mm3 \n\t"
176 "pand %4, %%mm2 \n\t"
177 "pand %5, %%mm3 \n\t"
178 "por %%mm2, %%mm0 \n\t"
179 "psrlq $16, %%mm1 \n\t"
180 "psrlq $32, %%mm4 \n\t"
181 "psllq $16, %%mm5 \n\t"
182 "por %%mm3, %%mm1 \n\t"
183 "pand %6, %%mm5 \n\t"
184 "por %%mm5, %%mm4 \n\t"
186 MOVNTQ
" %%mm0, %0 \n\t"
187 MOVNTQ
" %%mm1, 8%0 \n\t"
190 :"m"(*s
),"m"(mask24l
),
191 "m"(mask24h
),"m"(mask24hh
),"m"(mask24hhh
),"m"(mask24hhhh
)
196 __asm__
volatile(SFENCE:::"memory");
197 __asm__
volatile(EMMS:::"memory");
201 /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
217 original by Strepto/Astral
218 ported to gcc & bugfixed: A'rpi
219 MMX2, 3DNOW optimization by Nick Kurshev
220 32-bit C version and the "and & add" trick by Michael Niedermayer
222 static inline void RENAME(rgb15to16
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
224 register const uint8_t* s
=src
;
225 register uint8_t* d
=dst
;
226 register const uint8_t *end
;
227 const uint8_t *mm_end
;
230 __asm__
volatile(PREFETCH
" %0"::"m"(*s
));
231 __asm__
volatile("movq %0, %%mm4"::"m"(mask15s
));
236 "movq %1, %%mm0 \n\t"
237 "movq 8%1, %%mm2 \n\t"
238 "movq %%mm0, %%mm1 \n\t"
239 "movq %%mm2, %%mm3 \n\t"
240 "pand %%mm4, %%mm0 \n\t"
241 "pand %%mm4, %%mm2 \n\t"
242 "paddw %%mm1, %%mm0 \n\t"
243 "paddw %%mm3, %%mm2 \n\t"
244 MOVNTQ
" %%mm0, %0 \n\t"
252 __asm__
volatile(SFENCE:::"memory");
253 __asm__
volatile(EMMS:::"memory");
257 register unsigned x
= *((const uint32_t *)s
);
258 *((uint32_t *)d
) = (x
&0x7FFF7FFF) + (x
&0x7FE07FE0);
263 register unsigned short x
= *((const uint16_t *)s
);
264 *((uint16_t *)d
) = (x
&0x7FFF) + (x
&0x7FE0);
268 static inline void RENAME(rgb16to15
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
270 register const uint8_t* s
=src
;
271 register uint8_t* d
=dst
;
272 register const uint8_t *end
;
273 const uint8_t *mm_end
;
276 __asm__
volatile(PREFETCH
" %0"::"m"(*s
));
277 __asm__
volatile("movq %0, %%mm7"::"m"(mask15rg
));
278 __asm__
volatile("movq %0, %%mm6"::"m"(mask15b
));
283 "movq %1, %%mm0 \n\t"
284 "movq 8%1, %%mm2 \n\t"
285 "movq %%mm0, %%mm1 \n\t"
286 "movq %%mm2, %%mm3 \n\t"
287 "psrlq $1, %%mm0 \n\t"
288 "psrlq $1, %%mm2 \n\t"
289 "pand %%mm7, %%mm0 \n\t"
290 "pand %%mm7, %%mm2 \n\t"
291 "pand %%mm6, %%mm1 \n\t"
292 "pand %%mm6, %%mm3 \n\t"
293 "por %%mm1, %%mm0 \n\t"
294 "por %%mm3, %%mm2 \n\t"
295 MOVNTQ
" %%mm0, %0 \n\t"
303 __asm__
volatile(SFENCE:::"memory");
304 __asm__
volatile(EMMS:::"memory");
308 register uint32_t x
= *((const uint32_t*)s
);
309 *((uint32_t *)d
) = ((x
>>1)&0x7FE07FE0) | (x
&0x001F001F);
314 register uint16_t x
= *((const uint16_t*)s
);
315 *((uint16_t *)d
) = ((x
>>1)&0x7FE0) | (x
&0x001F);
319 static inline void RENAME(rgb32to16
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
321 const uint8_t *s
= src
;
324 const uint8_t *mm_end
;
326 uint16_t *d
= (uint16_t *)dst
;
330 #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
332 "movq %3, %%mm5 \n\t"
333 "movq %4, %%mm6 \n\t"
334 "movq %5, %%mm7 \n\t"
338 PREFETCH
" 32(%1) \n\t"
339 "movd (%1), %%mm0 \n\t"
340 "movd 4(%1), %%mm3 \n\t"
341 "punpckldq 8(%1), %%mm0 \n\t"
342 "punpckldq 12(%1), %%mm3 \n\t"
343 "movq %%mm0, %%mm1 \n\t"
344 "movq %%mm3, %%mm4 \n\t"
345 "pand %%mm6, %%mm0 \n\t"
346 "pand %%mm6, %%mm3 \n\t"
347 "pmaddwd %%mm7, %%mm0 \n\t"
348 "pmaddwd %%mm7, %%mm3 \n\t"
349 "pand %%mm5, %%mm1 \n\t"
350 "pand %%mm5, %%mm4 \n\t"
351 "por %%mm1, %%mm0 \n\t"
352 "por %%mm4, %%mm3 \n\t"
353 "psrld $5, %%mm0 \n\t"
354 "pslld $11, %%mm3 \n\t"
355 "por %%mm3, %%mm0 \n\t"
356 MOVNTQ
" %%mm0, (%0) \n\t"
363 : "r" (mm_end
), "m" (mask3216g
), "m" (mask3216br
), "m" (mul3216
)
366 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
368 "movq %0, %%mm7 \n\t"
369 "movq %1, %%mm6 \n\t"
370 ::"m"(red_16mask
),"m"(green_16mask
));
374 "movd %1, %%mm0 \n\t"
375 "movd 4%1, %%mm3 \n\t"
376 "punpckldq 8%1, %%mm0 \n\t"
377 "punpckldq 12%1, %%mm3 \n\t"
378 "movq %%mm0, %%mm1 \n\t"
379 "movq %%mm0, %%mm2 \n\t"
380 "movq %%mm3, %%mm4 \n\t"
381 "movq %%mm3, %%mm5 \n\t"
382 "psrlq $3, %%mm0 \n\t"
383 "psrlq $3, %%mm3 \n\t"
384 "pand %2, %%mm0 \n\t"
385 "pand %2, %%mm3 \n\t"
386 "psrlq $5, %%mm1 \n\t"
387 "psrlq $5, %%mm4 \n\t"
388 "pand %%mm6, %%mm1 \n\t"
389 "pand %%mm6, %%mm4 \n\t"
390 "psrlq $8, %%mm2 \n\t"
391 "psrlq $8, %%mm5 \n\t"
392 "pand %%mm7, %%mm2 \n\t"
393 "pand %%mm7, %%mm5 \n\t"
394 "por %%mm1, %%mm0 \n\t"
395 "por %%mm4, %%mm3 \n\t"
396 "por %%mm2, %%mm0 \n\t"
397 "por %%mm5, %%mm3 \n\t"
398 "psllq $16, %%mm3 \n\t"
399 "por %%mm3, %%mm0 \n\t"
400 MOVNTQ
" %%mm0, %0 \n\t"
401 :"=m"(*d
):"m"(*s
),"m"(blue_16mask
):"memory");
406 __asm__
volatile(SFENCE:::"memory");
407 __asm__
volatile(EMMS:::"memory");
410 register int rgb
= *(const uint32_t*)s
; s
+= 4;
411 *d
++ = ((rgb
&0xFF)>>3) + ((rgb
&0xFC00)>>5) + ((rgb
&0xF80000)>>8);
415 static inline void RENAME(rgb32tobgr16
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
417 const uint8_t *s
= src
;
420 const uint8_t *mm_end
;
422 uint16_t *d
= (uint16_t *)dst
;
425 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
427 "movq %0, %%mm7 \n\t"
428 "movq %1, %%mm6 \n\t"
429 ::"m"(red_16mask
),"m"(green_16mask
));
434 "movd %1, %%mm0 \n\t"
435 "movd 4%1, %%mm3 \n\t"
436 "punpckldq 8%1, %%mm0 \n\t"
437 "punpckldq 12%1, %%mm3 \n\t"
438 "movq %%mm0, %%mm1 \n\t"
439 "movq %%mm0, %%mm2 \n\t"
440 "movq %%mm3, %%mm4 \n\t"
441 "movq %%mm3, %%mm5 \n\t"
442 "psllq $8, %%mm0 \n\t"
443 "psllq $8, %%mm3 \n\t"
444 "pand %%mm7, %%mm0 \n\t"
445 "pand %%mm7, %%mm3 \n\t"
446 "psrlq $5, %%mm1 \n\t"
447 "psrlq $5, %%mm4 \n\t"
448 "pand %%mm6, %%mm1 \n\t"
449 "pand %%mm6, %%mm4 \n\t"
450 "psrlq $19, %%mm2 \n\t"
451 "psrlq $19, %%mm5 \n\t"
452 "pand %2, %%mm2 \n\t"
453 "pand %2, %%mm5 \n\t"
454 "por %%mm1, %%mm0 \n\t"
455 "por %%mm4, %%mm3 \n\t"
456 "por %%mm2, %%mm0 \n\t"
457 "por %%mm5, %%mm3 \n\t"
458 "psllq $16, %%mm3 \n\t"
459 "por %%mm3, %%mm0 \n\t"
460 MOVNTQ
" %%mm0, %0 \n\t"
461 :"=m"(*d
):"m"(*s
),"m"(blue_16mask
):"memory");
465 __asm__
volatile(SFENCE:::"memory");
466 __asm__
volatile(EMMS:::"memory");
469 register int rgb
= *(const uint32_t*)s
; s
+= 4;
470 *d
++ = ((rgb
&0xF8)<<8) + ((rgb
&0xFC00)>>5) + ((rgb
&0xF80000)>>19);
474 static inline void RENAME(rgb32to15
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
476 const uint8_t *s
= src
;
479 const uint8_t *mm_end
;
481 uint16_t *d
= (uint16_t *)dst
;
485 #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
487 "movq %3, %%mm5 \n\t"
488 "movq %4, %%mm6 \n\t"
489 "movq %5, %%mm7 \n\t"
493 PREFETCH
" 32(%1) \n\t"
494 "movd (%1), %%mm0 \n\t"
495 "movd 4(%1), %%mm3 \n\t"
496 "punpckldq 8(%1), %%mm0 \n\t"
497 "punpckldq 12(%1), %%mm3 \n\t"
498 "movq %%mm0, %%mm1 \n\t"
499 "movq %%mm3, %%mm4 \n\t"
500 "pand %%mm6, %%mm0 \n\t"
501 "pand %%mm6, %%mm3 \n\t"
502 "pmaddwd %%mm7, %%mm0 \n\t"
503 "pmaddwd %%mm7, %%mm3 \n\t"
504 "pand %%mm5, %%mm1 \n\t"
505 "pand %%mm5, %%mm4 \n\t"
506 "por %%mm1, %%mm0 \n\t"
507 "por %%mm4, %%mm3 \n\t"
508 "psrld $6, %%mm0 \n\t"
509 "pslld $10, %%mm3 \n\t"
510 "por %%mm3, %%mm0 \n\t"
511 MOVNTQ
" %%mm0, (%0) \n\t"
518 : "r" (mm_end
), "m" (mask3215g
), "m" (mask3216br
), "m" (mul3215
)
521 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
523 "movq %0, %%mm7 \n\t"
524 "movq %1, %%mm6 \n\t"
525 ::"m"(red_15mask
),"m"(green_15mask
));
529 "movd %1, %%mm0 \n\t"
530 "movd 4%1, %%mm3 \n\t"
531 "punpckldq 8%1, %%mm0 \n\t"
532 "punpckldq 12%1, %%mm3 \n\t"
533 "movq %%mm0, %%mm1 \n\t"
534 "movq %%mm0, %%mm2 \n\t"
535 "movq %%mm3, %%mm4 \n\t"
536 "movq %%mm3, %%mm5 \n\t"
537 "psrlq $3, %%mm0 \n\t"
538 "psrlq $3, %%mm3 \n\t"
539 "pand %2, %%mm0 \n\t"
540 "pand %2, %%mm3 \n\t"
541 "psrlq $6, %%mm1 \n\t"
542 "psrlq $6, %%mm4 \n\t"
543 "pand %%mm6, %%mm1 \n\t"
544 "pand %%mm6, %%mm4 \n\t"
545 "psrlq $9, %%mm2 \n\t"
546 "psrlq $9, %%mm5 \n\t"
547 "pand %%mm7, %%mm2 \n\t"
548 "pand %%mm7, %%mm5 \n\t"
549 "por %%mm1, %%mm0 \n\t"
550 "por %%mm4, %%mm3 \n\t"
551 "por %%mm2, %%mm0 \n\t"
552 "por %%mm5, %%mm3 \n\t"
553 "psllq $16, %%mm3 \n\t"
554 "por %%mm3, %%mm0 \n\t"
555 MOVNTQ
" %%mm0, %0 \n\t"
556 :"=m"(*d
):"m"(*s
),"m"(blue_15mask
):"memory");
561 __asm__
volatile(SFENCE:::"memory");
562 __asm__
volatile(EMMS:::"memory");
565 register int rgb
= *(const uint32_t*)s
; s
+= 4;
566 *d
++ = ((rgb
&0xFF)>>3) + ((rgb
&0xF800)>>6) + ((rgb
&0xF80000)>>9);
570 static inline void RENAME(rgb32tobgr15
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
572 const uint8_t *s
= src
;
575 const uint8_t *mm_end
;
577 uint16_t *d
= (uint16_t *)dst
;
580 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
582 "movq %0, %%mm7 \n\t"
583 "movq %1, %%mm6 \n\t"
584 ::"m"(red_15mask
),"m"(green_15mask
));
589 "movd %1, %%mm0 \n\t"
590 "movd 4%1, %%mm3 \n\t"
591 "punpckldq 8%1, %%mm0 \n\t"
592 "punpckldq 12%1, %%mm3 \n\t"
593 "movq %%mm0, %%mm1 \n\t"
594 "movq %%mm0, %%mm2 \n\t"
595 "movq %%mm3, %%mm4 \n\t"
596 "movq %%mm3, %%mm5 \n\t"
597 "psllq $7, %%mm0 \n\t"
598 "psllq $7, %%mm3 \n\t"
599 "pand %%mm7, %%mm0 \n\t"
600 "pand %%mm7, %%mm3 \n\t"
601 "psrlq $6, %%mm1 \n\t"
602 "psrlq $6, %%mm4 \n\t"
603 "pand %%mm6, %%mm1 \n\t"
604 "pand %%mm6, %%mm4 \n\t"
605 "psrlq $19, %%mm2 \n\t"
606 "psrlq $19, %%mm5 \n\t"
607 "pand %2, %%mm2 \n\t"
608 "pand %2, %%mm5 \n\t"
609 "por %%mm1, %%mm0 \n\t"
610 "por %%mm4, %%mm3 \n\t"
611 "por %%mm2, %%mm0 \n\t"
612 "por %%mm5, %%mm3 \n\t"
613 "psllq $16, %%mm3 \n\t"
614 "por %%mm3, %%mm0 \n\t"
615 MOVNTQ
" %%mm0, %0 \n\t"
616 :"=m"(*d
):"m"(*s
),"m"(blue_15mask
):"memory");
620 __asm__
volatile(SFENCE:::"memory");
621 __asm__
volatile(EMMS:::"memory");
624 register int rgb
= *(const uint32_t*)s
; s
+= 4;
625 *d
++ = ((rgb
&0xF8)<<7) + ((rgb
&0xF800)>>6) + ((rgb
&0xF80000)>>19);
629 static inline void RENAME(rgb24tobgr16
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
631 const uint8_t *s
= src
;
634 const uint8_t *mm_end
;
636 uint16_t *d
= (uint16_t *)dst
;
639 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
641 "movq %0, %%mm7 \n\t"
642 "movq %1, %%mm6 \n\t"
643 ::"m"(red_16mask
),"m"(green_16mask
));
648 "movd %1, %%mm0 \n\t"
649 "movd 3%1, %%mm3 \n\t"
650 "punpckldq 6%1, %%mm0 \n\t"
651 "punpckldq 9%1, %%mm3 \n\t"
652 "movq %%mm0, %%mm1 \n\t"
653 "movq %%mm0, %%mm2 \n\t"
654 "movq %%mm3, %%mm4 \n\t"
655 "movq %%mm3, %%mm5 \n\t"
656 "psrlq $3, %%mm0 \n\t"
657 "psrlq $3, %%mm3 \n\t"
658 "pand %2, %%mm0 \n\t"
659 "pand %2, %%mm3 \n\t"
660 "psrlq $5, %%mm1 \n\t"
661 "psrlq $5, %%mm4 \n\t"
662 "pand %%mm6, %%mm1 \n\t"
663 "pand %%mm6, %%mm4 \n\t"
664 "psrlq $8, %%mm2 \n\t"
665 "psrlq $8, %%mm5 \n\t"
666 "pand %%mm7, %%mm2 \n\t"
667 "pand %%mm7, %%mm5 \n\t"
668 "por %%mm1, %%mm0 \n\t"
669 "por %%mm4, %%mm3 \n\t"
670 "por %%mm2, %%mm0 \n\t"
671 "por %%mm5, %%mm3 \n\t"
672 "psllq $16, %%mm3 \n\t"
673 "por %%mm3, %%mm0 \n\t"
674 MOVNTQ
" %%mm0, %0 \n\t"
675 :"=m"(*d
):"m"(*s
),"m"(blue_16mask
):"memory");
679 __asm__
volatile(SFENCE:::"memory");
680 __asm__
volatile(EMMS:::"memory");
686 *d
++ = (b
>>3) | ((g
&0xFC)<<3) | ((r
&0xF8)<<8);
690 static inline void RENAME(rgb24to16
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
692 const uint8_t *s
= src
;
695 const uint8_t *mm_end
;
697 uint16_t *d
= (uint16_t *)dst
;
700 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
702 "movq %0, %%mm7 \n\t"
703 "movq %1, %%mm6 \n\t"
704 ::"m"(red_16mask
),"m"(green_16mask
));
709 "movd %1, %%mm0 \n\t"
710 "movd 3%1, %%mm3 \n\t"
711 "punpckldq 6%1, %%mm0 \n\t"
712 "punpckldq 9%1, %%mm3 \n\t"
713 "movq %%mm0, %%mm1 \n\t"
714 "movq %%mm0, %%mm2 \n\t"
715 "movq %%mm3, %%mm4 \n\t"
716 "movq %%mm3, %%mm5 \n\t"
717 "psllq $8, %%mm0 \n\t"
718 "psllq $8, %%mm3 \n\t"
719 "pand %%mm7, %%mm0 \n\t"
720 "pand %%mm7, %%mm3 \n\t"
721 "psrlq $5, %%mm1 \n\t"
722 "psrlq $5, %%mm4 \n\t"
723 "pand %%mm6, %%mm1 \n\t"
724 "pand %%mm6, %%mm4 \n\t"
725 "psrlq $19, %%mm2 \n\t"
726 "psrlq $19, %%mm5 \n\t"
727 "pand %2, %%mm2 \n\t"
728 "pand %2, %%mm5 \n\t"
729 "por %%mm1, %%mm0 \n\t"
730 "por %%mm4, %%mm3 \n\t"
731 "por %%mm2, %%mm0 \n\t"
732 "por %%mm5, %%mm3 \n\t"
733 "psllq $16, %%mm3 \n\t"
734 "por %%mm3, %%mm0 \n\t"
735 MOVNTQ
" %%mm0, %0 \n\t"
736 :"=m"(*d
):"m"(*s
),"m"(blue_16mask
):"memory");
740 __asm__
volatile(SFENCE:::"memory");
741 __asm__
volatile(EMMS:::"memory");
747 *d
++ = (b
>>3) | ((g
&0xFC)<<3) | ((r
&0xF8)<<8);
751 static inline void RENAME(rgb24tobgr15
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
753 const uint8_t *s
= src
;
756 const uint8_t *mm_end
;
758 uint16_t *d
= (uint16_t *)dst
;
761 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
763 "movq %0, %%mm7 \n\t"
764 "movq %1, %%mm6 \n\t"
765 ::"m"(red_15mask
),"m"(green_15mask
));
770 "movd %1, %%mm0 \n\t"
771 "movd 3%1, %%mm3 \n\t"
772 "punpckldq 6%1, %%mm0 \n\t"
773 "punpckldq 9%1, %%mm3 \n\t"
774 "movq %%mm0, %%mm1 \n\t"
775 "movq %%mm0, %%mm2 \n\t"
776 "movq %%mm3, %%mm4 \n\t"
777 "movq %%mm3, %%mm5 \n\t"
778 "psrlq $3, %%mm0 \n\t"
779 "psrlq $3, %%mm3 \n\t"
780 "pand %2, %%mm0 \n\t"
781 "pand %2, %%mm3 \n\t"
782 "psrlq $6, %%mm1 \n\t"
783 "psrlq $6, %%mm4 \n\t"
784 "pand %%mm6, %%mm1 \n\t"
785 "pand %%mm6, %%mm4 \n\t"
786 "psrlq $9, %%mm2 \n\t"
787 "psrlq $9, %%mm5 \n\t"
788 "pand %%mm7, %%mm2 \n\t"
789 "pand %%mm7, %%mm5 \n\t"
790 "por %%mm1, %%mm0 \n\t"
791 "por %%mm4, %%mm3 \n\t"
792 "por %%mm2, %%mm0 \n\t"
793 "por %%mm5, %%mm3 \n\t"
794 "psllq $16, %%mm3 \n\t"
795 "por %%mm3, %%mm0 \n\t"
796 MOVNTQ
" %%mm0, %0 \n\t"
797 :"=m"(*d
):"m"(*s
),"m"(blue_15mask
):"memory");
801 __asm__
volatile(SFENCE:::"memory");
802 __asm__
volatile(EMMS:::"memory");
808 *d
++ = (b
>>3) | ((g
&0xF8)<<2) | ((r
&0xF8)<<7);
812 static inline void RENAME(rgb24to15
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
814 const uint8_t *s
= src
;
817 const uint8_t *mm_end
;
819 uint16_t *d
= (uint16_t *)dst
;
822 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
824 "movq %0, %%mm7 \n\t"
825 "movq %1, %%mm6 \n\t"
826 ::"m"(red_15mask
),"m"(green_15mask
));
831 "movd %1, %%mm0 \n\t"
832 "movd 3%1, %%mm3 \n\t"
833 "punpckldq 6%1, %%mm0 \n\t"
834 "punpckldq 9%1, %%mm3 \n\t"
835 "movq %%mm0, %%mm1 \n\t"
836 "movq %%mm0, %%mm2 \n\t"
837 "movq %%mm3, %%mm4 \n\t"
838 "movq %%mm3, %%mm5 \n\t"
839 "psllq $7, %%mm0 \n\t"
840 "psllq $7, %%mm3 \n\t"
841 "pand %%mm7, %%mm0 \n\t"
842 "pand %%mm7, %%mm3 \n\t"
843 "psrlq $6, %%mm1 \n\t"
844 "psrlq $6, %%mm4 \n\t"
845 "pand %%mm6, %%mm1 \n\t"
846 "pand %%mm6, %%mm4 \n\t"
847 "psrlq $19, %%mm2 \n\t"
848 "psrlq $19, %%mm5 \n\t"
849 "pand %2, %%mm2 \n\t"
850 "pand %2, %%mm5 \n\t"
851 "por %%mm1, %%mm0 \n\t"
852 "por %%mm4, %%mm3 \n\t"
853 "por %%mm2, %%mm0 \n\t"
854 "por %%mm5, %%mm3 \n\t"
855 "psllq $16, %%mm3 \n\t"
856 "por %%mm3, %%mm0 \n\t"
857 MOVNTQ
" %%mm0, %0 \n\t"
858 :"=m"(*d
):"m"(*s
),"m"(blue_15mask
):"memory");
862 __asm__
volatile(SFENCE:::"memory");
863 __asm__
volatile(EMMS:::"memory");
869 *d
++ = (b
>>3) | ((g
&0xF8)<<2) | ((r
&0xF8)<<7);
874 I use a less accurate approximation here by simply left-shifting the input
875 value and filling the low-order bits with zeroes. This method improves PNG
876 compression, but this scheme cannot reproduce white exactly, since it does
877 not generate an all-ones maximum value; the net effect is to darken the image slightly.
880 The better method should be "left bit replication":
890 | leftmost bits repeated to fill open bits
894 static inline void RENAME(rgb15tobgr24
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
898 const uint16_t *mm_end
;
901 const uint16_t *s
= (const uint16_t*)src
;
902 end
= s
+ src_size
/2;
904 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
909 "movq %1, %%mm0 \n\t"
910 "movq %1, %%mm1 \n\t"
911 "movq %1, %%mm2 \n\t"
912 "pand %2, %%mm0 \n\t"
913 "pand %3, %%mm1 \n\t"
914 "pand %4, %%mm2 \n\t"
915 "psllq $3, %%mm0 \n\t"
916 "psrlq $2, %%mm1 \n\t"
917 "psrlq $7, %%mm2 \n\t"
918 "movq %%mm0, %%mm3 \n\t"
919 "movq %%mm1, %%mm4 \n\t"
920 "movq %%mm2, %%mm5 \n\t"
921 "punpcklwd %5, %%mm0 \n\t"
922 "punpcklwd %5, %%mm1 \n\t"
923 "punpcklwd %5, %%mm2 \n\t"
924 "punpckhwd %5, %%mm3 \n\t"
925 "punpckhwd %5, %%mm4 \n\t"
926 "punpckhwd %5, %%mm5 \n\t"
927 "psllq $8, %%mm1 \n\t"
928 "psllq $16, %%mm2 \n\t"
929 "por %%mm1, %%mm0 \n\t"
930 "por %%mm2, %%mm0 \n\t"
931 "psllq $8, %%mm4 \n\t"
932 "psllq $16, %%mm5 \n\t"
933 "por %%mm4, %%mm3 \n\t"
934 "por %%mm5, %%mm3 \n\t"
936 "movq %%mm0, %%mm6 \n\t"
937 "movq %%mm3, %%mm7 \n\t"
939 "movq 8%1, %%mm0 \n\t"
940 "movq 8%1, %%mm1 \n\t"
941 "movq 8%1, %%mm2 \n\t"
942 "pand %2, %%mm0 \n\t"
943 "pand %3, %%mm1 \n\t"
944 "pand %4, %%mm2 \n\t"
945 "psllq $3, %%mm0 \n\t"
946 "psrlq $2, %%mm1 \n\t"
947 "psrlq $7, %%mm2 \n\t"
948 "movq %%mm0, %%mm3 \n\t"
949 "movq %%mm1, %%mm4 \n\t"
950 "movq %%mm2, %%mm5 \n\t"
951 "punpcklwd %5, %%mm0 \n\t"
952 "punpcklwd %5, %%mm1 \n\t"
953 "punpcklwd %5, %%mm2 \n\t"
954 "punpckhwd %5, %%mm3 \n\t"
955 "punpckhwd %5, %%mm4 \n\t"
956 "punpckhwd %5, %%mm5 \n\t"
957 "psllq $8, %%mm1 \n\t"
958 "psllq $16, %%mm2 \n\t"
959 "por %%mm1, %%mm0 \n\t"
960 "por %%mm2, %%mm0 \n\t"
961 "psllq $8, %%mm4 \n\t"
962 "psllq $16, %%mm5 \n\t"
963 "por %%mm4, %%mm3 \n\t"
964 "por %%mm5, %%mm3 \n\t"
967 :"m"(*s
),"m"(mask15b
),"m"(mask15g
),"m"(mask15r
), "m"(mmx_null
)
969 /* borrowed 32 to 24 */
971 "movq %%mm0, %%mm4 \n\t"
972 "movq %%mm3, %%mm5 \n\t"
973 "movq %%mm6, %%mm0 \n\t"
974 "movq %%mm7, %%mm1 \n\t"
976 "movq %%mm4, %%mm6 \n\t"
977 "movq %%mm5, %%mm7 \n\t"
978 "movq %%mm0, %%mm2 \n\t"
979 "movq %%mm1, %%mm3 \n\t"
981 "psrlq $8, %%mm2 \n\t"
982 "psrlq $8, %%mm3 \n\t"
983 "psrlq $8, %%mm6 \n\t"
984 "psrlq $8, %%mm7 \n\t"
985 "pand %2, %%mm0 \n\t"
986 "pand %2, %%mm1 \n\t"
987 "pand %2, %%mm4 \n\t"
988 "pand %2, %%mm5 \n\t"
989 "pand %3, %%mm2 \n\t"
990 "pand %3, %%mm3 \n\t"
991 "pand %3, %%mm6 \n\t"
992 "pand %3, %%mm7 \n\t"
993 "por %%mm2, %%mm0 \n\t"
994 "por %%mm3, %%mm1 \n\t"
995 "por %%mm6, %%mm4 \n\t"
996 "por %%mm7, %%mm5 \n\t"
998 "movq %%mm1, %%mm2 \n\t"
999 "movq %%mm4, %%mm3 \n\t"
1000 "psllq $48, %%mm2 \n\t"
1001 "psllq $32, %%mm3 \n\t"
1002 "pand %4, %%mm2 \n\t"
1003 "pand %5, %%mm3 \n\t"
1004 "por %%mm2, %%mm0 \n\t"
1005 "psrlq $16, %%mm1 \n\t"
1006 "psrlq $32, %%mm4 \n\t"
1007 "psllq $16, %%mm5 \n\t"
1008 "por %%mm3, %%mm1 \n\t"
1009 "pand %6, %%mm5 \n\t"
1010 "por %%mm5, %%mm4 \n\t"
1012 MOVNTQ
" %%mm0, %0 \n\t"
1013 MOVNTQ
" %%mm1, 8%0 \n\t"
1014 MOVNTQ
" %%mm4, 16%0"
1017 :"m"(*s
),"m"(mask24l
),"m"(mask24h
),"m"(mask24hh
),"m"(mask24hhh
),"m"(mask24hhhh
)
1022 __asm__
volatile(SFENCE:::"memory");
1023 __asm__
volatile(EMMS:::"memory");
1026 register uint16_t bgr
;
1028 *d
++ = (bgr
&0x1F)<<3;
1029 *d
++ = (bgr
&0x3E0)>>2;
1030 *d
++ = (bgr
&0x7C00)>>7;
1034 static inline void RENAME(rgb16tobgr24
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
1036 const uint16_t *end
;
1038 const uint16_t *mm_end
;
1040 uint8_t *d
= (uint8_t *)dst
;
1041 const uint16_t *s
= (const uint16_t *)src
;
1042 end
= s
+ src_size
/2;
1044 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
1046 while (s
< mm_end
) {
1048 PREFETCH
" 32%1 \n\t"
1049 "movq %1, %%mm0 \n\t"
1050 "movq %1, %%mm1 \n\t"
1051 "movq %1, %%mm2 \n\t"
1052 "pand %2, %%mm0 \n\t"
1053 "pand %3, %%mm1 \n\t"
1054 "pand %4, %%mm2 \n\t"
1055 "psllq $3, %%mm0 \n\t"
1056 "psrlq $3, %%mm1 \n\t"
1057 "psrlq $8, %%mm2 \n\t"
1058 "movq %%mm0, %%mm3 \n\t"
1059 "movq %%mm1, %%mm4 \n\t"
1060 "movq %%mm2, %%mm5 \n\t"
1061 "punpcklwd %5, %%mm0 \n\t"
1062 "punpcklwd %5, %%mm1 \n\t"
1063 "punpcklwd %5, %%mm2 \n\t"
1064 "punpckhwd %5, %%mm3 \n\t"
1065 "punpckhwd %5, %%mm4 \n\t"
1066 "punpckhwd %5, %%mm5 \n\t"
1067 "psllq $8, %%mm1 \n\t"
1068 "psllq $16, %%mm2 \n\t"
1069 "por %%mm1, %%mm0 \n\t"
1070 "por %%mm2, %%mm0 \n\t"
1071 "psllq $8, %%mm4 \n\t"
1072 "psllq $16, %%mm5 \n\t"
1073 "por %%mm4, %%mm3 \n\t"
1074 "por %%mm5, %%mm3 \n\t"
1076 "movq %%mm0, %%mm6 \n\t"
1077 "movq %%mm3, %%mm7 \n\t"
1079 "movq 8%1, %%mm0 \n\t"
1080 "movq 8%1, %%mm1 \n\t"
1081 "movq 8%1, %%mm2 \n\t"
1082 "pand %2, %%mm0 \n\t"
1083 "pand %3, %%mm1 \n\t"
1084 "pand %4, %%mm2 \n\t"
1085 "psllq $3, %%mm0 \n\t"
1086 "psrlq $3, %%mm1 \n\t"
1087 "psrlq $8, %%mm2 \n\t"
1088 "movq %%mm0, %%mm3 \n\t"
1089 "movq %%mm1, %%mm4 \n\t"
1090 "movq %%mm2, %%mm5 \n\t"
1091 "punpcklwd %5, %%mm0 \n\t"
1092 "punpcklwd %5, %%mm1 \n\t"
1093 "punpcklwd %5, %%mm2 \n\t"
1094 "punpckhwd %5, %%mm3 \n\t"
1095 "punpckhwd %5, %%mm4 \n\t"
1096 "punpckhwd %5, %%mm5 \n\t"
1097 "psllq $8, %%mm1 \n\t"
1098 "psllq $16, %%mm2 \n\t"
1099 "por %%mm1, %%mm0 \n\t"
1100 "por %%mm2, %%mm0 \n\t"
1101 "psllq $8, %%mm4 \n\t"
1102 "psllq $16, %%mm5 \n\t"
1103 "por %%mm4, %%mm3 \n\t"
1104 "por %%mm5, %%mm3 \n\t"
1106 :"m"(*s
),"m"(mask16b
),"m"(mask16g
),"m"(mask16r
),"m"(mmx_null
)
1108 /* borrowed 32 to 24 */
1110 "movq %%mm0, %%mm4 \n\t"
1111 "movq %%mm3, %%mm5 \n\t"
1112 "movq %%mm6, %%mm0 \n\t"
1113 "movq %%mm7, %%mm1 \n\t"
1115 "movq %%mm4, %%mm6 \n\t"
1116 "movq %%mm5, %%mm7 \n\t"
1117 "movq %%mm0, %%mm2 \n\t"
1118 "movq %%mm1, %%mm3 \n\t"
1120 "psrlq $8, %%mm2 \n\t"
1121 "psrlq $8, %%mm3 \n\t"
1122 "psrlq $8, %%mm6 \n\t"
1123 "psrlq $8, %%mm7 \n\t"
1124 "pand %2, %%mm0 \n\t"
1125 "pand %2, %%mm1 \n\t"
1126 "pand %2, %%mm4 \n\t"
1127 "pand %2, %%mm5 \n\t"
1128 "pand %3, %%mm2 \n\t"
1129 "pand %3, %%mm3 \n\t"
1130 "pand %3, %%mm6 \n\t"
1131 "pand %3, %%mm7 \n\t"
1132 "por %%mm2, %%mm0 \n\t"
1133 "por %%mm3, %%mm1 \n\t"
1134 "por %%mm6, %%mm4 \n\t"
1135 "por %%mm7, %%mm5 \n\t"
1137 "movq %%mm1, %%mm2 \n\t"
1138 "movq %%mm4, %%mm3 \n\t"
1139 "psllq $48, %%mm2 \n\t"
1140 "psllq $32, %%mm3 \n\t"
1141 "pand %4, %%mm2 \n\t"
1142 "pand %5, %%mm3 \n\t"
1143 "por %%mm2, %%mm0 \n\t"
1144 "psrlq $16, %%mm1 \n\t"
1145 "psrlq $32, %%mm4 \n\t"
1146 "psllq $16, %%mm5 \n\t"
1147 "por %%mm3, %%mm1 \n\t"
1148 "pand %6, %%mm5 \n\t"
1149 "por %%mm5, %%mm4 \n\t"
1151 MOVNTQ
" %%mm0, %0 \n\t"
1152 MOVNTQ
" %%mm1, 8%0 \n\t"
1153 MOVNTQ
" %%mm4, 16%0"
1156 :"m"(*s
),"m"(mask24l
),"m"(mask24h
),"m"(mask24hh
),"m"(mask24hhh
),"m"(mask24hhhh
)
1161 __asm__
volatile(SFENCE:::"memory");
1162 __asm__
volatile(EMMS:::"memory");
1165 register uint16_t bgr
;
1167 *d
++ = (bgr
&0x1F)<<3;
1168 *d
++ = (bgr
&0x7E0)>>3;
1169 *d
++ = (bgr
&0xF800)>>8;
1174 * mm0 = 00 B3 00 B2 00 B1 00 B0
1175 * mm1 = 00 G3 00 G2 00 G1 00 G0
1176 * mm2 = 00 R3 00 R2 00 R1 00 R0
1177 * mm6 = FF FF FF FF FF FF FF FF
1178 * mm7 = 00 00 00 00 00 00 00 00
1180 #define PACK_RGB32 \
1181 "packuswb %%mm7, %%mm0 \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \
1182 "packuswb %%mm7, %%mm1 \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \
1183 "packuswb %%mm7, %%mm2 \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \
1184 "punpcklbw %%mm1, %%mm0 \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
1185 "punpcklbw %%mm6, %%mm2 \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \
1186 "movq %%mm0, %%mm3 \n\t" \
1187 "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
1188 "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
1189 MOVNTQ" %%mm0, %0 \n\t" \
1190 MOVNTQ" %%mm3, 8%0 \n\t" \
1192 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
1194 const uint16_t *end
;
1196 const uint16_t *mm_end
;
1199 const uint16_t *s
= (const uint16_t *)src
;
1200 end
= s
+ src_size
/2;
1202 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
1203 __asm__
volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
1204 __asm__
volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
1206 while (s
< mm_end
) {
1208 PREFETCH
" 32%1 \n\t"
1209 "movq %1, %%mm0 \n\t"
1210 "movq %1, %%mm1 \n\t"
1211 "movq %1, %%mm2 \n\t"
1212 "pand %2, %%mm0 \n\t"
1213 "pand %3, %%mm1 \n\t"
1214 "pand %4, %%mm2 \n\t"
1215 "psllq $3, %%mm0 \n\t"
1216 "psrlq $2, %%mm1 \n\t"
1217 "psrlq $7, %%mm2 \n\t"
1220 :"m"(*s
),"m"(mask15b
),"m"(mask15g
),"m"(mask15r
)
1225 __asm__
volatile(SFENCE:::"memory");
1226 __asm__
volatile(EMMS:::"memory");
1229 register uint16_t bgr
;
1233 *d
++ = (bgr
&0x7C00)>>7;
1234 *d
++ = (bgr
&0x3E0)>>2;
1235 *d
++ = (bgr
&0x1F)<<3;
1237 *d
++ = (bgr
&0x1F)<<3;
1238 *d
++ = (bgr
&0x3E0)>>2;
1239 *d
++ = (bgr
&0x7C00)>>7;
1245 static inline void RENAME(rgb16to32
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
1247 const uint16_t *end
;
1249 const uint16_t *mm_end
;
1252 const uint16_t *s
= (const uint16_t*)src
;
1253 end
= s
+ src_size
/2;
1255 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
1256 __asm__
volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
1257 __asm__
volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
1259 while (s
< mm_end
) {
1261 PREFETCH
" 32%1 \n\t"
1262 "movq %1, %%mm0 \n\t"
1263 "movq %1, %%mm1 \n\t"
1264 "movq %1, %%mm2 \n\t"
1265 "pand %2, %%mm0 \n\t"
1266 "pand %3, %%mm1 \n\t"
1267 "pand %4, %%mm2 \n\t"
1268 "psllq $3, %%mm0 \n\t"
1269 "psrlq $3, %%mm1 \n\t"
1270 "psrlq $8, %%mm2 \n\t"
1273 :"m"(*s
),"m"(mask16b
),"m"(mask16g
),"m"(mask16r
)
1278 __asm__
volatile(SFENCE:::"memory");
1279 __asm__
volatile(EMMS:::"memory");
1282 register uint16_t bgr
;
1286 *d
++ = (bgr
&0xF800)>>8;
1287 *d
++ = (bgr
&0x7E0)>>3;
1288 *d
++ = (bgr
&0x1F)<<3;
1290 *d
++ = (bgr
&0x1F)<<3;
1291 *d
++ = (bgr
&0x7E0)>>3;
1292 *d
++ = (bgr
&0xF800)>>8;
1298 static inline void RENAME(rgb32tobgr32
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
1300 x86_reg idx
= 15 - src_size
;
1301 const uint8_t *s
= src
-idx
;
1302 uint8_t *d
= dst
-idx
;
1307 PREFETCH
" (%1, %0) \n\t"
1308 "movq %3, %%mm7 \n\t"
1309 "pxor %4, %%mm7 \n\t"
1310 "movq %%mm7, %%mm6 \n\t"
1311 "pxor %5, %%mm7 \n\t"
1314 PREFETCH
" 32(%1, %0) \n\t"
1315 "movq (%1, %0), %%mm0 \n\t"
1316 "movq 8(%1, %0), %%mm1 \n\t"
1318 "pshufw $177, %%mm0, %%mm3 \n\t"
1319 "pshufw $177, %%mm1, %%mm5 \n\t"
1320 "pand %%mm7, %%mm0 \n\t"
1321 "pand %%mm6, %%mm3 \n\t"
1322 "pand %%mm7, %%mm1 \n\t"
1323 "pand %%mm6, %%mm5 \n\t"
1324 "por %%mm3, %%mm0 \n\t"
1325 "por %%mm5, %%mm1 \n\t"
1327 "movq %%mm0, %%mm2 \n\t"
1328 "movq %%mm1, %%mm4 \n\t"
1329 "pand %%mm7, %%mm0 \n\t"
1330 "pand %%mm6, %%mm2 \n\t"
1331 "pand %%mm7, %%mm1 \n\t"
1332 "pand %%mm6, %%mm4 \n\t"
1333 "movq %%mm2, %%mm3 \n\t"
1334 "movq %%mm4, %%mm5 \n\t"
1335 "pslld $16, %%mm2 \n\t"
1336 "psrld $16, %%mm3 \n\t"
1337 "pslld $16, %%mm4 \n\t"
1338 "psrld $16, %%mm5 \n\t"
1339 "por %%mm2, %%mm0 \n\t"
1340 "por %%mm4, %%mm1 \n\t"
1341 "por %%mm3, %%mm0 \n\t"
1342 "por %%mm5, %%mm1 \n\t"
1344 MOVNTQ
" %%mm0, (%2, %0) \n\t"
1345 MOVNTQ
" %%mm1, 8(%2, %0) \n\t"
1352 : "r" (s
), "r" (d
), "m" (mask32b
), "m" (mask32r
), "m" (mmx_one
)
1355 for (; idx
<15; idx
+=4) {
1356 register int v
= *(const uint32_t *)&s
[idx
], g
= v
& 0xff00ff00;
1358 *(uint32_t *)&d
[idx
] = (v
>>16) + g
+ (v
<<16);
1362 static inline void RENAME(rgb24tobgr24
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
1366 x86_reg mmx_size
= 23 - src_size
;
1368 "test %%"REG_a
", %%"REG_a
" \n\t"
1370 "movq "MANGLE(mask24r
)", %%mm5 \n\t"
1371 "movq "MANGLE(mask24g
)", %%mm6 \n\t"
1372 "movq "MANGLE(mask24b
)", %%mm7 \n\t"
1375 PREFETCH
" 32(%1, %%"REG_a
") \n\t"
1376 "movq (%1, %%"REG_a
"), %%mm0 \n\t" // BGR BGR BG
1377 "movq (%1, %%"REG_a
"), %%mm1 \n\t" // BGR BGR BG
1378 "movq 2(%1, %%"REG_a
"), %%mm2 \n\t" // R BGR BGR B
1379 "psllq $16, %%mm0 \n\t" // 00 BGR BGR
1380 "pand %%mm5, %%mm0 \n\t"
1381 "pand %%mm6, %%mm1 \n\t"
1382 "pand %%mm7, %%mm2 \n\t"
1383 "por %%mm0, %%mm1 \n\t"
1384 "por %%mm2, %%mm1 \n\t"
1385 "movq 6(%1, %%"REG_a
"), %%mm0 \n\t" // BGR BGR BG
1386 MOVNTQ
" %%mm1, (%2, %%"REG_a
") \n\t" // RGB RGB RG
1387 "movq 8(%1, %%"REG_a
"), %%mm1 \n\t" // R BGR BGR B
1388 "movq 10(%1, %%"REG_a
"), %%mm2 \n\t" // GR BGR BGR
1389 "pand %%mm7, %%mm0 \n\t"
1390 "pand %%mm5, %%mm1 \n\t"
1391 "pand %%mm6, %%mm2 \n\t"
1392 "por %%mm0, %%mm1 \n\t"
1393 "por %%mm2, %%mm1 \n\t"
1394 "movq 14(%1, %%"REG_a
"), %%mm0 \n\t" // R BGR BGR B
1395 MOVNTQ
" %%mm1, 8(%2, %%"REG_a
") \n\t" // B RGB RGB R
1396 "movq 16(%1, %%"REG_a
"), %%mm1 \n\t" // GR BGR BGR
1397 "movq 18(%1, %%"REG_a
"), %%mm2 \n\t" // BGR BGR BG
1398 "pand %%mm6, %%mm0 \n\t"
1399 "pand %%mm7, %%mm1 \n\t"
1400 "pand %%mm5, %%mm2 \n\t"
1401 "por %%mm0, %%mm1 \n\t"
1402 "por %%mm2, %%mm1 \n\t"
1403 MOVNTQ
" %%mm1, 16(%2, %%"REG_a
") \n\t"
1404 "add $24, %%"REG_a
" \n\t"
1408 : "r" (src
-mmx_size
), "r"(dst
-mmx_size
)
1411 __asm__
volatile(SFENCE:::"memory");
1412 __asm__
volatile(EMMS:::"memory");
1414 if (mmx_size
==23) return; //finished, was multiple of 8
1418 src_size
= 23-mmx_size
;
1422 for (i
=0; i
<src_size
; i
+=3) {
1425 dst
[i
+ 1] = src
[i
+ 1];
1426 dst
[i
+ 2] = src
[i
+ 0];
/*
 * Interleave planar Y, U and V rows into packed Y U Y V output.
 *
 * One output row is produced per iteration of the y loop. Every chroma
 * sample is combined with two luma samples, so chromWidth is width>>1.
 * usrc/vsrc advance by chromStride only when
 * (y & (vertLumPerChroma-1)) == vertLumPerChroma-1, i.e. one chroma row is
 * reused for vertLumPerChroma consecutive luma rows.
 * NOTE(review): the mask test presumably requires vertLumPerChroma to be a
 * power of two -- confirm against callers (the wrappers visible here pass 1 or 2).
 */
1431 static inline void RENAME(yuvPlanartoyuy2
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1432 long width
, long height
,
1433 long lumStride
, long chromStride
, long dstStride
, long vertLumPerChroma
)
// Number of U (and V) samples per row.
1436 const x86_reg chromWidth
= width
>>1;
1437 for (y
=0; y
<height
; y
++) {
1439 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
// Inline-asm row: REG_a is the chroma index, starting at 0. Each pass reads
// 8 U bytes (%2), 8 V bytes (%3) and 16 Y bytes (%1, index scaled by 2) and
// stores 32 packed bytes to dst (%0, index scaled by 4).
1441 "xor %%"REG_a
", %%"REG_a
" \n\t"
1444 PREFETCH
" 32(%1, %%"REG_a
", 2) \n\t"
1445 PREFETCH
" 32(%2, %%"REG_a
") \n\t"
1446 PREFETCH
" 32(%3, %%"REG_a
") \n\t"
1447 "movq (%2, %%"REG_a
"), %%mm0 \n\t" // U(0)
1448 "movq %%mm0, %%mm2 \n\t" // U(0)
1449 "movq (%3, %%"REG_a
"), %%mm1 \n\t" // V(0)
1450 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
1451 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
1453 "movq (%1, %%"REG_a
",2), %%mm3 \n\t" // Y(0)
1454 "movq 8(%1, %%"REG_a
",2), %%mm5 \n\t" // Y(8)
1455 "movq %%mm3, %%mm4 \n\t" // Y(0)
1456 "movq %%mm5, %%mm6 \n\t" // Y(8)
1457 "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0)
1458 "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4)
1459 "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8)
1460 "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12)
1462 MOVNTQ
" %%mm3, (%0, %%"REG_a
", 4) \n\t"
1463 MOVNTQ
" %%mm4, 8(%0, %%"REG_a
", 4) \n\t"
1464 MOVNTQ
" %%mm5, 16(%0, %%"REG_a
", 4) \n\t"
1465 MOVNTQ
" %%mm6, 24(%0, %%"REG_a
", 4) \n\t"
// Step 8 chroma samples and compare the index with chromWidth (operand %4).
1467 "add $8, %%"REG_a
" \n\t"
1468 "cmp %4, %%"REG_a
" \n\t"
1470 ::"r"(dst
), "r"(ysrc
), "r"(usrc
), "r"(vsrc
), "g" (chromWidth
)
// Alpha MVI variant: works on two luma rows (yc, yc2) and two output rows
// (qdst, qdst2) at once, sharing the same chroma pointers uc/vc.
1475 #if ARCH_ALPHA && HAVE_MVI
1476 #define pl2yuy2(n) \
1481 __asm__("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \
1482 __asm__("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \
1483 __asm__("unpkbl %1, %0" : "=r"(u) : "r"(u)); \
1484 __asm__("unpkbl %1, %0" : "=r"(v) : "r"(v)); \
1485 yuv1 = (u << 8) + (v << 24); \
1492 uint64_t *qdst
= (uint64_t *) dst
;
1493 uint64_t *qdst2
= (uint64_t *) (dst
+ dstStride
);
1494 const uint32_t *yc
= (uint32_t *) ysrc
;
1495 const uint32_t *yc2
= (uint32_t *) (ysrc
+ lumStride
);
1496 const uint16_t *uc
= (uint16_t*) usrc
, *vc
= (uint16_t*) vsrc
;
1497 for (i
= 0; i
< chromWidth
; i
+= 8) {
1498 uint64_t y1
, y2
, yuv1
, yuv2
;
// Loads into register $31 at +64 bytes from each source pointer; the loaded
// values are not used (presumably prefetch hints -- TODO confirm).
1501 __asm__("ldq $31,64(%0)" :: "r"(yc
));
1502 __asm__("ldq $31,64(%0)" :: "r"(yc2
));
1503 __asm__("ldq $31,64(%0)" :: "r"(uc
));
1504 __asm__("ldq $31,64(%0)" :: "r"(vc
));
// 64-bit variant: two Y-U-Y-V groups (k and l) are combined into one
// 64-bit store, i.e. 2 chroma samples and 4 luma samples per iteration.
1522 #elif HAVE_FAST_64BIT
1524 uint64_t *ldst
= (uint64_t *) dst
;
1525 const uint8_t *yc
= ysrc
, *uc
= usrc
, *vc
= vsrc
;
1526 for (i
= 0; i
< chromWidth
; i
+= 2) {
// NOTE(review): vc[n] is promoted to int before "<< 24"; for values >= 128
// the shift reaches the sign bit, and if k/l are 64-bit the negative int
// would be sign-extended into the upper half. The declarations of k and l
// are not visible here -- confirm and consider an unsigned cast.
1528 k
= yc
[0] + (uc
[0] << 8) +
1529 (yc
[1] << 16) + (vc
[0] << 24);
1530 l
= yc
[2] + (uc
[1] << 8) +
1531 (yc
[3] << 16) + (vc
[1] << 24);
1532 *ldst
++ = k
+ (l
<< 32);
// Generic 32-bit variant: one Y-U-Y-V group per iteration, written as a
// single 32-bit word. Two byte orders are provided below (presumably
// selected by target endianness; the guard is not visible here).
1539 int i
, *idst
= (int32_t *) dst
;
1540 const uint8_t *yc
= ysrc
, *uc
= usrc
, *vc
= vsrc
;
1541 for (i
= 0; i
< chromWidth
; i
++) {
// Y0 in the most significant byte.
1543 *idst
++ = (yc
[0] << 24)+ (uc
[0] << 16) +
1544 (yc
[1] << 8) + (vc
[0] << 0);
// Y0 in the least significant byte.
1546 *idst
++ = yc
[0] + (uc
[0] << 8) +
1547 (yc
[1] << 16) + (vc
[0] << 24);
// Move to the next chroma row only after the last luma row that shares it.
1555 if ((y
&(vertLumPerChroma
-1)) == vertLumPerChroma
-1) {
1556 usrc
+= chromStride
;
1557 vsrc
+= chromStride
;
1570 * Height should be a multiple of 2 and width should be a multiple of 16.
1571 * (If this is a problem for anyone then tell me, and I will fix it.)
1573 static inline void RENAME(yv12toyuy2
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1574 long width
, long height
,
1575 long lumStride
, long chromStride
, long dstStride
)
1577 //FIXME interpolate chroma
1578 RENAME(yuvPlanartoyuy2
)(ysrc
, usrc
, vsrc
, dst
, width
, height
, lumStride
, chromStride
, dstStride
, 2);
/*
 * Interleave planar Y, U and V rows into packed U Y V Y output.
 *
 * Same structure as the Y U Y V variant, with the chroma byte placed before
 * each luma byte. One output row is produced per iteration of the y loop;
 * chromWidth is width>>1. usrc/vsrc advance by chromStride only when
 * (y & (vertLumPerChroma-1)) == vertLumPerChroma-1.
 * NOTE(review): the mask test presumably requires vertLumPerChroma to be a
 * power of two -- confirm against callers (the wrappers visible here pass 1 or 2).
 */
1581 static inline void RENAME(yuvPlanartouyvy
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1582 long width
, long height
,
1583 long lumStride
, long chromStride
, long dstStride
, long vertLumPerChroma
)
// Number of U (and V) samples per row.
1586 const x86_reg chromWidth
= width
>>1;
1587 for (y
=0; y
<height
; y
++) {
1589 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
// Inline-asm row: REG_a is the chroma index, starting at 0. Each pass reads
// 8 U bytes (%2), 8 V bytes (%3) and 16 Y bytes (%1, index scaled by 2) and
// stores 32 packed bytes to dst (%0, index scaled by 4).
1591 "xor %%"REG_a
", %%"REG_a
" \n\t"
1594 PREFETCH
" 32(%1, %%"REG_a
", 2) \n\t"
1595 PREFETCH
" 32(%2, %%"REG_a
") \n\t"
1596 PREFETCH
" 32(%3, %%"REG_a
") \n\t"
1597 "movq (%2, %%"REG_a
"), %%mm0 \n\t" // U(0)
1598 "movq %%mm0, %%mm2 \n\t" // U(0)
1599 "movq (%3, %%"REG_a
"), %%mm1 \n\t" // V(0)
1600 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
1601 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
1603 "movq (%1, %%"REG_a
",2), %%mm3 \n\t" // Y(0)
1604 "movq 8(%1, %%"REG_a
",2), %%mm5 \n\t" // Y(8)
1605 "movq %%mm0, %%mm4 \n\t" // UVUV UVUV(0)
1606 "movq %%mm2, %%mm6 \n\t" // UVUV UVUV(8)
1607 "punpcklbw %%mm3, %%mm0 \n\t" // UYVY UYVY(0)
1608 "punpckhbw %%mm3, %%mm4 \n\t" // UYVY UYVY(4)
1609 "punpcklbw %%mm5, %%mm2 \n\t" // UYVY UYVY(8)
1610 "punpckhbw %%mm5, %%mm6 \n\t" // UYVY UYVY(12)
1612 MOVNTQ
" %%mm0, (%0, %%"REG_a
", 4) \n\t"
1613 MOVNTQ
" %%mm4, 8(%0, %%"REG_a
", 4) \n\t"
1614 MOVNTQ
" %%mm2, 16(%0, %%"REG_a
", 4) \n\t"
1615 MOVNTQ
" %%mm6, 24(%0, %%"REG_a
", 4) \n\t"
// Step 8 chroma samples and compare the index with chromWidth (operand %4).
1617 "add $8, %%"REG_a
" \n\t"
1618 "cmp %4, %%"REG_a
" \n\t"
1620 ::"r"(dst
), "r"(ysrc
), "r"(usrc
), "r"(vsrc
), "g" (chromWidth
)
1624 //FIXME adapt the Alpha ASM code from yv12->yuy2
// 64-bit variant: two U-Y-V-Y groups (k and l) are combined into one
// 64-bit store, i.e. 2 chroma samples and 4 luma samples per iteration.
1628 uint64_t *ldst
= (uint64_t *) dst
;
1629 const uint8_t *yc
= ysrc
, *uc
= usrc
, *vc
= vsrc
;
1630 for (i
= 0; i
< chromWidth
; i
+= 2) {
// NOTE(review): yc[n] is promoted to int before "<< 24"; for values >= 128
// the shift reaches the sign bit, and if k/l are 64-bit the negative int
// would be sign-extended into the upper half. The declarations of k and l
// are not visible here -- confirm and consider an unsigned cast.
1632 k
= uc
[0] + (yc
[0] << 8) +
1633 (vc
[0] << 16) + (yc
[1] << 24);
1634 l
= uc
[1] + (yc
[2] << 8) +
1635 (vc
[1] << 16) + (yc
[3] << 24);
1636 *ldst
++ = k
+ (l
<< 32);
// Generic 32-bit variant: one U-Y-V-Y group per iteration, written as a
// single 32-bit word. Two byte orders are provided below (presumably
// selected by target endianness; the guard is not visible here).
1643 int i
, *idst
= (int32_t *) dst
;
1644 const uint8_t *yc
= ysrc
, *uc
= usrc
, *vc
= vsrc
;
1645 for (i
= 0; i
< chromWidth
; i
++) {
// U in the most significant byte.
1647 *idst
++ = (uc
[0] << 24)+ (yc
[0] << 16) +
1648 (vc
[0] << 8) + (yc
[1] << 0);
// U in the least significant byte.
1650 *idst
++ = uc
[0] + (yc
[0] << 8) +
1651 (vc
[0] << 16) + (yc
[1] << 24);
// Move to the next chroma row only after the last luma row that shares it.
1659 if ((y
&(vertLumPerChroma
-1)) == vertLumPerChroma
-1) {
1660 usrc
+= chromStride
;
1661 vsrc
+= chromStride
;
1674 * Height should be a multiple of 2 and width should be a multiple of 16
1675 * (If this is a problem for anyone then tell me, and I will fix it.)
1677 static inline void RENAME(yv12touyvy
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1678 long width
, long height
,
1679 long lumStride
, long chromStride
, long dstStride
)
1681 //FIXME interpolate chroma
1682 RENAME(yuvPlanartouyvy
)(ysrc
, usrc
, vsrc
, dst
, width
, height
, lumStride
, chromStride
, dstStride
, 2);
1686 * Width should be a multiple of 16.
1688 static inline void RENAME(yuv422ptouyvy
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1689 long width
, long height
,
1690 long lumStride
, long chromStride
, long dstStride
)
1692 RENAME(yuvPlanartouyvy
)(ysrc
, usrc
, vsrc
, dst
, width
, height
, lumStride
, chromStride
, dstStride
, 1);
1696 * Width should be a multiple of 16.
1698 static inline void RENAME(yuv422ptoyuy2
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1699 long width
, long height
,
1700 long lumStride
, long chromStride
, long dstStride
)
1702 RENAME(yuvPlanartoyuy2
)(ysrc
, usrc
, vsrc
, dst
, width
, height
, lumStride
, chromStride
, dstStride
, 1);
1706 * Height should be a multiple of 2 and width should be a multiple of 16.
1707 * (If this is a problem for anyone then tell me, and I will fix it.)
/*
 * Split packed Y U Y V input into separate Y, U and V planes.
 *
 * Rows are handled in pairs (y advances by 2). For the first row of a pair
 * luma and chroma are both extracted; for the second row only luma is
 * written, so chroma is taken from every second input row. udst/vdst
 * advance by chromStride once per pair. chromWidth is width>>1.
 * NOTE(review): the src/ydst pointer advances between the two passes are
 * not part of the visible lines.
 */
1709 static inline void RENAME(yuy2toyv12
)(const uint8_t *src
, uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
,
1710 long width
, long height
,
1711 long lumStride
, long chromStride
, long srcStride
)
1714 const x86_reg chromWidth
= width
>>1;
1715 for (y
=0; y
<height
; y
+=2) {
// First pass (asm): REG_a is the chroma index. mm7 is set to a per-word
// 0x00FF mask. Each pass reads 32 packed bytes from src (%0, index scaled
// by 4) and stores 16 Y bytes (%1), 8 U bytes (%2) and 8 V bytes (%3).
1718 "xor %%"REG_a
", %%"REG_a
" \n\t"
1719 "pcmpeqw %%mm7, %%mm7 \n\t"
1720 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
1723 PREFETCH
" 64(%0, %%"REG_a
", 4) \n\t"
1724 "movq (%0, %%"REG_a
", 4), %%mm0 \n\t" // YUYV YUYV(0)
1725 "movq 8(%0, %%"REG_a
", 4), %%mm1 \n\t" // YUYV YUYV(4)
1726 "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0)
1727 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4)
1728 "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0)
1729 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4)
1730 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
1731 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
1732 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
1733 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
1735 MOVNTQ
" %%mm2, (%1, %%"REG_a
", 2) \n\t"
1737 "movq 16(%0, %%"REG_a
", 4), %%mm1 \n\t" // YUYV YUYV(8)
1738 "movq 24(%0, %%"REG_a
", 4), %%mm2 \n\t" // YUYV YUYV(12)
1739 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8)
1740 "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12)
1741 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8)
1742 "psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12)
1743 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
1744 "pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
1745 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
1746 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
1748 MOVNTQ
" %%mm3, 8(%1, %%"REG_a
", 2) \n\t"
// Separate the interleaved UV words into a V register and a U register.
1750 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
1751 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
1752 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
1753 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
1754 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
1755 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
1756 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
1757 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
1759 MOVNTQ
" %%mm0, (%3, %%"REG_a
") \n\t"
1760 MOVNTQ
" %%mm2, (%2, %%"REG_a
") \n\t"
// Step 8 chroma samples and compare the index with chromWidth (operand %4).
1762 "add $8, %%"REG_a
" \n\t"
1763 "cmp %4, %%"REG_a
" \n\t"
1765 ::"r"(src
), "r"(ydst
), "r"(udst
), "r"(vdst
), "g" (chromWidth
)
1766 : "memory", "%"REG_a
// Second pass (asm): luma only. mm7 still holds the 0x00FF word mask from
// the first pass; only ydst (%1) is written.
1773 "xor %%"REG_a
", %%"REG_a
" \n\t"
1776 PREFETCH
" 64(%0, %%"REG_a
", 4) \n\t"
1777 "movq (%0, %%"REG_a
", 4), %%mm0 \n\t" // YUYV YUYV(0)
1778 "movq 8(%0, %%"REG_a
", 4), %%mm1 \n\t" // YUYV YUYV(4)
1779 "movq 16(%0, %%"REG_a
", 4), %%mm2 \n\t" // YUYV YUYV(8)
1780 "movq 24(%0, %%"REG_a
", 4), %%mm3 \n\t" // YUYV YUYV(12)
1781 "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
1782 "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
1783 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
1784 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
1785 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
1786 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
1788 MOVNTQ
" %%mm0, (%1, %%"REG_a
", 2) \n\t"
1789 MOVNTQ
" %%mm2, 8(%1, %%"REG_a
", 2) \n\t"
1791 "add $8, %%"REG_a
" \n\t"
1792 "cmp %4, %%"REG_a
" \n\t"
1795 ::"r"(src
), "r"(ydst
), "r"(udst
), "r"(vdst
), "g" (chromWidth
)
1796 : "memory", "%"REG_a
// Scalar first pass: bytes 0 and 2 of each 4-byte group are luma,
// byte 1 is U and byte 3 is V.
1800 for (i
=0; i
<chromWidth
; i
++) {
1801 ydst
[2*i
+0] = src
[4*i
+0];
1802 udst
[i
] = src
[4*i
+1];
1803 ydst
[2*i
+1] = src
[4*i
+2];
1804 vdst
[i
] = src
[4*i
+3];
// Scalar second pass: luma only, chroma bytes of this row are skipped.
1809 for (i
=0; i
<chromWidth
; i
++) {
1810 ydst
[2*i
+0] = src
[4*i
+0];
1811 ydst
[2*i
+1] = src
[4*i
+2];
// One chroma row has been produced for this pair of input rows.
1814 udst
+= chromStride
;
1815 vdst
+= chromStride
;
1820 __asm__
volatile(EMMS
" \n\t"
1826 static inline void RENAME(yvu9toyv12
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
,
1827 uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
,
1828 long width
, long height
, long lumStride
, long chromStride
)
1831 memcpy(ydst
, ysrc
, width
*height
);
1833 /* XXX: implement upscaling for U,V */
/*
 * Upscale one plane by a factor of two in both directions.
 *
 * Output samples are weighted 3:1 blends ((3*a + b)>>2) of neighbouring
 * source samples. The first and the last output line are interpolated
 * horizontally only; the lines in between blend samples from two adjacent
 * source rows (src and src + srcStride) and write two output rows
 * (dst and dst + dstStride) per source row.
 */
1836 static inline void RENAME(planar2x
)(const uint8_t *src
, uint8_t *dst
, long srcWidth
, long srcHeight
, long srcStride
, long dstStride
)
// First output line: horizontal 3:1 blends; the right edge sample is copied.
1843 for (x
=0; x
<srcWidth
-1; x
++) {
1844 dst
[2*x
+1]= (3*src
[x
] + src
[x
+1])>>2;
1845 dst
[2*x
+2]= ( src
[x
] + 3*src
[x
+1])>>2;
1847 dst
[2*srcWidth
-1]= src
[srcWidth
-1];
// Interior lines, one source row pair per iteration.
1851 for (y
=1; y
<srcHeight
; y
++) {
1852 #if HAVE_MMX2 || HAVE_AMD3DNOW
// The asm covers the first srcWidth&~15 columns; pointers are pre-offset by
// mmxSize and REG_a is loaded from operand %4 (not visible here; presumably
// -mmxSize so the index counts up towards zero -- TODO confirm).
1853 const x86_reg mmxSize
= srcWidth
&~15;
1855 "mov %4, %%"REG_a
" \n\t"
1857 "movq (%0, %%"REG_a
"), %%mm0 \n\t"
1858 "movq (%1, %%"REG_a
"), %%mm1 \n\t"
1859 "movq 1(%0, %%"REG_a
"), %%mm2 \n\t"
1860 "movq 1(%1, %%"REG_a
"), %%mm3 \n\t"
1861 "movq -1(%0, %%"REG_a
"), %%mm4 \n\t"
1862 "movq -1(%1, %%"REG_a
"), %%mm5 \n\t"
// Averaging twice against the same register approximates a 3:1 weighting
// towards that register (each PAVGB halves the other operand's share).
1863 PAVGB
" %%mm0, %%mm5 \n\t"
1864 PAVGB
" %%mm0, %%mm3 \n\t"
1865 PAVGB
" %%mm0, %%mm5 \n\t"
1866 PAVGB
" %%mm0, %%mm3 \n\t"
1867 PAVGB
" %%mm1, %%mm4 \n\t"
1868 PAVGB
" %%mm1, %%mm2 \n\t"
1869 PAVGB
" %%mm1, %%mm4 \n\t"
1870 PAVGB
" %%mm1, %%mm2 \n\t"
1871 "movq %%mm5, %%mm7 \n\t"
1872 "movq %%mm4, %%mm6 \n\t"
1873 "punpcklbw %%mm3, %%mm5 \n\t"
1874 "punpckhbw %%mm3, %%mm7 \n\t"
1875 "punpcklbw %%mm2, %%mm4 \n\t"
1876 "punpckhbw %%mm2, %%mm6 \n\t"
// Two alternative store sequences follow (MOVNTQ and plain movq); the
// preprocessor condition choosing between them is not visible here.
1878 MOVNTQ
" %%mm5, (%2, %%"REG_a
", 2) \n\t"
1879 MOVNTQ
" %%mm7, 8(%2, %%"REG_a
", 2) \n\t"
1880 MOVNTQ
" %%mm4, (%3, %%"REG_a
", 2) \n\t"
1881 MOVNTQ
" %%mm6, 8(%3, %%"REG_a
", 2) \n\t"
1883 "movq %%mm5, (%2, %%"REG_a
", 2) \n\t"
1884 "movq %%mm7, 8(%2, %%"REG_a
", 2) \n\t"
1885 "movq %%mm4, (%3, %%"REG_a
", 2) \n\t"
1886 "movq %%mm6, 8(%3, %%"REG_a
", 2) \n\t"
1888 "add $8, %%"REG_a
" \n\t"
1890 :: "r" (src
+ mmxSize
), "r" (src
+ srcStride
+ mmxSize
),
1891 "r" (dst
+ mmxSize
*2), "r" (dst
+ dstStride
+ mmxSize
*2),
// Without the asm path mmxSize is 1, so the scalar loop below starts at x=0.
1897 const x86_reg mmxSize
=1;
// Left edge: vertical blend only.
1899 dst
[0 ]= (3*src
[0] + src
[srcStride
])>>2;
1900 dst
[dstStride
]= ( src
[0] + 3*src
[srcStride
])>>2;
// Remaining columns: each output sample blends one sample from the current
// source row with the diagonal neighbour in the next source row.
1902 for (x
=mmxSize
-1; x
<srcWidth
-1; x
++) {
1903 dst
[2*x
+1]= (3*src
[x
+0] + src
[x
+srcStride
+1])>>2;
1904 dst
[2*x
+dstStride
+2]= ( src
[x
+0] + 3*src
[x
+srcStride
+1])>>2;
1905 dst
[2*x
+dstStride
+1]= ( src
[x
+1] + 3*src
[x
+srcStride
])>>2;
1906 dst
[2*x
+2]= (3*src
[x
+1] + src
[x
+srcStride
])>>2;
// Right edge: vertical blend only.
1908 dst
[srcWidth
*2 -1 ]= (3*src
[srcWidth
-1] + src
[srcWidth
-1 + srcStride
])>>2;
1909 dst
[srcWidth
*2 -1 + dstStride
]= ( src
[srcWidth
-1] + 3*src
[srcWidth
-1 + srcStride
])>>2;
// Last output line: same horizontal-only interpolation as the first line.
1919 for (x
=0; x
<srcWidth
-1; x
++) {
1920 dst
[2*x
+1]= (3*src
[x
] + src
[x
+1])>>2;
1921 dst
[2*x
+2]= ( src
[x
] + 3*src
[x
+1])>>2;
1923 dst
[2*srcWidth
-1]= src
[srcWidth
-1];
// Alternative last-line loop; its body and the condition selecting it are
// not visible here.
1925 for (x
=0; x
<srcWidth
; x
++) {
1932 __asm__
volatile(EMMS
" \n\t"
1939 * Height should be a multiple of 2 and width should be a multiple of 16.
1940 * (If this is a problem for anyone then tell me, and I will fix it.)
1941 * Chrominance data is only taken from every second line, others are ignored.
1942 * FIXME: Write HQ version.
/*
 * Split packed U Y V Y input into separate Y, U and V planes.
 *
 * Rows are handled in pairs (y advances by 2). For the first row of a pair
 * luma and chroma are both extracted; for the second row only luma is
 * written, so chroma is taken from every second input row. udst/vdst
 * advance by chromStride once per pair. chromWidth is width>>1.
 * NOTE(review): the src/ydst pointer advances between the two passes are
 * not part of the visible lines.
 */
1944 static inline void RENAME(uyvytoyv12
)(const uint8_t *src
, uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
,
1945 long width
, long height
,
1946 long lumStride
, long chromStride
, long srcStride
)
1949 const x86_reg chromWidth
= width
>>1;
1950 for (y
=0; y
<height
; y
+=2) {
// First pass (asm): REG_a is the chroma index. mm7 is set to a per-word
// 0x00FF mask. Chroma sits in the low byte of each word (pand), luma in the
// high byte (psrlw $8). Each pass reads 32 packed bytes from src (%0) and
// stores 16 Y bytes (%1), 8 U bytes (%2) and 8 V bytes (%3).
1953 "xor %%"REG_a
", %%"REG_a
" \n\t"
1954 "pcmpeqw %%mm7, %%mm7 \n\t"
1955 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
1958 PREFETCH
" 64(%0, %%"REG_a
", 4) \n\t"
1959 "movq (%0, %%"REG_a
", 4), %%mm0 \n\t" // UYVY UYVY(0)
1960 "movq 8(%0, %%"REG_a
", 4), %%mm1 \n\t" // UYVY UYVY(4)
1961 "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0)
1962 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4)
1963 "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0)
1964 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4)
1965 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
1966 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
1967 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
1968 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
1970 MOVNTQ
" %%mm2, (%1, %%"REG_a
", 2) \n\t"
1972 "movq 16(%0, %%"REG_a
", 4), %%mm1 \n\t" // UYVY UYVY(8)
1973 "movq 24(%0, %%"REG_a
", 4), %%mm2 \n\t" // UYVY UYVY(12)
1974 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8)
1975 "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12)
1976 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8)
1977 "pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12)
1978 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
1979 "psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
1980 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
1981 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
1983 MOVNTQ
" %%mm3, 8(%1, %%"REG_a
", 2) \n\t"
// Separate the interleaved UV words into a V register and a U register.
1985 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
1986 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
1987 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
1988 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
1989 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
1990 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
1991 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
1992 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
1994 MOVNTQ
" %%mm0, (%3, %%"REG_a
") \n\t"
1995 MOVNTQ
" %%mm2, (%2, %%"REG_a
") \n\t"
// Step 8 chroma samples and compare the index with chromWidth (operand %4).
1997 "add $8, %%"REG_a
" \n\t"
1998 "cmp %4, %%"REG_a
" \n\t"
2000 ::"r"(src
), "r"(ydst
), "r"(udst
), "r"(vdst
), "g" (chromWidth
)
2001 : "memory", "%"REG_a
// Second pass (asm): luma only; the high byte of every word is shifted down
// and packed, and only ydst (%1) is written.
2008 "xor %%"REG_a
", %%"REG_a
" \n\t"
2011 PREFETCH
" 64(%0, %%"REG_a
", 4) \n\t"
2012 "movq (%0, %%"REG_a
", 4), %%mm0 \n\t" // UYVY UYVY(0)
2013 "movq 8(%0, %%"REG_a
", 4), %%mm1 \n\t" // UYVY UYVY(4)
2014 "movq 16(%0, %%"REG_a
", 4), %%mm2 \n\t" // UYVY UYVY(8)
2015 "movq 24(%0, %%"REG_a
", 4), %%mm3 \n\t" // UYVY UYVY(12)
2016 "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
2017 "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
2018 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
2019 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
2020 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
2021 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
2023 MOVNTQ
" %%mm0, (%1, %%"REG_a
", 2) \n\t"
2024 MOVNTQ
" %%mm2, 8(%1, %%"REG_a
", 2) \n\t"
2026 "add $8, %%"REG_a
" \n\t"
2027 "cmp %4, %%"REG_a
" \n\t"
2030 ::"r"(src
), "r"(ydst
), "r"(udst
), "r"(vdst
), "g" (chromWidth
)
2031 : "memory", "%"REG_a
// Scalar first pass: byte 0 of each 4-byte group is U, byte 2 is V,
// bytes 1 and 3 are luma.
2035 for (i
=0; i
<chromWidth
; i
++) {
2036 udst
[i
] = src
[4*i
+0];
2037 ydst
[2*i
+0] = src
[4*i
+1];
2038 vdst
[i
] = src
[4*i
+2];
2039 ydst
[2*i
+1] = src
[4*i
+3];
// Scalar second pass: luma only, chroma bytes of this row are skipped.
2044 for (i
=0; i
<chromWidth
; i
++) {
2045 ydst
[2*i
+0] = src
[4*i
+1];
2046 ydst
[2*i
+1] = src
[4*i
+3];
// One chroma row has been produced for this pair of input rows.
2049 udst
+= chromStride
;
2050 vdst
+= chromStride
;
2055 __asm__
volatile(EMMS
" \n\t"
2062 * Height should be a multiple of 2 and width should be a multiple of 2.
2063 * (If this is a problem for anyone then tell me, and I will fix it.)
2064 * Chrominance data is only taken from every second line,
2065 * others are ignored in the C version.
2066 * FIXME: Write HQ version.
/*
 * Convert packed 24-bit pixels (bytes read as b, g, r in the scalar code)
 * to planar Y, U and V with 2x2 chroma subsampling.
 *
 * The asm loop handles rows 0 .. height-3 in pairs; the scalar loop that
 * follows continues from the same y and handles the remaining rows
 * (presumably so the wide asm loads never run past the end of the picture
 * -- TODO confirm). chromWidth is width>>1; udst/vdst advance by
 * chromStride once per row pair.
 */
2068 static inline void RENAME(rgb24toyv12
)(const uint8_t *src
, uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
,
2069 long width
, long height
,
2070 long lumStride
, long chromStride
, long srcStride
)
2073 const x86_reg chromWidth
= width
>>1;
2075 for (y
=0; y
<height
-2; y
+=2) {
// Luma: run the asm below once for each of the two rows of the pair.
2077 for (i
=0; i
<2; i
++) {
// REG_a starts at -width (operand %2) and counts up; REG_d = 3*REG_a is the
// byte offset into the pixel row. src and ydst operands are pre-offset to
// the end of the row. Each pass converts 8 pixels (24 source bytes) into
// 8 luma bytes using ff_bgr2YCoeff and ff_bgr2YOffset.
2079 "mov %2, %%"REG_a
" \n\t"
2080 "movq "MANGLE(ff_bgr2YCoeff
)", %%mm6 \n\t"
2081 "movq "MANGLE(ff_w1111
)", %%mm5 \n\t"
2082 "pxor %%mm7, %%mm7 \n\t"
2083 "lea (%%"REG_a
", %%"REG_a
", 2), %%"REG_d
" \n\t"
2086 PREFETCH
" 64(%0, %%"REG_d
") \n\t"
2087 "movd (%0, %%"REG_d
"), %%mm0 \n\t"
2088 "movd 3(%0, %%"REG_d
"), %%mm1 \n\t"
2089 "punpcklbw %%mm7, %%mm0 \n\t"
2090 "punpcklbw %%mm7, %%mm1 \n\t"
2091 "movd 6(%0, %%"REG_d
"), %%mm2 \n\t"
2092 "movd 9(%0, %%"REG_d
"), %%mm3 \n\t"
2093 "punpcklbw %%mm7, %%mm2 \n\t"
2094 "punpcklbw %%mm7, %%mm3 \n\t"
2095 "pmaddwd %%mm6, %%mm0 \n\t"
2096 "pmaddwd %%mm6, %%mm1 \n\t"
2097 "pmaddwd %%mm6, %%mm2 \n\t"
2098 "pmaddwd %%mm6, %%mm3 \n\t"
2099 #ifndef FAST_BGR2YV12
2100 "psrad $8, %%mm0 \n\t"
2101 "psrad $8, %%mm1 \n\t"
2102 "psrad $8, %%mm2 \n\t"
2103 "psrad $8, %%mm3 \n\t"
2105 "packssdw %%mm1, %%mm0 \n\t"
2106 "packssdw %%mm3, %%mm2 \n\t"
2107 "pmaddwd %%mm5, %%mm0 \n\t"
2108 "pmaddwd %%mm5, %%mm2 \n\t"
2109 "packssdw %%mm2, %%mm0 \n\t"
2110 "psraw $7, %%mm0 \n\t"
// Pixels 4..7 of this group, same sequence as above.
2112 "movd 12(%0, %%"REG_d
"), %%mm4 \n\t"
2113 "movd 15(%0, %%"REG_d
"), %%mm1 \n\t"
2114 "punpcklbw %%mm7, %%mm4 \n\t"
2115 "punpcklbw %%mm7, %%mm1 \n\t"
2116 "movd 18(%0, %%"REG_d
"), %%mm2 \n\t"
2117 "movd 21(%0, %%"REG_d
"), %%mm3 \n\t"
2118 "punpcklbw %%mm7, %%mm2 \n\t"
2119 "punpcklbw %%mm7, %%mm3 \n\t"
2120 "pmaddwd %%mm6, %%mm4 \n\t"
2121 "pmaddwd %%mm6, %%mm1 \n\t"
2122 "pmaddwd %%mm6, %%mm2 \n\t"
2123 "pmaddwd %%mm6, %%mm3 \n\t"
2124 #ifndef FAST_BGR2YV12
2125 "psrad $8, %%mm4 \n\t"
2126 "psrad $8, %%mm1 \n\t"
2127 "psrad $8, %%mm2 \n\t"
2128 "psrad $8, %%mm3 \n\t"
2130 "packssdw %%mm1, %%mm4 \n\t"
2131 "packssdw %%mm3, %%mm2 \n\t"
2132 "pmaddwd %%mm5, %%mm4 \n\t"
2133 "pmaddwd %%mm5, %%mm2 \n\t"
2134 "add $24, %%"REG_d
" \n\t"
2135 "packssdw %%mm2, %%mm4 \n\t"
2136 "psraw $7, %%mm4 \n\t"
2138 "packuswb %%mm4, %%mm0 \n\t"
2139 "paddusb "MANGLE(ff_bgr2YOffset
)", %%mm0 \n\t"
2141 MOVNTQ
" %%mm0, (%1, %%"REG_a
") \n\t"
2142 "add $8, %%"REG_a
" \n\t"
2144 : : "r" (src
+width
*3), "r" (ydst
+width
), "g" ((x86_reg
)-width
)
2145 : "%"REG_a
, "%"REG_d
// Chroma: REG_a starts at -chromWidth (operand %4); REG_d = 6*REG_a is the
// byte offset (two pixels per chroma sample). %0 and %1 address two source
// rows srcStride apart; each pass writes 4 U bytes (%2) and 4 V bytes (%3).
2152 "mov %4, %%"REG_a
" \n\t"
2153 "movq "MANGLE(ff_w1111
)", %%mm5 \n\t"
2154 "movq "MANGLE(ff_bgr2UCoeff
)", %%mm6 \n\t"
2155 "pxor %%mm7, %%mm7 \n\t"
2156 "lea (%%"REG_a
", %%"REG_a
", 2), %%"REG_d
" \n\t"
2157 "add %%"REG_d
", %%"REG_d
" \n\t"
2160 PREFETCH
" 64(%0, %%"REG_d
") \n\t"
2161 PREFETCH
" 64(%1, %%"REG_d
") \n\t"
// Variant with PAVGB: average the two rows, then average each pixel with its
// horizontal neighbour (the copy shifted right by 24 bits = one pixel).
2162 #if HAVE_MMX2 || HAVE_AMD3DNOW
2163 "movq (%0, %%"REG_d
"), %%mm0 \n\t"
2164 "movq (%1, %%"REG_d
"), %%mm1 \n\t"
2165 "movq 6(%0, %%"REG_d
"), %%mm2 \n\t"
2166 "movq 6(%1, %%"REG_d
"), %%mm3 \n\t"
2167 PAVGB
" %%mm1, %%mm0 \n\t"
2168 PAVGB
" %%mm3, %%mm2 \n\t"
2169 "movq %%mm0, %%mm1 \n\t"
2170 "movq %%mm2, %%mm3 \n\t"
2171 "psrlq $24, %%mm0 \n\t"
2172 "psrlq $24, %%mm2 \n\t"
2173 PAVGB
" %%mm1, %%mm0 \n\t"
2174 PAVGB
" %%mm3, %%mm2 \n\t"
2175 "punpcklbw %%mm7, %%mm0 \n\t"
2176 "punpcklbw %%mm7, %%mm2 \n\t"
// Variant without PAVGB: widen to words, sum the 2x2 block, shift right by 2.
2178 "movd (%0, %%"REG_d
"), %%mm0 \n\t"
2179 "movd (%1, %%"REG_d
"), %%mm1 \n\t"
2180 "movd 3(%0, %%"REG_d
"), %%mm2 \n\t"
2181 "movd 3(%1, %%"REG_d
"), %%mm3 \n\t"
2182 "punpcklbw %%mm7, %%mm0 \n\t"
2183 "punpcklbw %%mm7, %%mm1 \n\t"
2184 "punpcklbw %%mm7, %%mm2 \n\t"
2185 "punpcklbw %%mm7, %%mm3 \n\t"
2186 "paddw %%mm1, %%mm0 \n\t"
2187 "paddw %%mm3, %%mm2 \n\t"
2188 "paddw %%mm2, %%mm0 \n\t"
2189 "movd 6(%0, %%"REG_d
"), %%mm4 \n\t"
2190 "movd 6(%1, %%"REG_d
"), %%mm1 \n\t"
2191 "movd 9(%0, %%"REG_d
"), %%mm2 \n\t"
2192 "movd 9(%1, %%"REG_d
"), %%mm3 \n\t"
2193 "punpcklbw %%mm7, %%mm4 \n\t"
2194 "punpcklbw %%mm7, %%mm1 \n\t"
2195 "punpcklbw %%mm7, %%mm2 \n\t"
2196 "punpcklbw %%mm7, %%mm3 \n\t"
2197 "paddw %%mm1, %%mm4 \n\t"
2198 "paddw %%mm3, %%mm2 \n\t"
2199 "paddw %%mm4, %%mm2 \n\t"
2200 "psrlw $2, %%mm0 \n\t"
2201 "psrlw $2, %%mm2 \n\t"
// Apply the V coefficients (mm1/mm3) and the U coefficients (mm6) to the
// averaged pixels.
2203 "movq "MANGLE(ff_bgr2VCoeff
)", %%mm1 \n\t"
2204 "movq "MANGLE(ff_bgr2VCoeff
)", %%mm3 \n\t"
2206 "pmaddwd %%mm0, %%mm1 \n\t"
2207 "pmaddwd %%mm2, %%mm3 \n\t"
2208 "pmaddwd %%mm6, %%mm0 \n\t"
2209 "pmaddwd %%mm6, %%mm2 \n\t"
2210 #ifndef FAST_BGR2YV12
2211 "psrad $8, %%mm0 \n\t"
2212 "psrad $8, %%mm1 \n\t"
2213 "psrad $8, %%mm2 \n\t"
2214 "psrad $8, %%mm3 \n\t"
2216 "packssdw %%mm2, %%mm0 \n\t"
2217 "packssdw %%mm3, %%mm1 \n\t"
2218 "pmaddwd %%mm5, %%mm0 \n\t"
2219 "pmaddwd %%mm5, %%mm1 \n\t"
2220 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0
2221 "psraw $7, %%mm0 \n\t"
// Second half of the group (source bytes 12..23), same two variants.
2223 #if HAVE_MMX2 || HAVE_AMD3DNOW
2224 "movq 12(%0, %%"REG_d
"), %%mm4 \n\t"
2225 "movq 12(%1, %%"REG_d
"), %%mm1 \n\t"
2226 "movq 18(%0, %%"REG_d
"), %%mm2 \n\t"
2227 "movq 18(%1, %%"REG_d
"), %%mm3 \n\t"
2228 PAVGB
" %%mm1, %%mm4 \n\t"
2229 PAVGB
" %%mm3, %%mm2 \n\t"
2230 "movq %%mm4, %%mm1 \n\t"
2231 "movq %%mm2, %%mm3 \n\t"
2232 "psrlq $24, %%mm4 \n\t"
2233 "psrlq $24, %%mm2 \n\t"
2234 PAVGB
" %%mm1, %%mm4 \n\t"
2235 PAVGB
" %%mm3, %%mm2 \n\t"
2236 "punpcklbw %%mm7, %%mm4 \n\t"
2237 "punpcklbw %%mm7, %%mm2 \n\t"
2239 "movd 12(%0, %%"REG_d
"), %%mm4 \n\t"
2240 "movd 12(%1, %%"REG_d
"), %%mm1 \n\t"
2241 "movd 15(%0, %%"REG_d
"), %%mm2 \n\t"
2242 "movd 15(%1, %%"REG_d
"), %%mm3 \n\t"
2243 "punpcklbw %%mm7, %%mm4 \n\t"
2244 "punpcklbw %%mm7, %%mm1 \n\t"
2245 "punpcklbw %%mm7, %%mm2 \n\t"
2246 "punpcklbw %%mm7, %%mm3 \n\t"
2247 "paddw %%mm1, %%mm4 \n\t"
2248 "paddw %%mm3, %%mm2 \n\t"
2249 "paddw %%mm2, %%mm4 \n\t"
// mm5 is used as scratch here and reloaded with ff_w1111 below.
2250 "movd 18(%0, %%"REG_d
"), %%mm5 \n\t"
2251 "movd 18(%1, %%"REG_d
"), %%mm1 \n\t"
2252 "movd 21(%0, %%"REG_d
"), %%mm2 \n\t"
2253 "movd 21(%1, %%"REG_d
"), %%mm3 \n\t"
2254 "punpcklbw %%mm7, %%mm5 \n\t"
2255 "punpcklbw %%mm7, %%mm1 \n\t"
2256 "punpcklbw %%mm7, %%mm2 \n\t"
2257 "punpcklbw %%mm7, %%mm3 \n\t"
2258 "paddw %%mm1, %%mm5 \n\t"
2259 "paddw %%mm3, %%mm2 \n\t"
2260 "paddw %%mm5, %%mm2 \n\t"
2261 "movq "MANGLE(ff_w1111
)", %%mm5 \n\t"
2262 "psrlw $2, %%mm4 \n\t"
2263 "psrlw $2, %%mm2 \n\t"
2265 "movq "MANGLE(ff_bgr2VCoeff
)", %%mm1 \n\t"
2266 "movq "MANGLE(ff_bgr2VCoeff
)", %%mm3 \n\t"
2268 "pmaddwd %%mm4, %%mm1 \n\t"
2269 "pmaddwd %%mm2, %%mm3 \n\t"
2270 "pmaddwd %%mm6, %%mm4 \n\t"
2271 "pmaddwd %%mm6, %%mm2 \n\t"
2272 #ifndef FAST_BGR2YV12
2273 "psrad $8, %%mm4 \n\t"
2274 "psrad $8, %%mm1 \n\t"
2275 "psrad $8, %%mm2 \n\t"
2276 "psrad $8, %%mm3 \n\t"
2278 "packssdw %%mm2, %%mm4 \n\t"
2279 "packssdw %%mm3, %%mm1 \n\t"
2280 "pmaddwd %%mm5, %%mm4 \n\t"
2281 "pmaddwd %%mm5, %%mm1 \n\t"
2282 "add $24, %%"REG_d
" \n\t"
2283 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2
2284 "psraw $7, %%mm4 \n\t"
// Regroup into four U and four V values, add ff_bgr2UVOffset and store the
// low dword to udst (%2) and the high dword to vdst (%3).
2286 "movq %%mm0, %%mm1 \n\t"
2287 "punpckldq %%mm4, %%mm0 \n\t"
2288 "punpckhdq %%mm4, %%mm1 \n\t"
2289 "packsswb %%mm1, %%mm0 \n\t"
2290 "paddb "MANGLE(ff_bgr2UVOffset
)", %%mm0 \n\t"
2291 "movd %%mm0, (%2, %%"REG_a
") \n\t"
2292 "punpckhdq %%mm0, %%mm0 \n\t"
2293 "movd %%mm0, (%3, %%"REG_a
") \n\t"
2294 "add $4, %%"REG_a
" \n\t"
2296 : : "r" (src
+chromWidth
*6), "r" (src
+srcStride
+chromWidth
*6), "r" (udst
+chromWidth
), "r" (vdst
+chromWidth
), "g" (-chromWidth
)
2297 : "%"REG_a
, "%"REG_d
2300 udst
+= chromStride
;
2301 vdst
+= chromStride
;
2305 __asm__
volatile(EMMS
" \n\t"
// Scalar loop: continues from the current y (the rows the asm loop left
// over) in steps of two rows.
2311 for (; y
<height
; y
+=2) {
// First row of the pair: luma for two pixels per i, plus U and V computed
// from the first pixel of each pair.
2313 for (i
=0; i
<chromWidth
; i
++) {
2314 unsigned int b
= src
[6*i
+0];
2315 unsigned int g
= src
[6*i
+1];
2316 unsigned int r
= src
[6*i
+2];
2318 unsigned int Y
= ((RY
*r
+ GY
*g
+ BY
*b
)>>RGB2YUV_SHIFT
) + 16;
2319 unsigned int V
= ((RV
*r
+ GV
*g
+ BV
*b
)>>RGB2YUV_SHIFT
) + 128;
2320 unsigned int U
= ((RU
*r
+ GU
*g
+ BU
*b
)>>RGB2YUV_SHIFT
) + 128;
2330 Y
= ((RY
*r
+ GY
*g
+ BY
*b
)>>RGB2YUV_SHIFT
) + 16;
// Second row of the pair: luma only.
2336 for (i
=0; i
<chromWidth
; i
++) {
2337 unsigned int b
= src
[6*i
+0];
2338 unsigned int g
= src
[6*i
+1];
2339 unsigned int r
= src
[6*i
+2];
2341 unsigned int Y
= ((RY
*r
+ GY
*g
+ BY
*b
)>>RGB2YUV_SHIFT
) + 16;
2349 Y
= ((RY
*r
+ GY
*g
+ BY
*b
)>>RGB2YUV_SHIFT
) + 16;
2352 udst
+= chromStride
;
2353 vdst
+= chromStride
;
/*
 * Interleave two byte planes row by row:
 * dest[2*w] = src1[w], dest[2*w+1] = src2[w] for every column w.
 *
 * Two asm variants (XMM registers and MMX registers) each handle 16 source
 * columns per pass; a scalar loop then finishes columns from width&~15 up
 * to width. A plain scalar loop over all columns is also present as the
 * non-asm alternative.
 */
2359 static void RENAME(interleaveBytes
)(uint8_t *src1
, uint8_t *src2
, uint8_t *dest
,
2360 long width
, long height
, long src1Stride
,
2361 long src2Stride
, long dstStride
)
2365 for (h
=0; h
< height
; h
++) {
// XMM variant: 16 bytes from each source per pass, 32 bytes stored.
// NOTE(review): movdqa/movntdq are the aligned forms, so src1, src2 and dest
// presumably must be 16-byte aligned on this path -- confirm with callers.
2371 "xor %%"REG_a
", %%"REG_a
" \n\t"
2373 PREFETCH
" 64(%1, %%"REG_a
") \n\t"
2374 PREFETCH
" 64(%2, %%"REG_a
") \n\t"
2375 "movdqa (%1, %%"REG_a
"), %%xmm0 \n\t"
2376 "movdqa (%1, %%"REG_a
"), %%xmm1 \n\t"
2377 "movdqa (%2, %%"REG_a
"), %%xmm2 \n\t"
2378 "punpcklbw %%xmm2, %%xmm0 \n\t"
2379 "punpckhbw %%xmm2, %%xmm1 \n\t"
2380 "movntdq %%xmm0, (%0, %%"REG_a
", 2) \n\t"
2381 "movntdq %%xmm1, 16(%0, %%"REG_a
", 2) \n\t"
// The index is compared with width-15 (operand %3), so only complete
// 16-column groups are processed here.
2382 "add $16, %%"REG_a
" \n\t"
2383 "cmp %3, %%"REG_a
" \n\t"
2385 ::"r"(dest
), "r"(src1
), "r"(src2
), "r" ((x86_reg
)width
-15)
2386 : "memory", "%"REG_a
""
// MMX variant: two 8-byte loads from each source per pass, four 8-byte stores.
2390 "xor %%"REG_a
", %%"REG_a
" \n\t"
2392 PREFETCH
" 64(%1, %%"REG_a
") \n\t"
2393 PREFETCH
" 64(%2, %%"REG_a
") \n\t"
2394 "movq (%1, %%"REG_a
"), %%mm0 \n\t"
2395 "movq 8(%1, %%"REG_a
"), %%mm2 \n\t"
2396 "movq %%mm0, %%mm1 \n\t"
2397 "movq %%mm2, %%mm3 \n\t"
2398 "movq (%2, %%"REG_a
"), %%mm4 \n\t"
2399 "movq 8(%2, %%"REG_a
"), %%mm5 \n\t"
2400 "punpcklbw %%mm4, %%mm0 \n\t"
2401 "punpckhbw %%mm4, %%mm1 \n\t"
2402 "punpcklbw %%mm5, %%mm2 \n\t"
2403 "punpckhbw %%mm5, %%mm3 \n\t"
2404 MOVNTQ
" %%mm0, (%0, %%"REG_a
", 2) \n\t"
2405 MOVNTQ
" %%mm1, 8(%0, %%"REG_a
", 2) \n\t"
2406 MOVNTQ
" %%mm2, 16(%0, %%"REG_a
", 2) \n\t"
2407 MOVNTQ
" %%mm3, 24(%0, %%"REG_a
", 2) \n\t"
2408 "add $16, %%"REG_a
" \n\t"
2409 "cmp %3, %%"REG_a
" \n\t"
2411 ::"r"(dest
), "r"(src1
), "r"(src2
), "r" ((x86_reg
)width
-15)
2412 : "memory", "%"REG_a
// Scalar tail after the asm: the columns beyond the last full group of 16.
2415 for (w
= (width
&(~15)); w
< width
; w
++) {
2416 dest
[2*w
+0] = src1
[w
];
2417 dest
[2*w
+1] = src2
[w
];
// Scalar alternative covering the whole row.
2420 for (w
=0; w
< width
; w
++) {
2421 dest
[2*w
+0] = src1
[w
];
2422 dest
[2*w
+1] = src2
[w
];
/*
 * Upsample two chroma planes by two in both directions by sample repetition.
 *
 * w = width/2 source columns and h = height/2 are used. For output row y the
 * source row is y>>1, and every source byte is written twice
 * (d[2*x] = d[2*x+1] = s[x]). The same procedure is applied first to
 * src1 -> dst1 and then to src2 -> dst2.
 */
2438 static inline void RENAME(vu9_to_vu12
)(const uint8_t *src1
, const uint8_t *src2
,
2439 uint8_t *dst1
, uint8_t *dst2
,
2440 long width
, long height
,
2441 long srcStride1
, long srcStride2
,
2442 long dstStride1
, long dstStride2
)
2446 w
=width
/2; h
=height
/2;
// Memory operands of an asm statement whose instruction text is not visible
// here (presumably prefetches of the second source rows -- TODO confirm).
2451 ::"m"(*(src1
+srcStride1
)),"m"(*(src2
+srcStride2
)):"memory");
// First plane.
2454 const uint8_t* s1
=src1
+srcStride1
*(y
>>1);
2455 uint8_t* d
=dst1
+dstStride1
*y
;
// asm: 32 source bytes per pass; punpck of a register with itself doubles
// every byte, giving 64 output bytes.
2458 for (;x
<w
-31;x
+=32) {
2460 PREFETCH
" 32%1 \n\t"
2461 "movq %1, %%mm0 \n\t"
2462 "movq 8%1, %%mm2 \n\t"
2463 "movq 16%1, %%mm4 \n\t"
2464 "movq 24%1, %%mm6 \n\t"
2465 "movq %%mm0, %%mm1 \n\t"
2466 "movq %%mm2, %%mm3 \n\t"
2467 "movq %%mm4, %%mm5 \n\t"
2468 "movq %%mm6, %%mm7 \n\t"
2469 "punpcklbw %%mm0, %%mm0 \n\t"
2470 "punpckhbw %%mm1, %%mm1 \n\t"
2471 "punpcklbw %%mm2, %%mm2 \n\t"
2472 "punpckhbw %%mm3, %%mm3 \n\t"
2473 "punpcklbw %%mm4, %%mm4 \n\t"
2474 "punpckhbw %%mm5, %%mm5 \n\t"
2475 "punpcklbw %%mm6, %%mm6 \n\t"
2476 "punpckhbw %%mm7, %%mm7 \n\t"
2477 MOVNTQ
" %%mm0, %0 \n\t"
2478 MOVNTQ
" %%mm1, 8%0 \n\t"
2479 MOVNTQ
" %%mm2, 16%0 \n\t"
2480 MOVNTQ
" %%mm3, 24%0 \n\t"
2481 MOVNTQ
" %%mm4, 32%0 \n\t"
2482 MOVNTQ
" %%mm5, 40%0 \n\t"
2483 MOVNTQ
" %%mm6, 48%0 \n\t"
2484 MOVNTQ
" %%mm7, 56%0"
// Scalar remainder of the row.
2490 for (;x
<w
;x
++) d
[2*x
]=d
[2*x
+1]=s1
[x
];
// Second plane, same procedure.
2493 const uint8_t* s2
=src2
+srcStride2
*(y
>>1);
2494 uint8_t* d
=dst2
+dstStride2
*y
;
2497 for (;x
<w
-31;x
+=32) {
2499 PREFETCH
" 32%1 \n\t"
2500 "movq %1, %%mm0 \n\t"
2501 "movq 8%1, %%mm2 \n\t"
2502 "movq 16%1, %%mm4 \n\t"
2503 "movq 24%1, %%mm6 \n\t"
2504 "movq %%mm0, %%mm1 \n\t"
2505 "movq %%mm2, %%mm3 \n\t"
2506 "movq %%mm4, %%mm5 \n\t"
2507 "movq %%mm6, %%mm7 \n\t"
2508 "punpcklbw %%mm0, %%mm0 \n\t"
2509 "punpckhbw %%mm1, %%mm1 \n\t"
2510 "punpcklbw %%mm2, %%mm2 \n\t"
2511 "punpckhbw %%mm3, %%mm3 \n\t"
2512 "punpcklbw %%mm4, %%mm4 \n\t"
2513 "punpckhbw %%mm5, %%mm5 \n\t"
2514 "punpcklbw %%mm6, %%mm6 \n\t"
2515 "punpckhbw %%mm7, %%mm7 \n\t"
2516 MOVNTQ
" %%mm0, %0 \n\t"
2517 MOVNTQ
" %%mm1, 8%0 \n\t"
2518 MOVNTQ
" %%mm2, 16%0 \n\t"
2519 MOVNTQ
" %%mm3, 24%0 \n\t"
2520 MOVNTQ
" %%mm4, 32%0 \n\t"
2521 MOVNTQ
" %%mm5, 40%0 \n\t"
2522 MOVNTQ
" %%mm6, 48%0 \n\t"
2523 MOVNTQ
" %%mm7, 56%0"
// Scalar remainder of the row.
2529 for (;x
<w
;x
++) d
[2*x
]=d
[2*x
+1]=s2
[x
];
/*
 * Pack a luma plane (src1) and two chroma planes (src2, src3) that are
 * subsampled by four in both directions into Y U Y V output.
 *
 * For output row y the chroma rows are taken at y>>2. Horizontally one
 * chroma sample serves four luma samples: index x addresses chroma, x<<2
 * addresses luma and 8*x addresses the output. w = width/2, h = height.
 */
2540 static inline void RENAME(yvu9_to_yuy2
)(const uint8_t *src1
, const uint8_t *src2
, const uint8_t *src3
,
2542 long width
, long height
,
2543 long srcStride1
, long srcStride2
,
2544 long srcStride3
, long dstStride
)
2548 w
=width
/2; h
=height
;
2550 const uint8_t* yp
=src1
+srcStride1
*y
;
2551 const uint8_t* up
=src2
+srcStride2
*(y
>>2);
2552 const uint8_t* vp
=src3
+srcStride3
*(y
>>2);
2553 uint8_t* d
=dst
+dstStride
*y
;
// asm: operand %0 is the chroma index, %1..%3 are yp/up/vp and %4 is d.
// Each pass reads 8 U, 8 V and 32 Y bytes and stores 64 output bytes.
2558 PREFETCH
" 32(%1, %0) \n\t"
2559 PREFETCH
" 32(%2, %0) \n\t"
2560 PREFETCH
" 32(%3, %0) \n\t"
2561 "movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
2562 "movq (%2, %0), %%mm1 \n\t" /* U0U1U2U3U4U5U6U7 */
2563 "movq (%3, %0), %%mm2 \n\t" /* V0V1V2V3V4V5V6V7 */
2564 "movq %%mm0, %%mm3 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
2565 "movq %%mm1, %%mm4 \n\t" /* U0U1U2U3U4U5U6U7 */
2566 "movq %%mm2, %%mm5 \n\t" /* V0V1V2V3V4V5V6V7 */
2567 "punpcklbw %%mm1, %%mm1 \n\t" /* U0U0 U1U1 U2U2 U3U3 */
2568 "punpcklbw %%mm2, %%mm2 \n\t" /* V0V0 V1V1 V2V2 V3V3 */
2569 "punpckhbw %%mm4, %%mm4 \n\t" /* U4U4 U5U5 U6U6 U7U7 */
2570 "punpckhbw %%mm5, %%mm5 \n\t" /* V4V4 V5V5 V6V6 V7V7 */
2572 "movq %%mm1, %%mm6 \n\t"
2573 "punpcklbw %%mm2, %%mm1 \n\t" /* U0V0 U0V0 U1V1 U1V1*/
2574 "punpcklbw %%mm1, %%mm0 \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/
2575 "punpckhbw %%mm1, %%mm3 \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/
2576 MOVNTQ
" %%mm0, (%4, %0, 8) \n\t"
2577 MOVNTQ
" %%mm3, 8(%4, %0, 8) \n\t"
2579 "punpckhbw %%mm2, %%mm6 \n\t" /* U2V2 U2V2 U3V3 U3V3*/
2580 "movq 8(%1, %0, 4), %%mm0 \n\t"
2581 "movq %%mm0, %%mm3 \n\t"
2582 "punpcklbw %%mm6, %%mm0 \n\t" /* Y U2 Y V2 Y U2 Y V2*/
2583 "punpckhbw %%mm6, %%mm3 \n\t" /* Y U3 Y V3 Y U3 Y V3*/
2584 MOVNTQ
" %%mm0, 16(%4, %0, 8) \n\t"
2585 MOVNTQ
" %%mm3, 24(%4, %0, 8) \n\t"
2587 "movq %%mm4, %%mm6 \n\t"
2588 "movq 16(%1, %0, 4), %%mm0 \n\t"
2589 "movq %%mm0, %%mm3 \n\t"
2590 "punpcklbw %%mm5, %%mm4 \n\t"
2591 "punpcklbw %%mm4, %%mm0 \n\t" /* Y U4 Y V4 Y U4 Y V4*/
2592 "punpckhbw %%mm4, %%mm3 \n\t" /* Y U5 Y V5 Y U5 Y V5*/
2593 MOVNTQ
" %%mm0, 32(%4, %0, 8) \n\t"
2594 MOVNTQ
" %%mm3, 40(%4, %0, 8) \n\t"
2596 "punpckhbw %%mm5, %%mm6 \n\t"
2597 "movq 24(%1, %0, 4), %%mm0 \n\t"
2598 "movq %%mm0, %%mm3 \n\t"
2599 "punpcklbw %%mm6, %%mm0 \n\t" /* Y U6 Y V6 Y U6 Y V6*/
2600 "punpckhbw %%mm6, %%mm3 \n\t" /* Y U7 Y V7 Y U7 Y V7*/
2601 MOVNTQ
" %%mm0, 48(%4, %0, 8) \n\t"
2602 MOVNTQ
" %%mm3, 56(%4, %0, 8) \n\t"
2605 : "r"(yp
), "r" (up
), "r"(vp
), "r"(d
)
// Scalar remainder: x2 is the luma index of the first of the four luma
// samples that share chroma sample x. Only the even output bytes written
// from yp[x2+1..x2+3] are visible here; the other stores of the group are
// not part of the visible lines.
2610 const long x2
= x
<<2;
2613 d
[8*x
+2] = yp
[x2
+1];
2615 d
[8*x
+4] = yp
[x2
+2];
2617 d
[8*x
+6] = yp
[x2
+3];
/**
 * Copy every second byte of src to dst (dst[i] = src[2*i]).
 *
 * The MMX sequence and the scalar statement below select the same bytes:
 * the low byte of each 16-bit source word.
 *
 * @param src   input bytes; only the even-offset bytes are kept
 * @param dst   output bytes
 * @param count presumably the number of output bytes, turned into a negative
 *              index that counts up towards zero; the set-up lines and loop
 *              control are not visible here -- TODO confirm
 */
2630 static void RENAME(extract_even
)(const uint8_t *src
, uint8_t *dst
, x86_reg count
)
/* mm7 = 0x00FF in every 16-bit lane: pcmpeqw sets all bits, psrlw $8 clears
 * the upper byte of each word. */
2640 "pcmpeqw %%mm7, %%mm7 \n\t"
2641 "psrlw $8, %%mm7 \n\t"
/* Load 32 source bytes. The index %0 is scaled by 2 because two source bytes
 * are consumed per output byte. The displacements (-30 here, -15 on the
 * stores) suggest the index was biased by +15 beforehand -- not visible,
 * TODO confirm. */
2643 "movq -30(%1, %0, 2), %%mm0 \n\t"
2644 "movq -22(%1, %0, 2), %%mm1 \n\t"
2645 "movq -14(%1, %0, 2), %%mm2 \n\t"
2646 "movq -6(%1, %0, 2), %%mm3 \n\t"
/* Keep only the low (even-offset) byte of each word. */
2647 "pand %%mm7, %%mm0 \n\t"
2648 "pand %%mm7, %%mm1 \n\t"
2649 "pand %%mm7, %%mm2 \n\t"
2650 "pand %%mm7, %%mm3 \n\t"
/* Narrow words to bytes; every word is <= 0xFF so the saturation is a no-op. */
2651 "packuswb %%mm1, %%mm0 \n\t"
2652 "packuswb %%mm3, %%mm2 \n\t"
/* Store the 16 extracted bytes. MOVNTQ is a macro defined near the top of the
 * file. */
2653 MOVNTQ
" %%mm0,-15(%2, %0) \n\t"
2654 MOVNTQ
" %%mm2,- 7(%2, %0) \n\t"
/* Inputs: %1 = src, %2 = dst. %0 is presumably count (its operand declaration
 * is not visible here). */
2658 : "r"(src
), "r"(dst
)
/* Scalar path: one output byte per step, same byte selection as above. */
2664 dst
[count
]= src
[2*count
];
/**
 * Split the even-offset bytes of src into two planes:
 * dst0[i] = src[4*i + 0] and dst1[i] = src[4*i + 2].
 *
 * @param src   input bytes, consumed four per output pair
 * @param dst0  receives the bytes at offsets 4*i + 0
 * @param dst1  receives the bytes at offsets 4*i + 2
 * @param count presumably the number of bytes written to each plane, used as
 *              a negative index counting up to zero; set-up and loop control
 *              are not visible here -- TODO confirm
 */
2669 static void RENAME(extract_even2
)(const uint8_t *src
, uint8_t *dst0
, uint8_t *dst1
, x86_reg count
)
/* mm7 = 0x00FF in every 16-bit lane. */
2679 "pcmpeqw %%mm7, %%mm7 \n\t"
2680 "psrlw $8, %%mm7 \n\t"
/* Load 32 source bytes; the index %0 is scaled by 4 (four source bytes per
 * output pair). -28 here matches 4 * -7 on the stores. */
2682 "movq -28(%1, %0, 4), %%mm0 \n\t"
2683 "movq -20(%1, %0, 4), %%mm1 \n\t"
2684 "movq -12(%1, %0, 4), %%mm2 \n\t"
2685 "movq -4(%1, %0, 4), %%mm3 \n\t"
/* First pass: keep the even-offset bytes (offsets 0, 2, 4, ...) and pack them
 * so mm0/mm2 hold them contiguously. */
2686 "pand %%mm7, %%mm0 \n\t"
2687 "pand %%mm7, %%mm1 \n\t"
2688 "pand %%mm7, %%mm2 \n\t"
2689 "pand %%mm7, %%mm3 \n\t"
2690 "packuswb %%mm1, %%mm0 \n\t"
2691 "packuswb %%mm3, %%mm2 \n\t"
/* Second pass: de-interleave the packed bytes. The shifted copies (mm0/mm2)
 * hold the original offsets 4k+2, the masked copies (mm1/mm3) hold 4k+0. */
2692 "movq %%mm0, %%mm1 \n\t"
2693 "movq %%mm2, %%mm3 \n\t"
2694 "psrlw $8, %%mm0 \n\t"
2695 "psrlw $8, %%mm2 \n\t"
2696 "pand %%mm7, %%mm1 \n\t"
2697 "pand %%mm7, %%mm3 \n\t"
2698 "packuswb %%mm2, %%mm0 \n\t"
2699 "packuswb %%mm3, %%mm1 \n\t"
/* mm0 (offsets 4k+2) goes to %3 = dst1, mm1 (offsets 4k+0) to %2 = dst0. */
2700 MOVNTQ
" %%mm0,- 7(%3, %0) \n\t"
2701 MOVNTQ
" %%mm1,- 7(%2, %0) \n\t"
/* Inputs: %1 = src, %2 = dst0, %3 = dst1. %0 is presumably count (operand
 * declaration not visible here). */
2705 : "r"(src
), "r"(dst0
), "r"(dst1
)
/* Scalar path: same byte selection, one output pair per step. */
2711 dst0
[count
]= src
[4*count
+0];
2712 dst1
[count
]= src
[4*count
+2];
/**
 * Average two input rows byte-wise, then split the even-offset bytes of the
 * result into two planes: dst0[i] from offset 4*i + 0, dst1[i] from 4*i + 2.
 *
 * @param src0  first input row
 * @param src1  second input row, averaged with src0
 * @param dst0  receives the averaged bytes at offsets 4*i + 0
 * @param dst1  receives the averaged bytes at offsets 4*i + 2
 * @param count presumably the number of bytes written to each plane, used as
 *              a negative index counting up to zero; set-up and loop control
 *              are not visible here -- TODO confirm
 *
 * NOTE(review): pavgb/pavgusb are documented as rounding up ((a+b+1)>>1),
 * whereas the scalar statements below truncate ((a+b)>>1). The two paths may
 * therefore differ by one in the least significant bit -- confirm whether
 * that is intended.
 */
2717 static void RENAME(extract_even2avg
)(const uint8_t *src0
, const uint8_t *src1
, uint8_t *dst0
, uint8_t *dst1
, x86_reg count
)
/* mm7 = 0x00FF in every 16-bit lane. */
2728 "pcmpeqw %%mm7, %%mm7 \n\t"
2729 "psrlw $8, %%mm7 \n\t"
/* Load 32 bytes of src0 (%1). */
2731 "movq -28(%1, %0, 4), %%mm0 \n\t"
2732 "movq -20(%1, %0, 4), %%mm1 \n\t"
2733 "movq -12(%1, %0, 4), %%mm2 \n\t"
2734 "movq -4(%1, %0, 4), %%mm3 \n\t"
/* Average with the same 32 bytes of src1 (%2). PAVGB is a macro defined near
 * the top of the file. */
2735 PAVGB
" -28(%2, %0, 4), %%mm0 \n\t"
2736 PAVGB
" -20(%2, %0, 4), %%mm1 \n\t"
2737 PAVGB
" -12(%2, %0, 4), %%mm2 \n\t"
2738 PAVGB
" - 4(%2, %0, 4), %%mm3 \n\t"
/* First pass: keep the even-offset bytes and pack them contiguously. */
2739 "pand %%mm7, %%mm0 \n\t"
2740 "pand %%mm7, %%mm1 \n\t"
2741 "pand %%mm7, %%mm2 \n\t"
2742 "pand %%mm7, %%mm3 \n\t"
2743 "packuswb %%mm1, %%mm0 \n\t"
2744 "packuswb %%mm3, %%mm2 \n\t"
/* Second pass: the shifted copies (mm0/mm2) hold the original offsets 4k+2,
 * the masked copies (mm1/mm3) hold 4k+0. */
2745 "movq %%mm0, %%mm1 \n\t"
2746 "movq %%mm2, %%mm3 \n\t"
2747 "psrlw $8, %%mm0 \n\t"
2748 "psrlw $8, %%mm2 \n\t"
2749 "pand %%mm7, %%mm1 \n\t"
2750 "pand %%mm7, %%mm3 \n\t"
2751 "packuswb %%mm2, %%mm0 \n\t"
2752 "packuswb %%mm3, %%mm1 \n\t"
/* mm0 (offsets 4k+2) goes to %4 = dst1, mm1 (offsets 4k+0) to %3 = dst0. */
2753 MOVNTQ
" %%mm0,- 7(%4, %0) \n\t"
2754 MOVNTQ
" %%mm1,- 7(%3, %0) \n\t"
/* Inputs: %1 = src0, %2 = src1, %3 = dst0, %4 = dst1. %0 is presumably count
 * (operand declaration not visible here). */
2758 : "r"(src0
), "r"(src1
), "r"(dst0
), "r"(dst1
)
/* Scalar path: truncating average of the two rows, same byte selection. */
2764 dst0
[count
]= (src0
[4*count
+0]+src1
[4*count
+0])>>1;
2765 dst1
[count
]= (src0
[4*count
+2]+src1
[4*count
+2])>>1;
/**
 * Split the odd-offset bytes of src into two planes. The MMX sequence sends
 * the bytes at offsets 4*i + 1 to dst0 and those at 4*i + 3 to dst1.
 *
 * @param src   input bytes, consumed four per output pair
 * @param dst0  receives the bytes at offsets 4*i + 1 (MMX path)
 * @param dst1  receives the bytes at offsets 4*i + 3 (MMX path)
 * @param count presumably the number of bytes written to each plane, used as
 *              a negative index counting up to zero; set-up and loop control
 *              are not visible here -- TODO confirm
 *
 * NOTE(review): the scalar statements at the end index src with +0 and +2.
 * For them to select the same bytes as the MMX path, src must have been
 * advanced by one byte in a line that is not visible here -- verify.
 */
2770 static void RENAME(extract_odd2
)(const uint8_t *src
, uint8_t *dst0
, uint8_t *dst1
, x86_reg count
)
/* mm7 = 0x00FF in every 16-bit lane (used in the second pass only). */
2780 "pcmpeqw %%mm7, %%mm7 \n\t"
2781 "psrlw $8, %%mm7 \n\t"
/* Load 32 source bytes; the index %0 is scaled by 4. */
2783 "movq -28(%1, %0, 4), %%mm0 \n\t"
2784 "movq -20(%1, %0, 4), %%mm1 \n\t"
2785 "movq -12(%1, %0, 4), %%mm2 \n\t"
2786 "movq -4(%1, %0, 4), %%mm3 \n\t"
/* First pass: shift each word right by 8 to keep the odd-offset bytes
 * (offsets 1, 3, 5, ...), then pack them contiguously. */
2787 "psrlw $8, %%mm0 \n\t"
2788 "psrlw $8, %%mm1 \n\t"
2789 "psrlw $8, %%mm2 \n\t"
2790 "psrlw $8, %%mm3 \n\t"
2791 "packuswb %%mm1, %%mm0 \n\t"
2792 "packuswb %%mm3, %%mm2 \n\t"
/* Second pass: the shifted copies (mm0/mm2) hold the original offsets 4k+3,
 * the masked copies (mm1/mm3) hold 4k+1. */
2793 "movq %%mm0, %%mm1 \n\t"
2794 "movq %%mm2, %%mm3 \n\t"
2795 "psrlw $8, %%mm0 \n\t"
2796 "psrlw $8, %%mm2 \n\t"
2797 "pand %%mm7, %%mm1 \n\t"
2798 "pand %%mm7, %%mm3 \n\t"
2799 "packuswb %%mm2, %%mm0 \n\t"
2800 "packuswb %%mm3, %%mm1 \n\t"
/* mm0 (offsets 4k+3) goes to %3 = dst1, mm1 (offsets 4k+1) to %2 = dst0. */
2801 MOVNTQ
" %%mm0,- 7(%3, %0) \n\t"
2802 MOVNTQ
" %%mm1,- 7(%2, %0) \n\t"
/* Inputs: %1 = src, %2 = dst0, %3 = dst1. %0 is presumably count (operand
 * declaration not visible here). */
2806 : "r"(src
), "r"(dst0
), "r"(dst1
)
/* Scalar path; see the NOTE(review) above about the +0/+2 offsets. */
2813 dst0
[count
]= src
[4*count
+0];
2814 dst1
[count
]= src
[4*count
+2];
/**
 * Average two input rows byte-wise, then split the odd-offset bytes of the
 * result into two planes. The MMX sequence sends offsets 4*i + 1 to dst0 and
 * 4*i + 3 to dst1.
 *
 * @param src0  first input row
 * @param src1  second input row, averaged with src0
 * @param dst0  receives the averaged bytes at offsets 4*i + 1 (MMX path)
 * @param dst1  receives the averaged bytes at offsets 4*i + 3 (MMX path)
 * @param count presumably the number of bytes written to each plane, used as
 *              a negative index counting up to zero; set-up and loop control
 *              are not visible here -- TODO confirm
 *
 * NOTE(review): the scalar statements at the end index with +0 and +2, so
 * src0/src1 must have been advanced by one byte in lines not visible here for
 * both paths to agree -- verify.
 * NOTE(review): pavgb/pavgusb are documented as rounding up ((a+b+1)>>1),
 * whereas the scalar statements truncate ((a+b)>>1); the paths may differ by
 * one in the least significant bit -- confirm whether that is intended.
 */
2819 static void RENAME(extract_odd2avg
)(const uint8_t *src0
, const uint8_t *src1
, uint8_t *dst0
, uint8_t *dst1
, x86_reg count
)
/* mm7 = 0x00FF in every 16-bit lane (used in the second pass only). */
2830 "pcmpeqw %%mm7, %%mm7 \n\t"
2831 "psrlw $8, %%mm7 \n\t"
/* Load 32 bytes of src0 (%1). */
2833 "movq -28(%1, %0, 4), %%mm0 \n\t"
2834 "movq -20(%1, %0, 4), %%mm1 \n\t"
2835 "movq -12(%1, %0, 4), %%mm2 \n\t"
2836 "movq -4(%1, %0, 4), %%mm3 \n\t"
/* Average with the same 32 bytes of src1 (%2). PAVGB is a macro defined near
 * the top of the file. */
2837 PAVGB
" -28(%2, %0, 4), %%mm0 \n\t"
2838 PAVGB
" -20(%2, %0, 4), %%mm1 \n\t"
2839 PAVGB
" -12(%2, %0, 4), %%mm2 \n\t"
2840 PAVGB
" - 4(%2, %0, 4), %%mm3 \n\t"
/* First pass: keep the odd-offset bytes and pack them contiguously. */
2841 "psrlw $8, %%mm0 \n\t"
2842 "psrlw $8, %%mm1 \n\t"
2843 "psrlw $8, %%mm2 \n\t"
2844 "psrlw $8, %%mm3 \n\t"
2845 "packuswb %%mm1, %%mm0 \n\t"
2846 "packuswb %%mm3, %%mm2 \n\t"
/* Second pass: the shifted copies (mm0/mm2) hold the original offsets 4k+3,
 * the masked copies (mm1/mm3) hold 4k+1. */
2847 "movq %%mm0, %%mm1 \n\t"
2848 "movq %%mm2, %%mm3 \n\t"
2849 "psrlw $8, %%mm0 \n\t"
2850 "psrlw $8, %%mm2 \n\t"
2851 "pand %%mm7, %%mm1 \n\t"
2852 "pand %%mm7, %%mm3 \n\t"
2853 "packuswb %%mm2, %%mm0 \n\t"
2854 "packuswb %%mm3, %%mm1 \n\t"
/* mm0 (offsets 4k+3) goes to %4 = dst1, mm1 (offsets 4k+1) to %3 = dst0. */
2855 MOVNTQ
" %%mm0,- 7(%4, %0) \n\t"
2856 MOVNTQ
" %%mm1,- 7(%3, %0) \n\t"
/* Inputs: %1 = src0, %2 = src1, %3 = dst0, %4 = dst1. %0 is presumably count
 * (operand declaration not visible here). */
2860 : "r"(src0
), "r"(src1
), "r"(dst0
), "r"(dst1
)
/* Scalar path: truncating average; see the NOTE(review) items above. */
2868 dst0
[count
]= (src0
[4*count
+0]+src1
[4*count
+0])>>1;
2869 dst1
[count
]= (src0
[4*count
+2]+src1
[4*count
+2])>>1;
/**
 * Convert packed YUYV rows into separate Y, U and V planes, averaging the
 * chroma of each row with the row above it.
 *
 * For every row the even-offset bytes go to ydst via extract_even(); the
 * odd-offset bytes of the current and the previous source row are averaged
 * and split into udst/vdst via extract_odd2avg().
 *
 * @param ydst        luma output plane
 * @param udst        first chroma output plane
 * @param vdst        second chroma output plane
 * @param src         packed input
 * @param width       passed to extract_even() as its count argument
 * @param height      number of source rows iterated over
 * @param lumStride   not used in the visible lines; presumably the per-row
 *                    advance of ydst -- TODO confirm
 * @param chromStride not used in the visible lines; presumably the advance of
 *                    udst/vdst -- TODO confirm
 * @param srcStride   byte distance between source rows (src - srcStride is
 *                    read as the previous row)
 *
 * NOTE(review): the visible call reads src - srcStride. A guard that skips
 * this for the first row, and the per-row pointer advances, must exist in
 * lines not visible here -- verify.
 */
2874 static void RENAME(yuyvtoyuv420
)(uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
, const uint8_t *src
,
2875 long width
, long height
,
2876 long lumStride
, long chromStride
, long srcStride
)
/* width / 2 rounded up for odd widths (relies on >> of a negative long being
 * an arithmetic shift, which is implementation-defined in C). */
2879 const long chromWidth
= -((-width
)>>1);
2881 for (y
=0; y
<height
; y
++) {
/* Luma: even-offset bytes of the current row. */
2882 RENAME(extract_even
)(src
, ydst
, width
);
/* Chroma: average of the previous and the current row. */
2884 RENAME(extract_odd2avg
)(src
-srcStride
, src
, udst
, vdst
, chromWidth
);
/**
 * Convert packed YUYV rows into separate Y, U and V planes without vertical
 * chroma averaging.
 *
 * For every row the even-offset bytes go to ydst via extract_even() and the
 * odd-offset bytes are split into udst/vdst via extract_odd2().
 *
 * @param ydst        luma output plane
 * @param udst        first chroma output plane
 * @param vdst        second chroma output plane
 * @param src         packed input
 * @param width       passed to extract_even() as its count argument
 * @param height      number of source rows iterated over
 * @param lumStride   not used in the visible lines; presumably the per-row
 *                    advance of ydst -- TODO confirm
 * @param chromStride not used in the visible lines; presumably the per-row
 *                    advance of udst/vdst -- TODO confirm
 * @param srcStride   not used in the visible lines; presumably the per-row
 *                    advance of src -- TODO confirm
 */
2901 static void RENAME(yuyvtoyuv422
)(uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
, const uint8_t *src
,
2902 long width
, long height
,
2903 long lumStride
, long chromStride
, long srcStride
)
/* width / 2 rounded up for odd widths (relies on >> of a negative long being
 * an arithmetic shift, which is implementation-defined in C). */
2906 const long chromWidth
= -((-width
)>>1);
2908 for (y
=0; y
<height
; y
++) {
/* Luma: even-offset bytes of the current row. */
2909 RENAME(extract_even
)(src
, ydst
, width
);
/* Chroma: odd-offset bytes of the current row, split into two planes. */
2910 RENAME(extract_odd2
)(src
, udst
, vdst
, chromWidth
);
/**
 * Convert packed UYVY rows into separate Y, U and V planes, averaging the
 * chroma of each row with the row above it.
 *
 * For every row the odd-offset bytes go to ydst (extract_even() applied to
 * src + 1); the even-offset bytes of the current and the previous source row
 * are averaged and split into udst/vdst via extract_even2avg().
 *
 * @param ydst        luma output plane
 * @param udst        first chroma output plane
 * @param vdst        second chroma output plane
 * @param src         packed input
 * @param width       passed to extract_even() as its count argument
 * @param height      number of source rows iterated over
 * @param lumStride   not used in the visible lines; presumably the per-row
 *                    advance of ydst -- TODO confirm
 * @param chromStride not used in the visible lines; presumably the advance of
 *                    udst/vdst -- TODO confirm
 * @param srcStride   byte distance between source rows (src - srcStride is
 *                    read as the previous row)
 *
 * NOTE(review): the visible call reads src - srcStride. A guard that skips
 * this for the first row, and the per-row pointer advances, must exist in
 * lines not visible here -- verify.
 */
2926 static void RENAME(uyvytoyuv420
)(uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
, const uint8_t *src
,
2927 long width
, long height
,
2928 long lumStride
, long chromStride
, long srcStride
)
/* width / 2 rounded up for odd widths (relies on >> of a negative long being
 * an arithmetic shift, which is implementation-defined in C). */
2931 const long chromWidth
= -((-width
)>>1);
2933 for (y
=0; y
<height
; y
++) {
/* Luma: starting at src + 1 makes extract_even() pick the odd-offset bytes. */
2934 RENAME(extract_even
)(src
+1, ydst
, width
);
/* Chroma: average of the previous and the current row. */
2936 RENAME(extract_even2avg
)(src
-srcStride
, src
, udst
, vdst
, chromWidth
);
/**
 * Convert packed UYVY rows into separate Y, U and V planes without vertical
 * chroma averaging.
 *
 * For every row the odd-offset bytes go to ydst (extract_even() applied to
 * src + 1) and the even-offset bytes are split into udst/vdst via
 * extract_even2().
 *
 * @param ydst        luma output plane
 * @param udst        first chroma output plane
 * @param vdst        second chroma output plane
 * @param src         packed input
 * @param width       passed to extract_even() as its count argument
 * @param height      number of source rows iterated over
 * @param lumStride   not used in the visible lines; presumably the per-row
 *                    advance of ydst -- TODO confirm
 * @param chromStride not used in the visible lines; presumably the per-row
 *                    advance of udst/vdst -- TODO confirm
 * @param srcStride   not used in the visible lines; presumably the per-row
 *                    advance of src -- TODO confirm
 */
2953 static void RENAME(uyvytoyuv422
)(uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
, const uint8_t *src
,
2954 long width
, long height
,
2955 long lumStride
, long chromStride
, long srcStride
)
/* width / 2 rounded up for odd widths (relies on >> of a negative long being
 * an arithmetic shift, which is implementation-defined in C). */
2958 const long chromWidth
= -((-width
)>>1);
2960 for (y
=0; y
<height
; y
++) {
/* Luma: starting at src + 1 makes extract_even() pick the odd-offset bytes. */
2961 RENAME(extract_even
)(src
+1, ydst
, width
);
/* Chroma: even-offset bytes of the current row, split into two planes. */
2962 RENAME(extract_even2
)(src
, udst
, vdst
, chromWidth
);
/**
 * Point the converter function pointers at the implementations generated by
 * this template.
 *
 * Each assignment binds a pointer (declared outside this excerpt) to the
 * function of the same name wrapped in RENAME(), i.e. the variant compiled
 * for the current instantiation of the template.
 */
2978 static inline void RENAME(rgb2rgb_init
)(void)
/* Packed RGB <-> RGB converters. */
2980 rgb15to16
= RENAME(rgb15to16
);
2981 rgb15tobgr24
= RENAME(rgb15tobgr24
);
2982 rgb15to32
= RENAME(rgb15to32
);
2983 rgb16tobgr24
= RENAME(rgb16tobgr24
);
2984 rgb16to32
= RENAME(rgb16to32
);
2985 rgb16to15
= RENAME(rgb16to15
);
2986 rgb24tobgr16
= RENAME(rgb24tobgr16
);
2987 rgb24tobgr15
= RENAME(rgb24tobgr15
);
2988 rgb24tobgr32
= RENAME(rgb24tobgr32
);
2989 rgb32to16
= RENAME(rgb32to16
);
2990 rgb32to15
= RENAME(rgb32to15
);
2991 rgb32tobgr24
= RENAME(rgb32tobgr24
);
2992 rgb24to15
= RENAME(rgb24to15
);
2993 rgb24to16
= RENAME(rgb24to16
);
2994 rgb24tobgr24
= RENAME(rgb24tobgr24
);
2995 rgb32tobgr32
= RENAME(rgb32tobgr32
);
2996 rgb32tobgr16
= RENAME(rgb32tobgr16
);
2997 rgb32tobgr15
= RENAME(rgb32tobgr15
);
/* Planar/packed YUV converters and helpers. */
2998 yv12toyuy2
= RENAME(yv12toyuy2
);
2999 yv12touyvy
= RENAME(yv12touyvy
);
3000 yuv422ptoyuy2
= RENAME(yuv422ptoyuy2
);
3001 yuv422ptouyvy
= RENAME(yuv422ptouyvy
);
3002 yuy2toyv12
= RENAME(yuy2toyv12
);
3003 // yvu9toyv12 = RENAME(yvu9toyv12);
3004 planar2x
= RENAME(planar2x
);
3005 rgb24toyv12
= RENAME(rgb24toyv12
);
3006 interleaveBytes
= RENAME(interleaveBytes
);
3007 vu9_to_vu12
= RENAME(vu9_to_vu12
);
3008 yvu9_to_yuy2
= RENAME(yvu9_to_yuy2
);
/* Packed 4:2:2 -> planar converters defined earlier in this file. */
3010 uyvytoyuv420
= RENAME(uyvytoyuv420
);
3011 uyvytoyuv422
= RENAME(uyvytoyuv422
);
3012 yuyvtoyuv420
= RENAME(yuyvtoyuv420
);
3013 yuyvtoyuv422
= RENAME(yuyvtoyuv422
);