/*
 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 * See http://libmpeg2.sourceforge.net/ for updates.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Modified for use with MPlayer, see libmpeg-0.4.1.diff for the exact changes.
 * Detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/
 */
#if defined(ARCH_X86) || defined(ARCH_X86_64)

#include "attributes.h"
#include "mpeg2_internal.h"
/* MMX code - needs a rewrite */
/*
 * Motion Compensation frequently needs to average values using the
 * formula (x+y+1)>>1. Both MMXEXT and 3DNow! include one instruction
 * to compute this, but it's been left out of classic MMX.
 *
 * We need to be careful of overflows when doing this computation.
 * Rather than unpacking data to 16 bits, which reduces parallelism,
 * we use the following formulas:
 *
 * (x+y)>>1 == (x&y)+((x^y)>>1)
 * (x+y+1)>>1 == (x|y)-((x^y)>>1)
 */
/* some rounding constants */
static mmx_t mask1 = {0xfefefefefefefefeLL};
static mmx_t round4 = {0x0002000200020002LL};
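
/*
 * Illustration only (not part of the original code, function names are
 * hypothetical): a minimal scalar sketch of the two identities above on a
 * single byte. The & 0xfe mirrors the role of mask1, which keeps bits from
 * crossing byte boundaries when the packed 64-bit value is shifted right.
 */
static inline uint8_t scalar_avg2_down (uint8_t x, uint8_t y)
{
    /* (x+y)>>1 == (x&y)+((x^y)>>1), computed without 8-bit overflow */
    return (uint8_t) ((x & y) + (((x ^ y) & 0xfe) >> 1));
}

static inline uint8_t scalar_avg2_up (uint8_t x, uint8_t y)
{
    /* (x+y+1)>>1 == (x|y)-((x^y)>>1), computed without 8-bit overflow */
    return (uint8_t) ((x | y) - (((x ^ y) & 0xfe) >> 1));
}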
/*
 * This code should probably be compiled with loop unrolling
 * (ie, -funroll-loops in gcc) because some of the loops
 * use a small static number of iterations. This was written
 * with the assumption the compiler knows best about when
 * unrolling will help.
 */
static inline void mmx_zero_reg ()
{
    /* load 0 into mm0 */
    pxor_r2r (mm0, mm0);
}
static inline void mmx_average_2_U8 (uint8_t * dest, const uint8_t * src1,
                                     const uint8_t * src2)
{
    /* *dest = (*src1 + *src2 + 1)/ 2; */

    movq_m2r (*src1, mm1);      /* load 8 src1 bytes */
    movq_r2r (mm1, mm2);        /* copy 8 src1 bytes */

    movq_m2r (*src2, mm3);      /* load 8 src2 bytes */
    movq_r2r (mm3, mm4);        /* copy 8 src2 bytes */

    pxor_r2r (mm1, mm3);        /* xor src1 and src2 */
    pand_m2r (mask1, mm3);      /* mask lower bits */
    psrlq_i2r (1, mm3);         /* /2 */
    por_r2r (mm2, mm4);         /* or src1 and src2 */
    psubb_r2r (mm3, mm4);       /* subtract subresults */
    movq_r2m (mm4, *dest);      /* store result in dest */
}
static inline void mmx_interp_average_2_U8 (uint8_t * dest, const uint8_t * src1,
                                            const uint8_t * src2)
{
    /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */

    movq_m2r (*dest, mm1);      /* load 8 dest bytes */
    movq_r2r (mm1, mm2);        /* copy 8 dest bytes */

    movq_m2r (*src1, mm3);      /* load 8 src1 bytes */
    movq_r2r (mm3, mm4);        /* copy 8 src1 bytes */

    movq_m2r (*src2, mm5);      /* load 8 src2 bytes */
    movq_r2r (mm5, mm6);        /* copy 8 src2 bytes */

    pxor_r2r (mm3, mm5);        /* xor src1 and src2 */
    pand_m2r (mask1, mm5);      /* mask lower bits */
    psrlq_i2r (1, mm5);         /* /2 */
    por_r2r (mm4, mm6);         /* or src1 and src2 */
    psubb_r2r (mm5, mm6);       /* subtract subresults */
    movq_r2r (mm6, mm5);        /* copy subresult */

    pxor_r2r (mm1, mm5);        /* xor srcavg and dest */
    pand_m2r (mask1, mm5);      /* mask lower bits */
    psrlq_i2r (1, mm5);         /* /2 */
    por_r2r (mm2, mm6);         /* or srcavg and dest */
    psubb_r2r (mm5, mm6);       /* subtract subresults */
    movq_r2m (mm6, *dest);      /* store result in dest */
}
static inline void mmx_average_4_U8 (uint8_t * dest, const uint8_t * src1,
                                     const uint8_t * src2, const uint8_t * src3,
                                     const uint8_t * src4)
{
    /* *dest = (*src1 + *src2 + *src3 + *src4 + 2)/ 4; */

    movq_m2r (*src1, mm1);      /* load 8 src1 bytes */
    movq_r2r (mm1, mm2);        /* copy 8 src1 bytes */

    punpcklbw_r2r (mm0, mm1);   /* unpack low src1 bytes */
    punpckhbw_r2r (mm0, mm2);   /* unpack high src1 bytes */

    movq_m2r (*src2, mm3);      /* load 8 src2 bytes */
    movq_r2r (mm3, mm4);        /* copy 8 src2 bytes */

    punpcklbw_r2r (mm0, mm3);   /* unpack low src2 bytes */
    punpckhbw_r2r (mm0, mm4);   /* unpack high src2 bytes */

    paddw_r2r (mm3, mm1);       /* add lows */
    paddw_r2r (mm4, mm2);       /* add highs */

    /* now have partials in mm1 and mm2 */

    movq_m2r (*src3, mm3);      /* load 8 src3 bytes */
    movq_r2r (mm3, mm4);        /* copy 8 src3 bytes */

    punpcklbw_r2r (mm0, mm3);   /* unpack low src3 bytes */
    punpckhbw_r2r (mm0, mm4);   /* unpack high src3 bytes */

    paddw_r2r (mm3, mm1);       /* add lows */
    paddw_r2r (mm4, mm2);       /* add highs */

    movq_m2r (*src4, mm5);      /* load 8 src4 bytes */
    movq_r2r (mm5, mm6);        /* copy 8 src4 bytes */

    punpcklbw_r2r (mm0, mm5);   /* unpack low src4 bytes */
    punpckhbw_r2r (mm0, mm6);   /* unpack high src4 bytes */

    paddw_r2r (mm5, mm1);       /* add lows */
    paddw_r2r (mm6, mm2);       /* add highs */

    /* now have subtotal in mm1 and mm2 */

    paddw_m2r (round4, mm1);
    psraw_i2r (2, mm1);         /* /4 */
    paddw_m2r (round4, mm2);
    psraw_i2r (2, mm2);         /* /4 */

    packuswb_r2r (mm2, mm1);    /* pack (w/ saturation) */
    movq_r2m (mm1, *dest);      /* store result in dest */
}
static inline void mmx_interp_average_4_U8 (uint8_t * dest, const uint8_t * src1,
                                            const uint8_t * src2, const uint8_t * src3,
                                            const uint8_t * src4)
{
    /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */

    movq_m2r (*src1, mm1);      /* load 8 src1 bytes */
    movq_r2r (mm1, mm2);        /* copy 8 src1 bytes */

    punpcklbw_r2r (mm0, mm1);   /* unpack low src1 bytes */
    punpckhbw_r2r (mm0, mm2);   /* unpack high src1 bytes */

    movq_m2r (*src2, mm3);      /* load 8 src2 bytes */
    movq_r2r (mm3, mm4);        /* copy 8 src2 bytes */

    punpcklbw_r2r (mm0, mm3);   /* unpack low src2 bytes */
    punpckhbw_r2r (mm0, mm4);   /* unpack high src2 bytes */

    paddw_r2r (mm3, mm1);       /* add lows */
    paddw_r2r (mm4, mm2);       /* add highs */

    /* now have partials in mm1 and mm2 */

    movq_m2r (*src3, mm3);      /* load 8 src3 bytes */
    movq_r2r (mm3, mm4);        /* copy 8 src3 bytes */

    punpcklbw_r2r (mm0, mm3);   /* unpack low src3 bytes */
    punpckhbw_r2r (mm0, mm4);   /* unpack high src3 bytes */

    paddw_r2r (mm3, mm1);       /* add lows */
    paddw_r2r (mm4, mm2);       /* add highs */

    movq_m2r (*src4, mm5);      /* load 8 src4 bytes */
    movq_r2r (mm5, mm6);        /* copy 8 src4 bytes */

    punpcklbw_r2r (mm0, mm5);   /* unpack low src4 bytes */
    punpckhbw_r2r (mm0, mm6);   /* unpack high src4 bytes */

    paddw_r2r (mm5, mm1);       /* add lows */
    paddw_r2r (mm6, mm2);       /* add highs */

    paddw_m2r (round4, mm1);
    psraw_i2r (2, mm1);         /* /4 */
    paddw_m2r (round4, mm2);
    psraw_i2r (2, mm2);         /* /4 */

    /* now have subtotal/4 in mm1 and mm2 */

    movq_m2r (*dest, mm3);      /* load 8 dest bytes */
    movq_r2r (mm3, mm4);        /* copy 8 dest bytes */

    packuswb_r2r (mm2, mm1);    /* pack (w/ saturation) */
    movq_r2r (mm1, mm2);        /* copy subresult */

    pxor_r2r (mm1, mm3);        /* xor srcavg and dest */
    pand_m2r (mask1, mm3);      /* mask lower bits */
    psrlq_i2r (1, mm3);         /* /2 */
    por_r2r (mm2, mm4);         /* or srcavg and dest */
    psubb_r2r (mm3, mm4);       /* subtract subresults */
    movq_r2m (mm4, *dest);      /* store result in dest */
}
/*-----------------------------------------------------------------------*/

static inline void MC_avg_mmx (const int width, int height, uint8_t * dest,
                               const uint8_t * ref, const int stride)
{
    mmx_average_2_U8 (dest, dest, ref);
    mmx_average_2_U8 (dest+8, dest+8, ref+8);
}

static void MC_avg_o_16_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    MC_avg_mmx (16, height, dest, ref, stride);
}

static void MC_avg_o_8_mmx (uint8_t * dest, const uint8_t * ref,
                            int stride, int height)
{
    MC_avg_mmx (8, height, dest, ref, stride);
}
/*-----------------------------------------------------------------------*/

static inline void MC_put_mmx (const int width, int height, uint8_t * dest,
                               const uint8_t * ref, const int stride)
{
    movq_m2r (*ref, mm1);       /* load 8 ref bytes */
    movq_r2m (mm1, *dest);      /* store 8 bytes at curr */

    movq_m2r (*(ref+8), mm1);   /* load 8 ref bytes */
    movq_r2m (mm1, *(dest+8));  /* store 8 bytes at curr */
}

static void MC_put_o_16_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    MC_put_mmx (16, height, dest, ref, stride);
}

static void MC_put_o_8_mmx (uint8_t * dest, const uint8_t * ref,
                            int stride, int height)
{
    MC_put_mmx (8, height, dest, ref, stride);
}
/*-----------------------------------------------------------------------*/

/* Half pixel interpolation in the x direction */
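
/*
 * Naming convention for the functions below: _o_ works at the full-pel
 * position, _x_ interpolates horizontally (ref, ref+1), _y_ vertically
 * (ref, ref+stride) and _xy_ does both; MC_put_* overwrites dest with the
 * prediction, while MC_avg_* averages it with the data already in dest.
 */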
static inline void MC_avg_x_mmx (const int width, int height, uint8_t * dest,
                                 const uint8_t * ref, const int stride)
{
    mmx_interp_average_2_U8 (dest, ref, ref+1);
    mmx_interp_average_2_U8 (dest+8, ref+8, ref+9);
}

static void MC_avg_x_16_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    MC_avg_x_mmx (16, height, dest, ref, stride);
}

static void MC_avg_x_8_mmx (uint8_t * dest, const uint8_t * ref,
                            int stride, int height)
{
    MC_avg_x_mmx (8, height, dest, ref, stride);
}

/*-----------------------------------------------------------------------*/
static inline void MC_put_x_mmx (const int width, int height, uint8_t * dest,
                                 const uint8_t * ref, const int stride)
{
    mmx_average_2_U8 (dest, ref, ref+1);
    mmx_average_2_U8 (dest+8, ref+8, ref+9);
}

static void MC_put_x_16_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    MC_put_x_mmx (16, height, dest, ref, stride);
}

static void MC_put_x_8_mmx (uint8_t * dest, const uint8_t * ref,
                            int stride, int height)
{
    MC_put_x_mmx (8, height, dest, ref, stride);
}

/*-----------------------------------------------------------------------*/
static inline void MC_avg_xy_mmx (const int width, int height, uint8_t * dest,
                                  const uint8_t * ref, const int stride)
{
    const uint8_t * ref_next = ref + stride;

    mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);
    mmx_interp_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9);
}

static void MC_avg_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
                              int stride, int height)
{
    MC_avg_xy_mmx (16, height, dest, ref, stride);
}

static void MC_avg_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    MC_avg_xy_mmx (8, height, dest, ref, stride);
}

/*-----------------------------------------------------------------------*/
static inline void MC_put_xy_mmx (const int width, int height, uint8_t * dest,
                                  const uint8_t * ref, const int stride)
{
    const uint8_t * ref_next = ref + stride;

    mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);
    mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9);
}

static void MC_put_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
                              int stride, int height)
{
    MC_put_xy_mmx (16, height, dest, ref, stride);
}

static void MC_put_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    MC_put_xy_mmx (8, height, dest, ref, stride);
}

/*-----------------------------------------------------------------------*/
static inline void MC_avg_y_mmx (const int width, int height, uint8_t * dest,
                                 const uint8_t * ref, const int stride)
{
    const uint8_t * ref_next = ref + stride;

    mmx_interp_average_2_U8 (dest, ref, ref_next);
    mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8);
}

static void MC_avg_y_16_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    MC_avg_y_mmx (16, height, dest, ref, stride);
}

static void MC_avg_y_8_mmx (uint8_t * dest, const uint8_t * ref,
                            int stride, int height)
{
    MC_avg_y_mmx (8, height, dest, ref, stride);
}

/*-----------------------------------------------------------------------*/
static inline void MC_put_y_mmx (const int width, int height, uint8_t * dest,
                                 const uint8_t * ref, const int stride)
{
    const uint8_t * ref_next = ref + stride;

    mmx_average_2_U8 (dest, ref, ref_next);
    mmx_average_2_U8 (dest+8, ref+8, ref_next+8);
}

static void MC_put_y_16_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    MC_put_y_mmx (16, height, dest, ref, stride);
}

static void MC_put_y_8_mmx (uint8_t * dest, const uint8_t * ref,
                            int stride, int height)
{
    MC_put_y_mmx (8, height, dest, ref, stride);
}
MPEG2_MC_EXTERN (mmx)
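
/*
 * MPEG2_MC_EXTERN, defined in mpeg2_internal.h, is expected to gather the
 * sixteen MC_{avg,put}_{o,x,y,xy}_{16,8}_<suffix> functions above into the
 * motion-compensation function table for this CPU flavour.
 */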
/* CPU_MMXEXT/CPU_3DNOW adaptation layer */

#define pavg_r2r(src,dest) \
do { \
    if (cpu == CPU_MMXEXT) \
	pavgb_r2r (src, dest); \
    else \
	pavgusb_r2r (src, dest); \
} while (0)

#define pavg_m2r(src,dest) \
do { \
    if (cpu == CPU_MMXEXT) \
	pavgb_m2r (src, dest); \
    else \
	pavgusb_m2r (src, dest); \
} while (0)
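
/*
 * Both pavgb (MMXEXT) and pavgusb (3DNow!) compute the rounded-up byte
 * average (a + b + 1) >> 1, so the helpers below are written once against
 * pavg_* and dispatched on their cpu argument.
 */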
/* CPU_MMXEXT code */
static inline void MC_put1_8 (int height, uint8_t * dest, const uint8_t * ref,
                              const int stride)
{
    movq_m2r (*ref, mm0);
    movq_r2m (mm0, *dest);
}
static inline void MC_put1_16 (int height, uint8_t * dest, const uint8_t * ref,
                               const int stride)
{
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+8), mm1);
    movq_r2m (mm0, *dest);
    movq_r2m (mm1, *(dest+8));
}
static inline void MC_avg1_8 (int height, uint8_t * dest, const uint8_t * ref,
                              const int stride, const int cpu)
{
    movq_m2r (*ref, mm0);
    pavg_m2r (*dest, mm0);
    movq_r2m (mm0, *dest);
}
static inline void MC_avg1_16 (int height, uint8_t * dest, const uint8_t * ref,
                               const int stride, const int cpu)
{
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+8), mm1);
    pavg_m2r (*dest, mm0);
    pavg_m2r (*(dest+8), mm1);
    movq_r2m (mm0, *dest);
    movq_r2m (mm1, *(dest+8));
}
static inline void MC_put2_8 (int height, uint8_t * dest, const uint8_t * ref,
                              const int stride, const int offset,
                              const int cpu)
{
    movq_m2r (*ref, mm0);
    pavg_m2r (*(ref+offset), mm0);
    movq_r2m (mm0, *dest);
}
static inline void MC_put2_16 (int height, uint8_t * dest, const uint8_t * ref,
                               const int stride, const int offset,
                               const int cpu)
{
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+8), mm1);
    pavg_m2r (*(ref+offset), mm0);
    pavg_m2r (*(ref+offset+8), mm1);
    movq_r2m (mm0, *dest);
    movq_r2m (mm1, *(dest+8));
}
static inline void MC_avg2_8 (int height, uint8_t * dest, const uint8_t * ref,
                              const int stride, const int offset,
                              const int cpu)
{
    movq_m2r (*ref, mm0);
    pavg_m2r (*(ref+offset), mm0);
    pavg_m2r (*dest, mm0);
    movq_r2m (mm0, *dest);
}
static inline void MC_avg2_16 (int height, uint8_t * dest, const uint8_t * ref,
                               const int stride, const int offset,
                               const int cpu)
{
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+8), mm1);
    pavg_m2r (*(ref+offset), mm0);
    pavg_m2r (*(ref+offset+8), mm1);
    pavg_m2r (*dest, mm0);
    pavg_m2r (*(dest+8), mm1);
    movq_r2m (mm0, *dest);
    movq_r2m (mm1, *(dest+8));
}
static mmx_t mask_one = {0x0101010101010101LL};
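
/*
 * mask_one keeps one correction bit per byte: averaging four pixels as two
 * nested pavg steps (each rounding up) can overshoot the exact
 * (a+b+c+d+2)>>2 by one, the xor terms track the bytes where that happens,
 * and psubusb removes the extra 1 again.
 */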
static inline void MC_put4_8 (int height, uint8_t * dest, const uint8_t * ref,
                              const int stride, const int cpu)
{
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+1), mm1);

    movq_m2r (*ref, mm2);
    movq_m2r (*(ref+1), mm3);

    pand_m2r (mask_one, mm7);
    psubusb_r2r (mm7, mm0);

    movq_r2m (mm0, *dest);

    movq_r2r (mm6, mm7);        /* unroll ! */
    movq_r2r (mm2, mm0);        /* unroll ! */
}
static inline void MC_put4_16 (int height, uint8_t * dest, const uint8_t * ref,
                               const int stride, const int cpu)
{
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+stride+1), mm1);
    movq_m2r (*(ref+1), mm2);
    movq_m2r (*(ref+stride), mm3);

    pand_m2r (mask_one, mm7);
    psubusb_r2r (mm7, mm0);
    movq_r2m (mm0, *dest);

    movq_m2r (*(ref+8), mm0);
    movq_m2r (*(ref+stride+9), mm1);
    movq_m2r (*(ref+9), mm2);
    movq_m2r (*(ref+stride+8), mm3);

    pand_m2r (mask_one, mm7);
    psubusb_r2r (mm7, mm0);
    movq_r2m (mm0, *(dest+8));
}
static inline void MC_avg4_8 (int height, uint8_t * dest, const uint8_t * ref,
                              const int stride, const int cpu)
{
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+stride+1), mm1);
    movq_m2r (*(ref+1), mm2);
    movq_m2r (*(ref+stride), mm3);

    pand_m2r (mask_one, mm7);
    psubusb_r2r (mm7, mm0);
    movq_m2r (*dest, mm1);
    movq_r2m (mm0, *dest);
}
static inline void MC_avg4_16 (int height, uint8_t * dest, const uint8_t * ref,
                               const int stride, const int cpu)
{
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+stride+1), mm1);
    movq_m2r (*(ref+1), mm2);
    movq_m2r (*(ref+stride), mm3);

    pand_m2r (mask_one, mm7);
    psubusb_r2r (mm7, mm0);
    movq_m2r (*dest, mm1);
    movq_r2m (mm0, *dest);

    movq_m2r (*(ref+8), mm0);
    movq_m2r (*(ref+stride+9), mm1);
    movq_m2r (*(ref+9), mm2);
    movq_m2r (*(ref+stride+8), mm3);

    pand_m2r (mask_one, mm7);
    psubusb_r2r (mm7, mm0);
    movq_m2r (*(dest+8), mm1);
    movq_r2m (mm0, *(dest+8));
}
static void MC_avg_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
}

static void MC_avg_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
}

static void MC_put_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    MC_put1_16 (height, dest, ref, stride);
}

static void MC_put_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_put1_8 (height, dest, ref, stride);
}

static void MC_avg_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
}

static void MC_avg_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
}

static void MC_put_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
}

static void MC_put_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
}

static void MC_avg_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
}

static void MC_avg_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
}

static void MC_put_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
}

static void MC_put_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
}

static void MC_avg_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
}

static void MC_avg_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
}

static void MC_put_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
}

static void MC_put_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
}

MPEG2_MC_EXTERN (mmxext)
static void MC_avg_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
}

static void MC_avg_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
                              int stride, int height)
{
    MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
}

static void MC_put_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_put1_16 (height, dest, ref, stride);
}

static void MC_put_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
                              int stride, int height)
{
    MC_put1_8 (height, dest, ref, stride);
}

static void MC_avg_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
}

static void MC_avg_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
                              int stride, int height)
{
    MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
}

static void MC_put_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
}

static void MC_put_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
                              int stride, int height)
{
    MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
}

static void MC_avg_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
}

static void MC_avg_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
                              int stride, int height)
{
    MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
}

static void MC_put_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
}

static void MC_put_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
                              int stride, int height)
{
    MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
}

static void MC_avg_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);
}

static void MC_avg_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);
}

static void MC_put_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);
}

static void MC_put_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);
}

MPEG2_MC_EXTERN (3dnow)