/*
 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 * See http://libmpeg2.sourceforge.net/ for updates.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include "config.h"

#if ARCH_X86 || ARCH_X86_64

#include <inttypes.h>

#include "mpeg2.h"
#include "attributes.h"
#include "mpeg2_internal.h"
#include "mmx.h"

#define CPU_MMXEXT 0
#define CPU_3DNOW 1

/* MMX code - needs a rewrite */
/*
 * Motion Compensation frequently needs to average values using the
 * formula (x+y+1)>>1. Both MMXEXT and 3DNow! include one instruction
 * to compute this, but it's been left out of classic MMX.
 *
 * We need to be careful of overflows when doing this computation.
 * Rather than unpacking data to 16 bits, which reduces parallelism,
 * we use the following formulas:
 *
 * (x+y)>>1 == (x&y)+((x^y)>>1)
 * (x+y+1)>>1 == (x|y)-((x^y)>>1)
 */
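/*
 * Scalar sketch of the rounding-up identity above (hypothetical helper for
 * illustration only, not used by the decoder).  Masking x^y with 0xfe before
 * the shift is what pand with mask1 does below: it keeps the per-byte shift
 * from borrowing bits across byte boundaries inside a 64-bit psrlq.
 */
static inline uint8_t scalar_avg_round_up (uint8_t x, uint8_t y)
{
    /* (x+y+1)>>1 == (x|y) - (((x^y) & 0xfe) >> 1), with no 8-bit overflow */
    return (uint8_t) ((x | y) - (((x ^ y) & 0xfe) >> 1));
}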
/* some rounding constants */
static mmx_t mask1 = {0xfefefefefefefefeLL};	/* clears the low bit of each byte */
static mmx_t round4 = {0x0002000200020002LL};	/* adds 2 to each word before the >>2 */
/*
 * This code should probably be compiled with loop unrolling
 * (i.e. -funroll-loops in gcc), because some of the loops
 * use a small static number of iterations.  This was written
 * with the assumption that the compiler knows best about when
 * unrolling will help.
 */
static inline void mmx_zero_reg (void)
{
    /* load 0 into mm0 */
    pxor_r2r (mm0, mm0);
}
static inline void mmx_average_2_U8 (uint8_t * dest, const uint8_t * src1,
				     const uint8_t * src2)
{
    /* *dest = (*src1 + *src2 + 1)/ 2; */

    movq_m2r (*src1, mm1);	/* load 8 src1 bytes */
    movq_r2r (mm1, mm2);	/* copy 8 src1 bytes */

    movq_m2r (*src2, mm3);	/* load 8 src2 bytes */
    movq_r2r (mm3, mm4);	/* copy 8 src2 bytes */

    pxor_r2r (mm1, mm3);	/* xor src1 and src2 */
    pand_m2r (mask1, mm3);	/* mask lower bits */
    psrlq_i2r (1, mm3);		/* /2 */
    por_r2r (mm2, mm4);		/* or src1 and src2 */
    psubb_r2r (mm3, mm4);	/* subtract subresults */
    movq_r2m (mm4, *dest);	/* store result in dest */
}
static inline void mmx_interp_average_2_U8 (uint8_t * dest,
					    const uint8_t * src1,
					    const uint8_t * src2)
{
    /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */

    movq_m2r (*dest, mm1);	/* load 8 dest bytes */
    movq_r2r (mm1, mm2);	/* copy 8 dest bytes */

    movq_m2r (*src1, mm3);	/* load 8 src1 bytes */
    movq_r2r (mm3, mm4);	/* copy 8 src1 bytes */

    movq_m2r (*src2, mm5);	/* load 8 src2 bytes */
    movq_r2r (mm5, mm6);	/* copy 8 src2 bytes */

    pxor_r2r (mm3, mm5);	/* xor src1 and src2 */
    pand_m2r (mask1, mm5);	/* mask lower bits */
    psrlq_i2r (1, mm5);		/* /2 */
    por_r2r (mm4, mm6);		/* or src1 and src2 */
    psubb_r2r (mm5, mm6);	/* subtract subresults */
    movq_r2r (mm6, mm5);	/* copy subresult */

    pxor_r2r (mm1, mm5);	/* xor srcavg and dest */
    pand_m2r (mask1, mm5);	/* mask lower bits */
    psrlq_i2r (1, mm5);		/* /2 */
    por_r2r (mm2, mm6);		/* or srcavg and dest */
    psubb_r2r (mm5, mm6);	/* subtract subresults */
    movq_r2m (mm6, *dest);	/* store result in dest */
}
static inline void mmx_average_4_U8 (uint8_t * dest, const uint8_t * src1,
				     const uint8_t * src2,
				     const uint8_t * src3,
				     const uint8_t * src4)
{
    /* *dest = (*src1 + *src2 + *src3 + *src4 + 2)/ 4; */

    movq_m2r (*src1, mm1);	/* load 8 src1 bytes */
    movq_r2r (mm1, mm2);	/* copy 8 src1 bytes */

    punpcklbw_r2r (mm0, mm1);	/* unpack low src1 bytes */
    punpckhbw_r2r (mm0, mm2);	/* unpack high src1 bytes */

    movq_m2r (*src2, mm3);	/* load 8 src2 bytes */
    movq_r2r (mm3, mm4);	/* copy 8 src2 bytes */

    punpcklbw_r2r (mm0, mm3);	/* unpack low src2 bytes */
    punpckhbw_r2r (mm0, mm4);	/* unpack high src2 bytes */

    paddw_r2r (mm3, mm1);	/* add lows */
    paddw_r2r (mm4, mm2);	/* add highs */

    /* now have partials in mm1 and mm2 */

    movq_m2r (*src3, mm3);	/* load 8 src3 bytes */
    movq_r2r (mm3, mm4);	/* copy 8 src3 bytes */

    punpcklbw_r2r (mm0, mm3);	/* unpack low src3 bytes */
    punpckhbw_r2r (mm0, mm4);	/* unpack high src3 bytes */

    paddw_r2r (mm3, mm1);	/* add lows */
    paddw_r2r (mm4, mm2);	/* add highs */

    movq_m2r (*src4, mm5);	/* load 8 src4 bytes */
    movq_r2r (mm5, mm6);	/* copy 8 src4 bytes */

    punpcklbw_r2r (mm0, mm5);	/* unpack low src4 bytes */
    punpckhbw_r2r (mm0, mm6);	/* unpack high src4 bytes */

    paddw_r2r (mm5, mm1);	/* add lows */
    paddw_r2r (mm6, mm2);	/* add highs */

    /* now have subtotal in mm1 and mm2 */

    paddw_m2r (round4, mm1);
    psraw_i2r (2, mm1);		/* /4 */
    paddw_m2r (round4, mm2);
    psraw_i2r (2, mm2);		/* /4 */

    packuswb_r2r (mm2, mm1);	/* pack (w/ saturation) */
    movq_r2m (mm1, *dest);	/* store result in dest */
}
static inline void mmx_interp_average_4_U8 (uint8_t * dest,
					    const uint8_t * src1,
					    const uint8_t * src2,
					    const uint8_t * src3,
					    const uint8_t * src4)
{
    /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */

    movq_m2r (*src1, mm1);	/* load 8 src1 bytes */
    movq_r2r (mm1, mm2);	/* copy 8 src1 bytes */

    punpcklbw_r2r (mm0, mm1);	/* unpack low src1 bytes */
    punpckhbw_r2r (mm0, mm2);	/* unpack high src1 bytes */

    movq_m2r (*src2, mm3);	/* load 8 src2 bytes */
    movq_r2r (mm3, mm4);	/* copy 8 src2 bytes */

    punpcklbw_r2r (mm0, mm3);	/* unpack low src2 bytes */
    punpckhbw_r2r (mm0, mm4);	/* unpack high src2 bytes */

    paddw_r2r (mm3, mm1);	/* add lows */
    paddw_r2r (mm4, mm2);	/* add highs */

    /* now have partials in mm1 and mm2 */

    movq_m2r (*src3, mm3);	/* load 8 src3 bytes */
    movq_r2r (mm3, mm4);	/* copy 8 src3 bytes */

    punpcklbw_r2r (mm0, mm3);	/* unpack low src3 bytes */
    punpckhbw_r2r (mm0, mm4);	/* unpack high src3 bytes */

    paddw_r2r (mm3, mm1);	/* add lows */
    paddw_r2r (mm4, mm2);	/* add highs */

    movq_m2r (*src4, mm5);	/* load 8 src4 bytes */
    movq_r2r (mm5, mm6);	/* copy 8 src4 bytes */

    punpcklbw_r2r (mm0, mm5);	/* unpack low src4 bytes */
    punpckhbw_r2r (mm0, mm6);	/* unpack high src4 bytes */

    paddw_r2r (mm5, mm1);	/* add lows */
    paddw_r2r (mm6, mm2);	/* add highs */

    paddw_m2r (round4, mm1);
    psraw_i2r (2, mm1);		/* /4 */
    paddw_m2r (round4, mm2);
    psraw_i2r (2, mm2);		/* /4 */

    /* now have subtotal/4 in mm1 and mm2 */

    movq_m2r (*dest, mm3);	/* load 8 dest bytes */
    movq_r2r (mm3, mm4);	/* copy 8 dest bytes */

    packuswb_r2r (mm2, mm1);	/* pack (w/ saturation) */
    movq_r2r (mm1, mm2);	/* copy subresult */

    pxor_r2r (mm1, mm3);	/* xor srcavg and dest */
    pand_m2r (mask1, mm3);	/* mask lower bits */
    psrlq_i2r (1, mm3);		/* /2 */
    por_r2r (mm2, mm4);		/* or srcavg and dest */
    psubb_r2r (mm3, mm4);	/* subtract subresults */
    movq_r2m (mm4, *dest);	/* store result in dest */
}
/*-----------------------------------------------------------------------*/

static inline void MC_avg_mmx (const int width, int height, uint8_t * dest,
			       const uint8_t * ref, const int stride)
{
    mmx_zero_reg ();

    do {
	mmx_average_2_U8 (dest, dest, ref);

	if (width == 16)
	    mmx_average_2_U8 (dest+8, dest+8, ref+8);

	dest += stride;
	ref += stride;
    } while (--height);
}
static void MC_avg_o_16_mmx (uint8_t * dest, const uint8_t * ref,
			     int stride, int height)
{
    MC_avg_mmx (16, height, dest, ref, stride);
}

static void MC_avg_o_8_mmx (uint8_t * dest, const uint8_t * ref,
			    int stride, int height)
{
    MC_avg_mmx (8, height, dest, ref, stride);
}
/*-----------------------------------------------------------------------*/

static inline void MC_put_mmx (const int width, int height, uint8_t * dest,
			       const uint8_t * ref, const int stride)
{
    mmx_zero_reg ();

    do {
	movq_m2r (* ref, mm1);		/* load 8 ref bytes */
	movq_r2m (mm1, * dest);		/* store 8 bytes at curr */

	if (width == 16) {
	    movq_m2r (* (ref+8), mm1);		/* load 8 ref bytes */
	    movq_r2m (mm1, * (dest+8));		/* store 8 bytes at curr */
	}

	dest += stride;
	ref += stride;
    } while (--height);
}
static void MC_put_o_16_mmx (uint8_t * dest, const uint8_t * ref,
			     int stride, int height)
{
    MC_put_mmx (16, height, dest, ref, stride);
}

static void MC_put_o_8_mmx (uint8_t * dest, const uint8_t * ref,
			    int stride, int height)
{
    MC_put_mmx (8, height, dest, ref, stride);
}
/*-----------------------------------------------------------------------*/

/* Half pixel interpolation in the x direction */
static inline void MC_avg_x_mmx (const int width, int height, uint8_t * dest,
				 const uint8_t * ref, const int stride)
{
    mmx_zero_reg ();

    do {
	mmx_interp_average_2_U8 (dest, ref, ref+1);

	if (width == 16)
	    mmx_interp_average_2_U8 (dest+8, ref+8, ref+9);

	dest += stride;
	ref += stride;
    } while (--height);
}
static void MC_avg_x_16_mmx (uint8_t * dest, const uint8_t * ref,
			     int stride, int height)
{
    MC_avg_x_mmx (16, height, dest, ref, stride);
}

static void MC_avg_x_8_mmx (uint8_t * dest, const uint8_t * ref,
			    int stride, int height)
{
    MC_avg_x_mmx (8, height, dest, ref, stride);
}
/*-----------------------------------------------------------------------*/

static inline void MC_put_x_mmx (const int width, int height, uint8_t * dest,
				 const uint8_t * ref, const int stride)
{
    mmx_zero_reg ();

    do {
	mmx_average_2_U8 (dest, ref, ref+1);

	if (width == 16)
	    mmx_average_2_U8 (dest+8, ref+8, ref+9);

	dest += stride;
	ref += stride;
    } while (--height);
}
static void MC_put_x_16_mmx (uint8_t * dest, const uint8_t * ref,
			     int stride, int height)
{
    MC_put_x_mmx (16, height, dest, ref, stride);
}

static void MC_put_x_8_mmx (uint8_t * dest, const uint8_t * ref,
			    int stride, int height)
{
    MC_put_x_mmx (8, height, dest, ref, stride);
}
/*-----------------------------------------------------------------------*/

static inline void MC_avg_xy_mmx (const int width, int height, uint8_t * dest,
				  const uint8_t * ref, const int stride)
{
    const uint8_t * ref_next = ref + stride;

    mmx_zero_reg ();

    do {
	mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);

	if (width == 16)
	    mmx_interp_average_4_U8 (dest+8, ref+8, ref+9,
				     ref_next+8, ref_next+9);

	dest += stride;
	ref += stride;
	ref_next += stride;
    } while (--height);
}
static void MC_avg_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
			      int stride, int height)
{
    MC_avg_xy_mmx (16, height, dest, ref, stride);
}

static void MC_avg_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
			     int stride, int height)
{
    MC_avg_xy_mmx (8, height, dest, ref, stride);
}
/*-----------------------------------------------------------------------*/

static inline void MC_put_xy_mmx (const int width, int height, uint8_t * dest,
				  const uint8_t * ref, const int stride)
{
    const uint8_t * ref_next = ref + stride;

    mmx_zero_reg ();

    do {
	mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);

	if (width == 16)
	    mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9);

	dest += stride;
	ref += stride;
	ref_next += stride;
    } while (--height);
}
static void MC_put_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
			      int stride, int height)
{
    MC_put_xy_mmx (16, height, dest, ref, stride);
}

static void MC_put_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
			     int stride, int height)
{
    MC_put_xy_mmx (8, height, dest, ref, stride);
}
/*-----------------------------------------------------------------------*/

static inline void MC_avg_y_mmx (const int width, int height, uint8_t * dest,
				 const uint8_t * ref, const int stride)
{
    const uint8_t * ref_next = ref + stride;

    mmx_zero_reg ();

    do {
	mmx_interp_average_2_U8 (dest, ref, ref_next);

	if (width == 16)
	    mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8);

	dest += stride;
	ref += stride;
	ref_next += stride;
    } while (--height);
}
static void MC_avg_y_16_mmx (uint8_t * dest, const uint8_t * ref,
			     int stride, int height)
{
    MC_avg_y_mmx (16, height, dest, ref, stride);
}

static void MC_avg_y_8_mmx (uint8_t * dest, const uint8_t * ref,
			    int stride, int height)
{
    MC_avg_y_mmx (8, height, dest, ref, stride);
}
/*-----------------------------------------------------------------------*/

static inline void MC_put_y_mmx (const int width, int height, uint8_t * dest,
				 const uint8_t * ref, const int stride)
{
    const uint8_t * ref_next = ref + stride;

    mmx_zero_reg ();

    do {
	mmx_average_2_U8 (dest, ref, ref_next);

	if (width == 16)
	    mmx_average_2_U8 (dest+8, ref+8, ref_next+8);

	dest += stride;
	ref += stride;
	ref_next += stride;
    } while (--height);
}
static void MC_put_y_16_mmx (uint8_t * dest, const uint8_t * ref,
			     int stride, int height)
{
    MC_put_y_mmx (16, height, dest, ref, stride);
}

static void MC_put_y_8_mmx (uint8_t * dest, const uint8_t * ref,
			    int stride, int height)
{
    MC_put_y_mmx (8, height, dest, ref, stride);
}


MPEG2_MC_EXTERN (mmx)

#endif /* HAVE_MMX */
/* CPU_MMXEXT/CPU_3DNOW adaptation layer */

#define pavg_r2r(src,dest)		\
do {					\
    if (cpu == CPU_MMXEXT)		\
	pavgb_r2r (src, dest);		\
    else				\
	pavgusb_r2r (src, dest);	\
} while (0)

#define pavg_m2r(src,dest)		\
do {					\
    if (cpu == CPU_MMXEXT)		\
	pavgb_m2r (src, dest);		\
    else				\
	pavgusb_m2r (src, dest);	\
} while (0)
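/*
 * Hypothetical scalar sketch of what pavg_r2r/pavg_m2r select (illustration
 * only, not part of the decoder): both pavgb (MMXEXT) and pavgusb (3DNow!)
 * compute a rounded-up byte average.  The cpu argument is a compile-time
 * constant in every caller below, so the compiler can fold the branch in
 * the macros away.
 */
static inline uint8_t pavg_scalar_sketch (uint8_t a, uint8_t b)
{
    return (uint8_t) (((unsigned) a + (unsigned) b + 1) >> 1);
}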
/* CPU_MMXEXT code */

static inline void MC_put1_8 (int height, uint8_t * dest, const uint8_t * ref,
			      const int stride)
{
    do {
	movq_m2r (*ref, mm0);
	movq_r2m (mm0, *dest);
	ref += stride;
	dest += stride;
    } while (--height);
}

static inline void MC_put1_16 (int height, uint8_t * dest, const uint8_t * ref,
			       const int stride)
{
    do {
	movq_m2r (*ref, mm0);
	movq_m2r (*(ref+8), mm1);
	ref += stride;
	movq_r2m (mm0, *dest);
	movq_r2m (mm1, *(dest+8));
	dest += stride;
    } while (--height);
}
static inline void MC_avg1_8 (int height, uint8_t * dest, const uint8_t * ref,
			      const int stride, const int cpu)
{
    do {
	movq_m2r (*ref, mm0);
	pavg_m2r (*dest, mm0);
	ref += stride;
	movq_r2m (mm0, *dest);
	dest += stride;
    } while (--height);
}

static inline void MC_avg1_16 (int height, uint8_t * dest, const uint8_t * ref,
			       const int stride, const int cpu)
{
    do {
	movq_m2r (*ref, mm0);
	movq_m2r (*(ref+8), mm1);
	pavg_m2r (*dest, mm0);
	pavg_m2r (*(dest+8), mm1);
	movq_r2m (mm0, *dest);
	ref += stride;
	movq_r2m (mm1, *(dest+8));
	dest += stride;
    } while (--height);
}
static inline void MC_put2_8 (int height, uint8_t * dest, const uint8_t * ref,
			      const int stride, const int offset,
			      const int cpu)
{
    do {
	movq_m2r (*ref, mm0);
	pavg_m2r (*(ref+offset), mm0);
	ref += stride;
	movq_r2m (mm0, *dest);
	dest += stride;
    } while (--height);
}

static inline void MC_put2_16 (int height, uint8_t * dest, const uint8_t * ref,
			       const int stride, const int offset,
			       const int cpu)
{
    do {
	movq_m2r (*ref, mm0);
	movq_m2r (*(ref+8), mm1);
	pavg_m2r (*(ref+offset), mm0);
	pavg_m2r (*(ref+offset+8), mm1);
	movq_r2m (mm0, *dest);
	ref += stride;
	movq_r2m (mm1, *(dest+8));
	dest += stride;
    } while (--height);
}
static inline void MC_avg2_8 (int height, uint8_t * dest, const uint8_t * ref,
			      const int stride, const int offset,
			      const int cpu)
{
    do {
	movq_m2r (*ref, mm0);
	pavg_m2r (*(ref+offset), mm0);
	pavg_m2r (*dest, mm0);
	ref += stride;
	movq_r2m (mm0, *dest);
	dest += stride;
    } while (--height);
}

static inline void MC_avg2_16 (int height, uint8_t * dest, const uint8_t * ref,
			       const int stride, const int offset,
			       const int cpu)
{
    do {
	movq_m2r (*ref, mm0);
	movq_m2r (*(ref+8), mm1);
	pavg_m2r (*(ref+offset), mm0);
	pavg_m2r (*(ref+offset+8), mm1);
	pavg_m2r (*dest, mm0);
	pavg_m2r (*(dest+8), mm1);
	ref += stride;
	movq_r2m (mm0, *dest);
	movq_r2m (mm1, *(dest+8));
	dest += stride;
    } while (--height);
}
static mmx_t mask_one = {0x0101010101010101LL};	/* low bit of every byte */
static inline void MC_put4_8 (int height, uint8_t * dest, const uint8_t * ref,
			      const int stride, const int cpu)
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+1), mm1);

    movq_m2r (*ref, mm2);
    movq_m2r (*(ref+1), mm3);

    pand_m2r (mask_one, mm7);
    psubusb_r2r (mm7, mm0);

    movq_r2m (mm0, *dest);

    movq_r2r (mm6, mm7);	/* unroll ! */
    movq_r2r (mm2, mm0);	/* unroll ! */
static inline void MC_put4_16 (int height, uint8_t * dest, const uint8_t * ref,
			       const int stride, const int cpu)
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+stride+1), mm1);
    movq_m2r (*(ref+1), mm2);
    movq_m2r (*(ref+stride), mm3);

    pand_m2r (mask_one, mm7);
    psubusb_r2r (mm7, mm0);
    movq_r2m (mm0, *dest);

    movq_m2r (*(ref+8), mm0);
    movq_m2r (*(ref+stride+9), mm1);
    movq_m2r (*(ref+9), mm2);
    movq_m2r (*(ref+stride+8), mm3);

    pand_m2r (mask_one, mm7);
    psubusb_r2r (mm7, mm0);
    movq_r2m (mm0, *(dest+8));
static inline void MC_avg4_8 (int height, uint8_t * dest, const uint8_t * ref,
			      const int stride, const int cpu)
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+stride+1), mm1);
    movq_m2r (*(ref+1), mm2);
    movq_m2r (*(ref+stride), mm3);

    pand_m2r (mask_one, mm7);
    psubusb_r2r (mm7, mm0);
    movq_m2r (*dest, mm1);

    movq_r2m (mm0, *dest);
static inline void MC_avg4_16 (int height, uint8_t * dest, const uint8_t * ref,
			       const int stride, const int cpu)
    movq_m2r (*ref, mm0);
    movq_m2r (*(ref+stride+1), mm1);
    movq_m2r (*(ref+1), mm2);
    movq_m2r (*(ref+stride), mm3);

    pand_m2r (mask_one, mm7);
    psubusb_r2r (mm7, mm0);
    movq_m2r (*dest, mm1);

    movq_r2m (mm0, *dest);

    movq_m2r (*(ref+8), mm0);
    movq_m2r (*(ref+stride+9), mm1);
    movq_m2r (*(ref+9), mm2);
    movq_m2r (*(ref+stride+8), mm3);

    pand_m2r (mask_one, mm7);
    psubusb_r2r (mm7, mm0);
    movq_m2r (*(dest+8), mm1);

    movq_r2m (mm0, *(dest+8));
static void MC_avg_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
				int stride, int height)
{
    MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
}

static void MC_avg_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
			       int stride, int height)
{
    MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
}

static void MC_put_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
				int stride, int height)
{
    MC_put1_16 (height, dest, ref, stride);
}

static void MC_put_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
			       int stride, int height)
{
    MC_put1_8 (height, dest, ref, stride);
}

static void MC_avg_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
				int stride, int height)
{
    MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
}

static void MC_avg_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
			       int stride, int height)
{
    MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
}

static void MC_put_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
				int stride, int height)
{
    MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
}

static void MC_put_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
			       int stride, int height)
{
    MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
}

static void MC_avg_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
				int stride, int height)
{
    MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
}

static void MC_avg_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
			       int stride, int height)
{
    MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
}

static void MC_put_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
				int stride, int height)
{
    MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
}

static void MC_put_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
			       int stride, int height)
{
    MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
}

static void MC_avg_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
				 int stride, int height)
{
    MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
}

static void MC_avg_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
				int stride, int height)
{
    MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
}

static void MC_put_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
				 int stride, int height)
{
    MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
}

static void MC_put_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
				int stride, int height)
{
    MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
}


MPEG2_MC_EXTERN (mmxext)

#endif /* HAVE_MMX2 */
static void MC_avg_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
			       int stride, int height)
{
    MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
}

static void MC_avg_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
			      int stride, int height)
{
    MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
}

static void MC_put_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
			       int stride, int height)
{
    MC_put1_16 (height, dest, ref, stride);
}

static void MC_put_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
			      int stride, int height)
{
    MC_put1_8 (height, dest, ref, stride);
}

static void MC_avg_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
			       int stride, int height)
{
    MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
}

static void MC_avg_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
			      int stride, int height)
{
    MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
}

static void MC_put_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
			       int stride, int height)
{
    MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
}

static void MC_put_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
			      int stride, int height)
{
    MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
}

static void MC_avg_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
			       int stride, int height)
{
    MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
}

static void MC_avg_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
			      int stride, int height)
{
    MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
}

static void MC_put_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
			       int stride, int height)
{
    MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
}

static void MC_put_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
			      int stride, int height)
{
    MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
}

static void MC_avg_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
				int stride, int height)
{
    MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);
}

static void MC_avg_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
			       int stride, int height)
{
    MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);
}

static void MC_put_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
				int stride, int height)
{
    MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);
}

static void MC_put_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
			       int stride, int height)
{
    MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);
}


MPEG2_MC_EXTERN (3dnow)

#endif /* HAVE_AMD3DNOW */