3 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
6 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
7 * See http://libmpeg2.sourceforge.net/ for updates.
9 * mpeg2dec is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * mpeg2dec is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 #if ARCH_X86 || ARCH_X86_64
31 #include "attributes.h"
32 #include "mpeg2_internal.h"
39 /* MMX code - needs a rewrite */
42 * Motion Compensation frequently needs to average values using the
43 * formula (x+y+1)>>1. Both MMXEXT and 3Dnow include one instruction
44 * to compute this, but it's been left out of classic MMX.
46 * We need to be careful of overflows when doing this computation.
47 * Rather than unpacking data to 16-bits, which reduces parallelism,
48 * we use the following formulas:
50 * (x+y)>>1 == (x&y)+((x^y)>>1)
51 * (x+y+1)>>1 == (x|y)-((x^y)>>1)
54 /* some rounding constants */
/* mask1: 0xfe in every byte. It is ANDed with (x ^ y) just before the
 * 64-bit right shift in the two-input averaging helpers, clearing each
 * byte's low bit so that bit is not shifted into the byte below it. */
55 static mmx_t mask1
= {0xfefefefefefefefeLL
};
/* round4: the value 2 in each 16-bit word. It is added to the word sums
 * before the shift right by 2 in the four-input averaging helpers. */
56 static mmx_t round4
= {0x0002000200020002LL
};
59 * This code should probably be compiled with loop unrolling
60 * (i.e., -funroll-loops in gcc) because some of the loops
61 * use a small static number of iterations. This was written
62 * with the assumption the compiler knows best about when
/* NOTE(review): only the signature is visible in this listing. The
 * four-input averaging helpers unpack bytes against mm0 as if it held
 * zero, so this presumably clears mm0 - confirm against the full source. */
66 static inline void mmx_zero_reg (void)
/*
 * Store to dest the rounded-up average of the 8 bytes at src1 and the
 * 8 bytes at src2. The bytes stay packed: (x|y) - (((x^y) & 0xfe) >> 1)
 * is used in place of (x+y+1)>>1, with mask1 clearing each byte's low bit
 * so the 64-bit shift cannot move it into the neighbouring byte.
 * Uses mm1-mm4 as scratch.
 */
72 static inline void mmx_average_2_U8 (uint8_t * dest
, const uint8_t * src1
,
75 /* *dest = (*src1 + *src2 + 1)/ 2; */
77 movq_m2r (*src1
, mm1
); /* load 8 src1 bytes */
78 movq_r2r (mm1
, mm2
); /* copy 8 src1 bytes */
80 movq_m2r (*src2
, mm3
); /* load 8 src2 bytes */
81 movq_r2r (mm3
, mm4
); /* copy 8 src2 bytes */
83 pxor_r2r (mm1
, mm3
); /* xor src1 and src2 */
84 pand_m2r (mask1
, mm3
); /* mask lower bits */
85 psrlq_i2r (1, mm3
); /* /2 */
86 por_r2r (mm2
, mm4
); /* or src1 and src2 */
87 psubb_r2r (mm3
, mm4
); /* subtract subresults */
88 movq_r2m (mm4
, *dest
); /* store result in dest */
/*
 * Blend an interpolated prediction into dest: first form the rounded-up
 * average of the 8 bytes at src1 and src2, then replace the 8 bytes at
 * dest with the rounded-up average of that result and the old dest bytes.
 * Both stages use the packed-byte (x|y) - (((x^y) & 0xfe) >> 1) form.
 * Uses mm1-mm6 as scratch.
 */
91 static inline void mmx_interp_average_2_U8 (uint8_t * dest
,
95 /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */
97 movq_m2r (*dest
, mm1
); /* load 8 dest bytes */
98 movq_r2r (mm1
, mm2
); /* copy 8 dest bytes */
100 movq_m2r (*src1
, mm3
); /* load 8 src1 bytes */
101 movq_r2r (mm3
, mm4
); /* copy 8 src1 bytes */
103 movq_m2r (*src2
, mm5
); /* load 8 src2 bytes */
104 movq_r2r (mm5
, mm6
); /* copy 8 src2 bytes */
106 pxor_r2r (mm3
, mm5
); /* xor src1 and src2 */
107 pand_m2r (mask1
, mm5
); /* mask lower bits */
108 psrlq_i2r (1, mm5
); /* /2 */
109 por_r2r (mm4
, mm6
); /* or src1 and src2 */
110 psubb_r2r (mm5
, mm6
); /* subtract subresults */
111 movq_r2r (mm6
, mm5
); /* copy subresult */
113 pxor_r2r (mm1
, mm5
); /* xor srcavg and dest */
114 pand_m2r (mask1
, mm5
); /* mask lower bits */
115 psrlq_i2r (1, mm5
); /* /2 */
116 por_r2r (mm2
, mm6
); /* or srcavg and dest */
117 psubb_r2r (mm5
, mm6
); /* subtract subresults */
118 movq_r2m (mm6
, *dest
); /* store result in dest */
/*
 * Store to dest the rounded average of four 8-byte inputs,
 * (src1 + src2 + src3 + src4 + 2) >> 2 per byte.
 * Each input is widened to 16-bit words by interleaving with mm0, the low
 * four bytes accumulating in mm1 and the high four in mm2. The sums are
 * biased by round4, shifted right by 2 and packed back to bytes with
 * unsigned saturation.
 * Assumes mm0 holds zero on entry; nothing in this function sets it -
 * TODO confirm the callers clear it first.
 * Uses mm1-mm6 as scratch.
 */
121 static inline void mmx_average_4_U8 (uint8_t * dest
, const uint8_t * src1
,
122 const uint8_t * src2
,
123 const uint8_t * src3
,
124 const uint8_t * src4
)
126 /* *dest = (*src1 + *src2 + *src3 + *src4 + 2)/ 4; */
128 movq_m2r (*src1
, mm1
); /* load 8 src1 bytes */
129 movq_r2r (mm1
, mm2
); /* copy 8 src1 bytes */
131 punpcklbw_r2r (mm0
, mm1
); /* unpack low src1 bytes */
132 punpckhbw_r2r (mm0
, mm2
); /* unpack high src1 bytes */
134 movq_m2r (*src2
, mm3
); /* load 8 src2 bytes */
135 movq_r2r (mm3
, mm4
); /* copy 8 src2 bytes */
137 punpcklbw_r2r (mm0
, mm3
); /* unpack low src2 bytes */
138 punpckhbw_r2r (mm0
, mm4
); /* unpack high src2 bytes */
140 paddw_r2r (mm3
, mm1
); /* add lows */
141 paddw_r2r (mm4
, mm2
); /* add highs */
143 /* now have partials in mm1 and mm2 */
145 movq_m2r (*src3
, mm3
); /* load 8 src3 bytes */
146 movq_r2r (mm3
, mm4
); /* copy 8 src3 bytes */
148 punpcklbw_r2r (mm0
, mm3
); /* unpack low src3 bytes */
149 punpckhbw_r2r (mm0
, mm4
); /* unpack high src3 bytes */
151 paddw_r2r (mm3
, mm1
); /* add lows */
152 paddw_r2r (mm4
, mm2
); /* add highs */
154 movq_m2r (*src4
, mm5
); /* load 8 src4 bytes */
155 movq_r2r (mm5
, mm6
); /* copy 8 src4 bytes */
157 punpcklbw_r2r (mm0
, mm5
); /* unpack low src4 bytes */
158 punpckhbw_r2r (mm0
, mm6
); /* unpack high src4 bytes */
160 paddw_r2r (mm5
, mm1
); /* add lows */
161 paddw_r2r (mm6
, mm2
); /* add highs */
163 /* now have subtotal in mm1 and mm2 */
165 paddw_m2r (round4
, mm1
);
166 psraw_i2r (2, mm1
); /* /4 */
167 paddw_m2r (round4
, mm2
);
168 psraw_i2r (2, mm2
); /* /4 */
170 packuswb_r2r (mm2
, mm1
); /* pack (w/ saturation) */
171 movq_r2m (mm1
, *dest
); /* store result in dest */
/*
 * Blend a four-input prediction into dest: compute
 * (src1 + src2 + src3 + src4 + 2) >> 2 per byte via 16-bit word sums
 * (low halves in mm1, high halves in mm2), pack that back to bytes, then
 * replace the 8 bytes at dest with the rounded-up average of the packed
 * result and the old dest bytes using the packed-byte
 * (x|y) - (((x^y) & 0xfe) >> 1) form.
 * Assumes mm0 holds zero on entry; nothing in this function sets it -
 * TODO confirm the callers clear it first.
 * Uses mm1-mm6 as scratch.
 */
174 static inline void mmx_interp_average_4_U8 (uint8_t * dest
,
175 const uint8_t * src1
,
176 const uint8_t * src2
,
177 const uint8_t * src3
,
178 const uint8_t * src4
)
180 /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */
182 movq_m2r (*src1
, mm1
); /* load 8 src1 bytes */
183 movq_r2r (mm1
, mm2
); /* copy 8 src1 bytes */
185 punpcklbw_r2r (mm0
, mm1
); /* unpack low src1 bytes */
186 punpckhbw_r2r (mm0
, mm2
); /* unpack high src1 bytes */
188 movq_m2r (*src2
, mm3
); /* load 8 src2 bytes */
189 movq_r2r (mm3
, mm4
); /* copy 8 src2 bytes */
191 punpcklbw_r2r (mm0
, mm3
); /* unpack low src2 bytes */
192 punpckhbw_r2r (mm0
, mm4
); /* unpack high src2 bytes */
194 paddw_r2r (mm3
, mm1
); /* add lows */
195 paddw_r2r (mm4
, mm2
); /* add highs */
197 /* now have partials in mm1 and mm2 */
199 movq_m2r (*src3
, mm3
); /* load 8 src3 bytes */
200 movq_r2r (mm3
, mm4
); /* copy 8 src3 bytes */
202 punpcklbw_r2r (mm0
, mm3
); /* unpack low src3 bytes */
203 punpckhbw_r2r (mm0
, mm4
); /* unpack high src3 bytes */
205 paddw_r2r (mm3
, mm1
); /* add lows */
206 paddw_r2r (mm4
, mm2
); /* add highs */
208 movq_m2r (*src4
, mm5
); /* load 8 src4 bytes */
209 movq_r2r (mm5
, mm6
); /* copy 8 src4 bytes */
211 punpcklbw_r2r (mm0
, mm5
); /* unpack low src4 bytes */
212 punpckhbw_r2r (mm0
, mm6
); /* unpack high src4 bytes */
214 paddw_r2r (mm5
, mm1
); /* add lows */
215 paddw_r2r (mm6
, mm2
); /* add highs */
217 paddw_m2r (round4
, mm1
);
218 psraw_i2r (2, mm1
); /* /4 */
219 paddw_m2r (round4
, mm2
);
220 psraw_i2r (2, mm2
); /* /4 */
222 /* now have subtotal/4 in mm1 and mm2 */
224 movq_m2r (*dest
, mm3
); /* load 8 dest bytes */
225 movq_r2r (mm3
, mm4
); /* copy 8 dest bytes */
227 packuswb_r2r (mm2
, mm1
); /* pack (w/ saturation) */
228 movq_r2r (mm1
,mm2
); /* copy subresult */
230 pxor_r2r (mm1
, mm3
); /* xor srcavg and dest */
231 pand_m2r (mask1
, mm3
); /* mask lower bits */
232 psrlq_i2r (1, mm3
); /* /2 */
233 por_r2r (mm2
, mm4
); /* or srcavg and dest */
234 psubb_r2r (mm3
, mm4
); /* subtract subresults */
235 movq_r2m (mm4
, *dest
); /* store result in dest */
238 /*-----------------------------------------------------------------------*/
/*
 * Average ref into dest in place, with no interpolation of ref: each
 * visible call replaces 8 dest bytes with the rounded-up average of those
 * bytes and the matching ref bytes. The second call covers bytes 8..15.
 * NOTE(review): the row loop, the stepping of dest/ref by stride, and the
 * test that selects the second call (presumably width == 16) are not
 * visible in this listing - confirm against the full source.
 */
240 static inline void MC_avg_mmx (const int width
, int height
, uint8_t * dest
,
241 const uint8_t * ref
, const int stride
)
246 mmx_average_2_U8 (dest
, dest
, ref
);
249 mmx_average_2_U8 (dest
+8, dest
+8, ref
+8);
/* 16-wide entry point: forwards to MC_avg_mmx with width fixed at 16.
 * Note the argument order differs: (dest, ref, stride, height) here,
 * (width, height, dest, ref, stride) in the helper. */
256 static void MC_avg_o_16_mmx (uint8_t * dest
, const uint8_t * ref
,
257 int stride
, int height
)
259 MC_avg_mmx (16, height
, dest
, ref
, stride
);
/* 8-wide entry point: forwards to MC_avg_mmx with width fixed at 8. */
262 static void MC_avg_o_8_mmx (uint8_t * dest
, const uint8_t * ref
,
263 int stride
, int height
)
265 MC_avg_mmx (8, height
, dest
, ref
, stride
);
268 /*-----------------------------------------------------------------------*/
/*
 * Plain copy from ref to dest through mm1: the first load/store pair moves
 * bytes 0..7, the second pair moves bytes 8..15.
 * NOTE(review): the row loop, the stepping of dest/ref by stride, and the
 * test that selects the second pair (presumably width == 16) are not
 * visible in this listing - confirm against the full source.
 */
270 static inline void MC_put_mmx (const int width
, int height
, uint8_t * dest
,
271 const uint8_t * ref
, const int stride
)
276 movq_m2r (* ref
, mm1
); /* load 8 ref bytes */
277 movq_r2m (mm1
,* dest
); /* store 8 bytes at curr */
281 movq_m2r (* (ref
+8), mm1
); /* load 8 ref bytes */
282 movq_r2m (mm1
,* (dest
+8)); /* store 8 bytes at curr */
/* 16-wide entry point: forwards to MC_put_mmx with width fixed at 16. */
290 static void MC_put_o_16_mmx (uint8_t * dest
, const uint8_t * ref
,
291 int stride
, int height
)
293 MC_put_mmx (16, height
, dest
, ref
, stride
);
/* 8-wide entry point: forwards to MC_put_mmx with width fixed at 8. */
296 static void MC_put_o_8_mmx (uint8_t * dest
, const uint8_t * ref
,
297 int stride
, int height
)
299 MC_put_mmx (8, height
, dest
, ref
, stride
);
302 /*-----------------------------------------------------------------------*/
304 /* Half pixel interpolation in the x direction */
/*
 * Horizontal interpolation blended into dest: each visible call averages
 * the bytes at ref with the bytes one position to the right (ref+1), then
 * averages that into the existing dest bytes. The second call covers
 * bytes 8..15.
 * NOTE(review): the row loop and the test that selects the second call
 * (presumably width == 16) are not visible in this listing - confirm.
 */
305 static inline void MC_avg_x_mmx (const int width
, int height
, uint8_t * dest
,
306 const uint8_t * ref
, const int stride
)
311 mmx_interp_average_2_U8 (dest
, ref
, ref
+1);
314 mmx_interp_average_2_U8 (dest
+8, ref
+8, ref
+9);
/* 16-wide entry point: forwards to MC_avg_x_mmx with width fixed at 16. */
321 static void MC_avg_x_16_mmx (uint8_t * dest
, const uint8_t * ref
,
322 int stride
, int height
)
324 MC_avg_x_mmx (16, height
, dest
, ref
, stride
);
/* 8-wide entry point: forwards to MC_avg_x_mmx with width fixed at 8. */
327 static void MC_avg_x_8_mmx (uint8_t * dest
, const uint8_t * ref
,
328 int stride
, int height
)
330 MC_avg_x_mmx (8, height
, dest
, ref
, stride
);
333 /*-----------------------------------------------------------------------*/
/*
 * Horizontal interpolation written straight to dest: each visible call
 * stores the rounded-up average of the bytes at ref and the bytes one
 * position to the right (ref+1). The second call covers bytes 8..15.
 * NOTE(review): the row loop and the test that selects the second call
 * (presumably width == 16) are not visible in this listing - confirm.
 */
335 static inline void MC_put_x_mmx (const int width
, int height
, uint8_t * dest
,
336 const uint8_t * ref
, const int stride
)
341 mmx_average_2_U8 (dest
, ref
, ref
+1);
344 mmx_average_2_U8 (dest
+8, ref
+8, ref
+9);
/* 16-wide entry point: forwards to MC_put_x_mmx with width fixed at 16. */
351 static void MC_put_x_16_mmx (uint8_t * dest
, const uint8_t * ref
,
352 int stride
, int height
)
354 MC_put_x_mmx (16, height
, dest
, ref
, stride
);
/* 8-wide entry point: forwards to MC_put_x_mmx with width fixed at 8. */
357 static void MC_put_x_8_mmx (uint8_t * dest
, const uint8_t * ref
,
358 int stride
, int height
)
360 MC_put_x_mmx (8, height
, dest
, ref
, stride
);
363 /*-----------------------------------------------------------------------*/
/*
 * Two-dimensional interpolation blended into dest: each visible call
 * averages four neighbours - ref, ref+1, and the same two positions one
 * stride further on (ref_next) - and then averages that into the existing
 * dest bytes. The second call covers bytes 8..15.
 * NOTE(review): the row loop, the stepping of ref/ref_next/dest, and the
 * test that selects the second call (presumably width == 16) are not
 * visible in this listing - confirm.
 */
365 static inline void MC_avg_xy_mmx (const int width
, int height
, uint8_t * dest
,
366 const uint8_t * ref
, const int stride
)
368 const uint8_t * ref_next
= ref
+ stride
;
373 mmx_interp_average_4_U8 (dest
, ref
, ref
+1, ref_next
, ref_next
+1);
376 mmx_interp_average_4_U8 (dest
+8, ref
+8, ref
+9,
377 ref_next
+8, ref_next
+9);
/* 16-wide entry point: forwards to MC_avg_xy_mmx with width fixed at 16. */
385 static void MC_avg_xy_16_mmx (uint8_t * dest
, const uint8_t * ref
,
386 int stride
, int height
)
388 MC_avg_xy_mmx (16, height
, dest
, ref
, stride
);
/* 8-wide entry point: forwards to MC_avg_xy_mmx with width fixed at 8. */
391 static void MC_avg_xy_8_mmx (uint8_t * dest
, const uint8_t * ref
,
392 int stride
, int height
)
394 MC_avg_xy_mmx (8, height
, dest
, ref
, stride
);
397 /*-----------------------------------------------------------------------*/
/*
 * Two-dimensional interpolation written straight to dest: each visible
 * call stores the rounded average of four neighbours - ref, ref+1, and the
 * same two positions one stride further on (ref_next). The second call
 * covers bytes 8..15.
 * NOTE(review): the row loop, the stepping of ref/ref_next/dest, and the
 * test that selects the second call (presumably width == 16) are not
 * visible in this listing - confirm.
 */
399 static inline void MC_put_xy_mmx (const int width
, int height
, uint8_t * dest
,
400 const uint8_t * ref
, const int stride
)
402 const uint8_t * ref_next
= ref
+ stride
;
407 mmx_average_4_U8 (dest
, ref
, ref
+1, ref_next
, ref_next
+1);
410 mmx_average_4_U8 (dest
+8, ref
+8, ref
+9, ref_next
+8, ref_next
+9);
/* 16-wide entry point: forwards to MC_put_xy_mmx with width fixed at 16. */
418 static void MC_put_xy_16_mmx (uint8_t * dest
, const uint8_t * ref
,
419 int stride
, int height
)
421 MC_put_xy_mmx (16, height
, dest
, ref
, stride
);
/* 8-wide entry point: forwards to MC_put_xy_mmx with width fixed at 8. */
424 static void MC_put_xy_8_mmx (uint8_t * dest
, const uint8_t * ref
,
425 int stride
, int height
)
427 MC_put_xy_mmx (8, height
, dest
, ref
, stride
);
430 /*-----------------------------------------------------------------------*/
/*
 * Vertical interpolation blended into dest: each visible call averages the
 * bytes at ref with the bytes one stride further on (ref_next), then
 * averages that into the existing dest bytes. The second call covers
 * bytes 8..15.
 * NOTE(review): the row loop, the stepping of ref/ref_next/dest, and the
 * test that selects the second call (presumably width == 16) are not
 * visible in this listing - confirm.
 */
432 static inline void MC_avg_y_mmx (const int width
, int height
, uint8_t * dest
,
433 const uint8_t * ref
, const int stride
)
435 const uint8_t * ref_next
= ref
+ stride
;
440 mmx_interp_average_2_U8 (dest
, ref
, ref_next
);
443 mmx_interp_average_2_U8 (dest
+8, ref
+8, ref_next
+8);
/* 16-wide entry point: forwards to MC_avg_y_mmx with width fixed at 16. */
451 static void MC_avg_y_16_mmx (uint8_t * dest
, const uint8_t * ref
,
452 int stride
, int height
)
454 MC_avg_y_mmx (16, height
, dest
, ref
, stride
);
/* 8-wide entry point: forwards to MC_avg_y_mmx with width fixed at 8. */
457 static void MC_avg_y_8_mmx (uint8_t * dest
, const uint8_t * ref
,
458 int stride
, int height
)
460 MC_avg_y_mmx (8, height
, dest
, ref
, stride
);
463 /*-----------------------------------------------------------------------*/
/*
 * Vertical interpolation written straight to dest: each visible call
 * stores the rounded-up average of the bytes at ref and the bytes one
 * stride further on (ref_next). The second call covers bytes 8..15.
 * NOTE(review): the row loop, the stepping of ref/ref_next/dest, and the
 * test that selects the second call (presumably width == 16) are not
 * visible in this listing - confirm.
 */
465 static inline void MC_put_y_mmx (const int width
, int height
, uint8_t * dest
,
466 const uint8_t * ref
, const int stride
)
468 const uint8_t * ref_next
= ref
+ stride
;
473 mmx_average_2_U8 (dest
, ref
, ref_next
);
476 mmx_average_2_U8 (dest
+8, ref
+8, ref_next
+8);
/* 16-wide entry point: forwards to MC_put_y_mmx with width fixed at 16. */
484 static void MC_put_y_16_mmx (uint8_t * dest
, const uint8_t * ref
,
485 int stride
, int height
)
487 MC_put_y_mmx (16, height
, dest
, ref
, stride
);
/* 8-wide entry point: forwards to MC_put_y_mmx with width fixed at 8. */
490 static void MC_put_y_8_mmx (uint8_t * dest
, const uint8_t * ref
,
491 int stride
, int height
)
493 MC_put_y_mmx (8, height
, dest
, ref
, stride
);
/* NOTE(review): MPEG2_MC_EXTERN is defined outside this file; it
 * presumably publishes the MC_*_mmx routines above under the "mmx"
 * suffix - confirm against its definition. */
497 MPEG2_MC_EXTERN (mmx
)
505 /* CPU_MMXEXT/CPU_3DNOW adaptation layer */
/*
 * pavg_r2r / pavg_m2r: pick the packed-byte average instruction for the
 * register-source and memory-source forms respectively. Both test a
 * variable named `cpu` that must be in scope where the macro is expanded:
 * CPU_MMXEXT selects pavgb; the pavgusb form is presumably the other
 * branch.
 * NOTE(review): the lines that would hold the `else` and any enclosing
 * do/while wrapper are not visible in this listing - confirm.
 */
507 #define pavg_r2r(src,dest) \
509 if (cpu == CPU_MMXEXT) \
510 pavgb_r2r (src, dest); \
512 pavgusb_r2r (src, dest); \
515 #define pavg_m2r(src,dest) \
517 if (cpu == CPU_MMXEXT) \
518 pavgb_m2r (src, dest); \
520 pavgusb_m2r (src, dest); \
524 /* CPU_MMXEXT code */
/*
 * Plain 8-byte copy: loads 8 bytes from ref into mm0 and stores them to
 * dest.
 * NOTE(review): the rest of the signature (the callers pass stride as a
 * fourth argument) and the row loop are not visible in this listing.
 */
527 static inline void MC_put1_8 (int height
, uint8_t * dest
, const uint8_t * ref
,
531 movq_m2r (*ref
, mm0
);
532 movq_r2m (mm0
, *dest
);
/*
 * Plain 16-byte copy: loads bytes 0..7 and 8..15 of ref into mm0 and mm1,
 * then stores them to the same offsets in dest.
 * NOTE(review): the rest of the signature (the callers pass stride as a
 * fourth argument) and the row loop are not visible in this listing.
 */
538 static inline void MC_put1_16 (int height
, uint8_t * dest
, const uint8_t * ref
,
542 movq_m2r (*ref
, mm0
);
543 movq_m2r (*(ref
+8), mm1
);
545 movq_r2m (mm0
, *dest
);
546 movq_r2m (mm1
, *(dest
+8));
/*
 * Average 8 ref bytes into dest: loads ref into mm0, averages it with the
 * current dest bytes via pavg_m2r, and stores the result back to dest.
 * cpu is read by the pavg_m2r macro to choose the instruction.
 * NOTE(review): the row loop is not visible in this listing.
 */
551 static inline void MC_avg1_8 (int height
, uint8_t * dest
, const uint8_t * ref
,
552 const int stride
, const int cpu
)
555 movq_m2r (*ref
, mm0
);
556 pavg_m2r (*dest
, mm0
);
558 movq_r2m (mm0
, *dest
);
/*
 * Average 16 ref bytes into dest: bytes 0..7 go through mm0 and bytes
 * 8..15 through mm1; each half is averaged with the current dest bytes
 * via pavg_m2r and stored back. cpu is read by the pavg_m2r macro to
 * choose the instruction.
 * NOTE(review): the row loop is not visible in this listing.
 */
563 static inline void MC_avg1_16 (int height
, uint8_t * dest
, const uint8_t * ref
,
564 const int stride
, const int cpu
)
567 movq_m2r (*ref
, mm0
);
568 movq_m2r (*(ref
+8), mm1
);
569 pavg_m2r (*dest
, mm0
);
570 pavg_m2r (*(dest
+8), mm1
);
571 movq_r2m (mm0
, *dest
);
573 movq_r2m (mm1
, *(dest
+8));
/*
 * Two-sample interpolation, 8 bytes: stores to dest the pavg average of
 * the bytes at ref and the bytes at ref+offset. The callers in this file
 * pass offset = 1 or offset = stride.
 * NOTE(review): the final parameter line (the callers pass a CPU_*
 * constant, which pavg_m2r reads as `cpu`) and the row loop are not
 * visible in this listing.
 */
578 static inline void MC_put2_8 (int height
, uint8_t * dest
, const uint8_t * ref
,
579 const int stride
, const int offset
,
583 movq_m2r (*ref
, mm0
);
584 pavg_m2r (*(ref
+offset
), mm0
);
586 movq_r2m (mm0
, *dest
);
/*
 * Two-sample interpolation, 16 bytes: for each 8-byte half (mm0 for bytes
 * 0..7, mm1 for bytes 8..15) stores to dest the pavg average of the bytes
 * at ref and the bytes at ref+offset. The callers in this file pass
 * offset = 1 or offset = stride.
 * NOTE(review): the final parameter line (the callers pass a CPU_*
 * constant, which pavg_m2r reads as `cpu`) and the row loop are not
 * visible in this listing.
 */
591 static inline void MC_put2_16 (int height
, uint8_t * dest
, const uint8_t * ref
,
592 const int stride
, const int offset
,
596 movq_m2r (*ref
, mm0
);
597 movq_m2r (*(ref
+8), mm1
);
598 pavg_m2r (*(ref
+offset
), mm0
);
599 pavg_m2r (*(ref
+offset
+8), mm1
);
600 movq_r2m (mm0
, *dest
);
602 movq_r2m (mm1
, *(dest
+8));
/*
 * Two-sample interpolation blended into dest, 8 bytes: averages the bytes
 * at ref with the bytes at ref+offset, averages that with the current
 * dest bytes, and stores the result to dest. The callers in this file
 * pass offset = 1 or offset = stride.
 * NOTE(review): the final parameter line (the callers pass a CPU_*
 * constant, which pavg_m2r reads as `cpu`) and the row loop are not
 * visible in this listing.
 */
607 static inline void MC_avg2_8 (int height
, uint8_t * dest
, const uint8_t * ref
,
608 const int stride
, const int offset
,
612 movq_m2r (*ref
, mm0
);
613 pavg_m2r (*(ref
+offset
), mm0
);
614 pavg_m2r (*dest
, mm0
);
616 movq_r2m (mm0
, *dest
);
/*
 * Two-sample interpolation blended into dest, 16 bytes: for each 8-byte
 * half (mm0 for bytes 0..7, mm1 for bytes 8..15) averages the bytes at ref
 * with the bytes at ref+offset, averages that with the current dest
 * bytes, and stores the result to dest. The callers in this file pass
 * offset = 1 or offset = stride.
 * NOTE(review): the final parameter line (the callers pass a CPU_*
 * constant, which pavg_m2r reads as `cpu`) and the row loop are not
 * visible in this listing.
 */
621 static inline void MC_avg2_16 (int height
, uint8_t * dest
, const uint8_t * ref
,
622 const int stride
, const int offset
,
626 movq_m2r (*ref
, mm0
);
627 movq_m2r (*(ref
+8), mm1
);
628 pavg_m2r (*(ref
+offset
), mm0
);
629 pavg_m2r (*(ref
+offset
+8), mm1
);
630 pavg_m2r (*dest
, mm0
);
631 pavg_m2r (*(dest
+8), mm1
);
633 movq_r2m (mm0
, *dest
);
634 movq_r2m (mm1
, *(dest
+8));
/* mask_one: 0x01 in every byte. The MC_put4_* / MC_avg4_* helpers AND it
 * into mm7 so that only bit 0 of each byte survives, before mm7 is
 * subtracted from the averaged result. */
639 static mmx_t mask_one
= {0x0101010101010101LL
};
/*
 * 8-byte helper behind the MC_put_xy_8_* entry points.
 * NOTE(review): most of this function's instruction sequence and its loop
 * are not visible in this listing. What is visible: ref and ref+1 are
 * loaded into mm0/mm1, ref and ref+1 are loaded again into mm2/mm3
 * (presumably after ref has advanced a row), mm7 is reduced to one bit per
 * byte, that bit is subtracted from mm0 with unsigned saturation, and mm0
 * is stored to dest. The subtraction is presumably a rounding correction
 * for nested pavg averages - confirm against the full source.
 */
641 static inline void MC_put4_8 (int height
, uint8_t * dest
, const uint8_t * ref
,
642 const int stride
, const int cpu
)
644 movq_m2r (*ref
, mm0
);
645 movq_m2r (*(ref
+1), mm1
);
652 movq_m2r (*ref
, mm2
);
655 movq_m2r (*(ref
+1), mm3
);
/* keep only bit 0 of each byte of mm7 */
667 pand_m2r (mask_one
, mm7
);
/* subtract that bit from each byte of mm0, saturating at 0 */
669 psubusb_r2r (mm7
, mm0
);
672 movq_r2m (mm0
, *dest
);
/* copy mm6 into mm7 and mm2 into mm0; presumably carried into the next
 * pass - the loop itself is not visible here */
675 movq_r2r (mm6
, mm7
); /* unroll ! */
676 movq_r2r (mm2
, mm0
); /* unroll ! */
/*
 * 16-byte helper behind the MC_put_xy_16_* entry points, processed as two
 * 8-byte halves.
 * NOTE(review): the averaging instructions between the loads and the
 * pand, and the row loop, are not visible in this listing. What is
 * visible per half: the four neighbours ref, ref+stride+1, ref+1 and
 * ref+stride are loaded into mm0..mm3, mm7 is reduced to one bit per
 * byte, that bit is subtracted from mm0 with unsigned saturation, and mm0
 * is stored to dest. The subtraction is presumably a rounding correction
 * for nested pavg averages - confirm against the full source.
 */
680 static inline void MC_put4_16 (int height
, uint8_t * dest
, const uint8_t * ref
,
681 const int stride
, const int cpu
)
/* first half: bytes 0..7 */
684 movq_m2r (*ref
, mm0
);
685 movq_m2r (*(ref
+stride
+1), mm1
);
687 movq_m2r (*(ref
+1), mm2
);
689 movq_m2r (*(ref
+stride
), mm3
);
698 pand_m2r (mask_one
, mm7
);
700 psubusb_r2r (mm7
, mm0
);
701 movq_r2m (mm0
, *dest
);
/* second half: bytes 8..15 */
703 movq_m2r (*(ref
+8), mm0
);
704 movq_m2r (*(ref
+stride
+9), mm1
);
706 movq_m2r (*(ref
+9), mm2
);
708 movq_m2r (*(ref
+stride
+8), mm3
);
717 pand_m2r (mask_one
, mm7
);
719 psubusb_r2r (mm7
, mm0
);
721 movq_r2m (mm0
, *(dest
+8));
/*
 * 8-byte helper behind the MC_avg_xy_8_* entry points.
 * NOTE(review): the averaging instructions and the row loop are not
 * visible in this listing. What is visible: the four neighbours ref,
 * ref+stride+1, ref+1 and ref+stride are loaded into mm0..mm3, mm7 is
 * reduced to one bit per byte and subtracted from mm0 with unsigned
 * saturation, the current dest bytes are loaded into mm1 (presumably to
 * be averaged with mm0 on a line not shown), and mm0 is stored to dest.
 */
726 static inline void MC_avg4_8 (int height
, uint8_t * dest
, const uint8_t * ref
,
727 const int stride
, const int cpu
)
730 movq_m2r (*ref
, mm0
);
731 movq_m2r (*(ref
+stride
+1), mm1
);
733 movq_m2r (*(ref
+1), mm2
);
735 movq_m2r (*(ref
+stride
), mm3
);
744 pand_m2r (mask_one
, mm7
);
746 psubusb_r2r (mm7
, mm0
);
747 movq_m2r (*dest
, mm1
);
750 movq_r2m (mm0
, *dest
);
/*
 * 16-byte helper behind the MC_avg_xy_16_* entry points, processed as two
 * 8-byte halves.
 * NOTE(review): the averaging instructions and the row loop are not
 * visible in this listing. What is visible per half: the four neighbours
 * ref, ref+stride+1, ref+1 and ref+stride are loaded into mm0..mm3, mm7
 * is reduced to one bit per byte and subtracted from mm0 with unsigned
 * saturation, the current dest bytes are loaded into mm1 (presumably to
 * be averaged with mm0 on a line not shown), and mm0 is stored to dest.
 */
755 static inline void MC_avg4_16 (int height
, uint8_t * dest
, const uint8_t * ref
,
756 const int stride
, const int cpu
)
/* first half: bytes 0..7 */
759 movq_m2r (*ref
, mm0
);
760 movq_m2r (*(ref
+stride
+1), mm1
);
762 movq_m2r (*(ref
+1), mm2
);
764 movq_m2r (*(ref
+stride
), mm3
);
773 pand_m2r (mask_one
, mm7
);
775 psubusb_r2r (mm7
, mm0
);
776 movq_m2r (*dest
, mm1
);
778 movq_r2m (mm0
, *dest
);
/* second half: bytes 8..15 */
780 movq_m2r (*(ref
+8), mm0
);
781 movq_m2r (*(ref
+stride
+9), mm1
);
783 movq_m2r (*(ref
+9), mm2
);
785 movq_m2r (*(ref
+stride
+8), mm3
);
794 pand_m2r (mask_one
, mm7
);
796 psubusb_r2r (mm7
, mm0
);
797 movq_m2r (*(dest
+8), mm1
);
800 movq_r2m (mm0
, *(dest
+8));
/* MMXEXT entry point, 16 wide: forwards to MC_avg1_16 with
 * cpu = CPU_MMXEXT. Argument order changes from
 * (dest, ref, stride, height) to (height, dest, ref, stride, cpu). */
805 static void MC_avg_o_16_mmxext (uint8_t * dest
, const uint8_t * ref
,
806 int stride
, int height
)
808 MC_avg1_16 (height
, dest
, ref
, stride
, CPU_MMXEXT
);
/* MMXEXT entry point, 8 wide: forwards to MC_avg1_8 with
 * cpu = CPU_MMXEXT. */
811 static void MC_avg_o_8_mmxext (uint8_t * dest
, const uint8_t * ref
,
812 int stride
, int height
)
814 MC_avg1_8 (height
, dest
, ref
, stride
, CPU_MMXEXT
);
/* MMXEXT entry point, 16 wide: forwards to MC_put1_16, which is a plain
 * copy and takes no cpu argument. */
817 static void MC_put_o_16_mmxext (uint8_t * dest
, const uint8_t * ref
,
818 int stride
, int height
)
820 MC_put1_16 (height
, dest
, ref
, stride
);
/* MMXEXT entry point, 8 wide: forwards to MC_put1_8, which is a plain
 * copy and takes no cpu argument. */
823 static void MC_put_o_8_mmxext (uint8_t * dest
, const uint8_t * ref
,
824 int stride
, int height
)
826 MC_put1_8 (height
, dest
, ref
, stride
);
/* MMXEXT entry point, 16 wide: forwards to MC_avg2_16 with offset 1, so
 * each ref byte is averaged with its right-hand neighbour before being
 * averaged into dest; cpu = CPU_MMXEXT. */
829 static void MC_avg_x_16_mmxext (uint8_t * dest
, const uint8_t * ref
,
830 int stride
, int height
)
832 MC_avg2_16 (height
, dest
, ref
, stride
, 1, CPU_MMXEXT
);
/* MMXEXT entry point, 8 wide: forwards to MC_avg2_8 with offset 1, so each
 * ref byte is averaged with its right-hand neighbour before being
 * averaged into dest; cpu = CPU_MMXEXT. */
835 static void MC_avg_x_8_mmxext (uint8_t * dest
, const uint8_t * ref
,
836 int stride
, int height
)
838 MC_avg2_8 (height
, dest
, ref
, stride
, 1, CPU_MMXEXT
);
/* MMXEXT entry point, 16 wide: forwards to MC_put2_16 with offset 1, so
 * dest receives the average of each ref byte and its right-hand
 * neighbour; cpu = CPU_MMXEXT. */
841 static void MC_put_x_16_mmxext (uint8_t * dest
, const uint8_t * ref
,
842 int stride
, int height
)
844 MC_put2_16 (height
, dest
, ref
, stride
, 1, CPU_MMXEXT
);
/* MMXEXT entry point, 8 wide: forwards to MC_put2_8 with offset 1, so dest
 * receives the average of each ref byte and its right-hand neighbour;
 * cpu = CPU_MMXEXT. */
847 static void MC_put_x_8_mmxext (uint8_t * dest
, const uint8_t * ref
,
848 int stride
, int height
)
850 MC_put2_8 (height
, dest
, ref
, stride
, 1, CPU_MMXEXT
);
/* MMXEXT entry point, 16 wide: forwards to MC_avg2_16 with
 * offset = stride, so each ref byte is averaged with the byte one stride
 * further on before being averaged into dest; cpu = CPU_MMXEXT. */
853 static void MC_avg_y_16_mmxext (uint8_t * dest
, const uint8_t * ref
,
854 int stride
, int height
)
856 MC_avg2_16 (height
, dest
, ref
, stride
, stride
, CPU_MMXEXT
);
/* MMXEXT entry point, 8 wide: forwards to MC_avg2_8 with offset = stride,
 * so each ref byte is averaged with the byte one stride further on before
 * being averaged into dest; cpu = CPU_MMXEXT. */
859 static void MC_avg_y_8_mmxext (uint8_t * dest
, const uint8_t * ref
,
860 int stride
, int height
)
862 MC_avg2_8 (height
, dest
, ref
, stride
, stride
, CPU_MMXEXT
);
/* MMXEXT entry point, 16 wide: forwards to MC_put2_16 with
 * offset = stride, so dest receives the average of each ref byte and the
 * byte one stride further on; cpu = CPU_MMXEXT. */
865 static void MC_put_y_16_mmxext (uint8_t * dest
, const uint8_t * ref
,
866 int stride
, int height
)
868 MC_put2_16 (height
, dest
, ref
, stride
, stride
, CPU_MMXEXT
);
/* MMXEXT entry point, 8 wide: forwards to MC_put2_8 with offset = stride,
 * so dest receives the average of each ref byte and the byte one stride
 * further on; cpu = CPU_MMXEXT. */
871 static void MC_put_y_8_mmxext (uint8_t * dest
, const uint8_t * ref
,
872 int stride
, int height
)
874 MC_put2_8 (height
, dest
, ref
, stride
, stride
, CPU_MMXEXT
);
/* MMXEXT entry point, 16 wide: forwards to MC_avg4_16 with
 * cpu = CPU_MMXEXT. */
877 static void MC_avg_xy_16_mmxext (uint8_t * dest
, const uint8_t * ref
,
878 int stride
, int height
)
880 MC_avg4_16 (height
, dest
, ref
, stride
, CPU_MMXEXT
);
/* MMXEXT entry point, 8 wide: forwards to MC_avg4_8 with
 * cpu = CPU_MMXEXT. */
883 static void MC_avg_xy_8_mmxext (uint8_t * dest
, const uint8_t * ref
,
884 int stride
, int height
)
886 MC_avg4_8 (height
, dest
, ref
, stride
, CPU_MMXEXT
);
/* MMXEXT entry point, 16 wide: forwards to MC_put4_16 with
 * cpu = CPU_MMXEXT. */
889 static void MC_put_xy_16_mmxext (uint8_t * dest
, const uint8_t * ref
,
890 int stride
, int height
)
892 MC_put4_16 (height
, dest
, ref
, stride
, CPU_MMXEXT
);
/* MMXEXT entry point, 8 wide: forwards to MC_put4_8 with
 * cpu = CPU_MMXEXT. */
895 static void MC_put_xy_8_mmxext (uint8_t * dest
, const uint8_t * ref
,
896 int stride
, int height
)
898 MC_put4_8 (height
, dest
, ref
, stride
, CPU_MMXEXT
);
/* NOTE(review): MPEG2_MC_EXTERN is defined outside this file; it
 * presumably publishes the MC_*_mmxext routines above under the "mmxext"
 * suffix - confirm against its definition. */
902 MPEG2_MC_EXTERN (mmxext
)
/* 3DNow! entry point, 16 wide: forwards to MC_avg1_16 with
 * cpu = CPU_3DNOW. Argument order changes from
 * (dest, ref, stride, height) to (height, dest, ref, stride, cpu). */
906 static void MC_avg_o_16_3dnow (uint8_t * dest
, const uint8_t * ref
,
907 int stride
, int height
)
909 MC_avg1_16 (height
, dest
, ref
, stride
, CPU_3DNOW
);
/* 3DNow! entry point, 8 wide: forwards to MC_avg1_8 with
 * cpu = CPU_3DNOW. */
912 static void MC_avg_o_8_3dnow (uint8_t * dest
, const uint8_t * ref
,
913 int stride
, int height
)
915 MC_avg1_8 (height
, dest
, ref
, stride
, CPU_3DNOW
);
/* 3DNow! entry point, 16 wide: forwards to MC_put1_16, which is a plain
 * copy and takes no cpu argument. */
918 static void MC_put_o_16_3dnow (uint8_t * dest
, const uint8_t * ref
,
919 int stride
, int height
)
921 MC_put1_16 (height
, dest
, ref
, stride
);
/* 3DNow! entry point, 8 wide: forwards to MC_put1_8, which is a plain
 * copy and takes no cpu argument. */
924 static void MC_put_o_8_3dnow (uint8_t * dest
, const uint8_t * ref
,
925 int stride
, int height
)
927 MC_put1_8 (height
, dest
, ref
, stride
);
/* 3DNow! entry point, 16 wide: forwards to MC_avg2_16 with offset 1, so
 * each ref byte is averaged with its right-hand neighbour before being
 * averaged into dest; cpu = CPU_3DNOW. */
930 static void MC_avg_x_16_3dnow (uint8_t * dest
, const uint8_t * ref
,
931 int stride
, int height
)
933 MC_avg2_16 (height
, dest
, ref
, stride
, 1, CPU_3DNOW
);
/* 3DNow! entry point, 8 wide: forwards to MC_avg2_8 with offset 1, so each
 * ref byte is averaged with its right-hand neighbour before being
 * averaged into dest; cpu = CPU_3DNOW. */
936 static void MC_avg_x_8_3dnow (uint8_t * dest
, const uint8_t * ref
,
937 int stride
, int height
)
939 MC_avg2_8 (height
, dest
, ref
, stride
, 1, CPU_3DNOW
);
/* 3DNow! entry point, 16 wide: forwards to MC_put2_16 with offset 1, so
 * dest receives the average of each ref byte and its right-hand
 * neighbour; cpu = CPU_3DNOW. */
942 static void MC_put_x_16_3dnow (uint8_t * dest
, const uint8_t * ref
,
943 int stride
, int height
)
945 MC_put2_16 (height
, dest
, ref
, stride
, 1, CPU_3DNOW
);
/* 3DNow! entry point, 8 wide: forwards to MC_put2_8 with offset 1, so dest
 * receives the average of each ref byte and its right-hand neighbour;
 * cpu = CPU_3DNOW. */
948 static void MC_put_x_8_3dnow (uint8_t * dest
, const uint8_t * ref
,
949 int stride
, int height
)
951 MC_put2_8 (height
, dest
, ref
, stride
, 1, CPU_3DNOW
);
/* 3DNow! entry point, 16 wide: forwards to MC_avg2_16 with
 * offset = stride, so each ref byte is averaged with the byte one stride
 * further on before being averaged into dest; cpu = CPU_3DNOW. */
954 static void MC_avg_y_16_3dnow (uint8_t * dest
, const uint8_t * ref
,
955 int stride
, int height
)
957 MC_avg2_16 (height
, dest
, ref
, stride
, stride
, CPU_3DNOW
);
/* 3DNow! entry point, 8 wide: forwards to MC_avg2_8 with offset = stride,
 * so each ref byte is averaged with the byte one stride further on before
 * being averaged into dest; cpu = CPU_3DNOW. */
960 static void MC_avg_y_8_3dnow (uint8_t * dest
, const uint8_t * ref
,
961 int stride
, int height
)
963 MC_avg2_8 (height
, dest
, ref
, stride
, stride
, CPU_3DNOW
);
/* 3DNow! entry point, 16 wide: forwards to MC_put2_16 with
 * offset = stride, so dest receives the average of each ref byte and the
 * byte one stride further on; cpu = CPU_3DNOW. */
966 static void MC_put_y_16_3dnow (uint8_t * dest
, const uint8_t * ref
,
967 int stride
, int height
)
969 MC_put2_16 (height
, dest
, ref
, stride
, stride
, CPU_3DNOW
);
/* 3DNow! entry point, 8 wide: forwards to MC_put2_8 with offset = stride,
 * so dest receives the average of each ref byte and the byte one stride
 * further on; cpu = CPU_3DNOW. */
972 static void MC_put_y_8_3dnow (uint8_t * dest
, const uint8_t * ref
,
973 int stride
, int height
)
975 MC_put2_8 (height
, dest
, ref
, stride
, stride
, CPU_3DNOW
);
/* 3DNow! entry point, 16 wide: forwards to MC_avg4_16 with
 * cpu = CPU_3DNOW. */
978 static void MC_avg_xy_16_3dnow (uint8_t * dest
, const uint8_t * ref
,
979 int stride
, int height
)
981 MC_avg4_16 (height
, dest
, ref
, stride
, CPU_3DNOW
);
/* 3DNow! entry point, 8 wide: forwards to MC_avg4_8 with
 * cpu = CPU_3DNOW. */
984 static void MC_avg_xy_8_3dnow (uint8_t * dest
, const uint8_t * ref
,
985 int stride
, int height
)
987 MC_avg4_8 (height
, dest
, ref
, stride
, CPU_3DNOW
);
/* 3DNow! entry point, 16 wide: forwards to MC_put4_16 with
 * cpu = CPU_3DNOW. */
990 static void MC_put_xy_16_3dnow (uint8_t * dest
, const uint8_t * ref
,
991 int stride
, int height
)
993 MC_put4_16 (height
, dest
, ref
, stride
, CPU_3DNOW
);
/* 3DNow! entry point, 8 wide: forwards to MC_put4_8 with
 * cpu = CPU_3DNOW. */
996 static void MC_put_xy_8_3dnow (uint8_t * dest
, const uint8_t * ref
,
997 int stride
, int height
)
999 MC_put4_8 (height
, dest
, ref
, stride
, CPU_3DNOW
);
/* NOTE(review): MPEG2_MC_EXTERN is defined outside this file; it
 * presumably publishes the MC_*_3dnow routines above under the "3dnow"
 * suffix - confirm against its definition. */
1003 MPEG2_MC_EXTERN (3dnow
)