/*
 * 20% faster hqdn3d on x86_64
 * [mplayer/glamo.git] / libmpcodecs / native / mmx.h
 * blob 1661b318a45823af4aaf9f3e5fea5d583ae4f021
 */
1 #ifndef MPLAYER_MMX_H
2 #define MPLAYER_MMX_H
/*
 * mmx_t: storage for one 64-bit MMX operand, viewable as a single quadword
 * or as packed doublewords, words, bytes or floats.  All members share the
 * same storage, so writing one view and reading another reinterprets the
 * same bits.
 *
 * NOTE(review): no alignment attribute is applied to this union, so its
 * alignment is whatever the target ABI assigns to its members -- confirm
 * before relying on a guaranteed 8-byte boundary.
 */
typedef union {
    long long          q;      /* Quadword (64-bit) value */
    unsigned long long uq;     /* Unsigned Quadword */
    int                d[2];   /* 2 Doubleword (32-bit) values */
    unsigned int       ud[2];  /* 2 Unsigned Doubleword */
    short              w[4];   /* 4 Word (16-bit) values */
    unsigned short     uw[4];  /* 4 Unsigned Word */
    char               b[8];   /* 8 Byte (8-bit) values */
    unsigned char      ub[8];  /* 8 Unsigned Byte */
    float              s[2];   /* 2 Single-precision (32-bit) values */
} mmx_t;
/*
 * movq (move quadword) wrappers.
 * Suffix convention: _m2r = memory operand to register, _r2m = register to
 * memory operand, _r2r = source register to destination register.
 * Register arguments are bare register names (e.g. mm0); each wrapper just
 * forwards to the generic mmx_m2r / mmx_r2m / mmx_r2r emitter with the
 * mnemonic filled in.
 */
#define movq_m2r(var, reg)    mmx_m2r(movq, var, reg)
#define movq_r2m(reg, var)    mmx_r2m(movq, reg, var)
#define movq_r2r(regs, regd)  mmx_r2r(movq, regs, regd)
/*
 * Unpack/interleave wrappers (punpck{l,h}{bw,wd,dq}).
 * "l"/"h" selects the low or high half of the operands; bw, wd and dq name
 * the element widths (bytes->words, words->doublewords,
 * doublewords->quadword).
 * _m2r variants take a memory operand as the source, _r2r variants take a
 * source register; the last argument is always the destination register.
 * Each wrapper forwards to mmx_m2r / mmx_r2r with the mnemonic filled in.
 */
#define punpcklwd_m2r(var, reg)    mmx_m2r(punpcklwd, var, reg)
#define punpcklwd_r2r(regs, regd)  mmx_r2r(punpcklwd, regs, regd)

#define punpckhwd_m2r(var, reg)    mmx_m2r(punpckhwd, var, reg)
#define punpckhwd_r2r(regs, regd)  mmx_r2r(punpckhwd, regs, regd)

#define punpcklbw_r2r(regs, regd)  mmx_r2r(punpcklbw, regs, regd)
#define punpckhbw_r2r(regs, regd)  mmx_r2r(punpckhbw, regs, regd)
#define punpckhdq_r2r(regs, regd)  mmx_r2r(punpckhdq, regs, regd)
#define punpckldq_r2r(regs, regd)  mmx_r2r(punpckldq, regs, regd)
/*
 * Packed 16-bit arithmetic wrappers.
 *   psubw   - subtract packed words
 *   psubsw  - subtract packed words with signed saturation
 *   pmaddwd - multiply packed words and add adjacent products
 *   paddw   - add packed words
 * _m2r variants take a memory operand as the source, _r2r variants take a
 * source register; the last argument is always the destination register.
 * Each wrapper forwards to mmx_m2r / mmx_r2r with the mnemonic filled in.
 */
#define psubw_m2r(var, reg)      mmx_m2r(psubw, var, reg)
#define psubw_r2r(regs, regd)    mmx_r2r(psubw, regs, regd)
#define psubsw_r2r(regs, regd)   mmx_r2r(psubsw, regs, regd)

#define pmaddwd_r2r(regs, regd)  mmx_r2r(pmaddwd, regs, regd)
#define paddw_m2r(var, reg)      mmx_m2r(paddw, var, reg)
#define paddw_r2r(regs, regd)    mmx_r2r(paddw, regs, regd)
/*
 * Shift and high-multiply wrappers.
 *   psrad  - arithmetic right shift of packed doublewords
 *   psllw  - logical left shift of packed words
 *   psraw  - arithmetic right shift of packed words
 *   pmulhw - multiply packed signed words, keep the high 16 bits
 * _i2r variants take the shift count as a compile-time immediate; _r2r and
 * _m2r take a source register or memory operand.  The last argument is
 * always the destination register.  Each wrapper forwards to
 * mmx_i2r / mmx_r2r / mmx_m2r with the mnemonic filled in.
 */
#define psrad_i2r(imm, reg)     mmx_i2r(psrad, imm, reg)

#define psllw_i2r(imm, reg)     mmx_i2r(psllw, imm, reg)

#define pmulhw_r2r(regs, regd)  mmx_r2r(pmulhw, regs, regd)
#define pmulhw_m2r(var, reg)    mmx_m2r(pmulhw, var, reg)

#define psraw_i2r(imm, reg)     mmx_i2r(psraw, imm, reg)
/*
 * Pack, logic and compare wrappers (register-to-register only).
 *   packssdw - pack doublewords into words with signed saturation
 *   packuswb - pack words into bytes with unsigned saturation
 *   pxor     - bitwise XOR
 *   pcmpgtw  - compare packed signed words for greater-than
 *   por      - bitwise OR
 * First argument is the source register, second the destination register.
 * Each wrapper forwards to mmx_r2r with the mnemonic filled in.
 */
#define packssdw_r2r(regs, regd)  mmx_r2r(packssdw, regs, regd)
#define packuswb_r2r(regs, regd)  mmx_r2r(packuswb, regs, regd)

#define pxor_r2r(regs, regd)      mmx_r2r(pxor, regs, regd)

#define pcmpgtw_r2r(regs, regd)   mmx_r2r(pcmpgtw, regs, regd)

#define por_r2r(regs, regd)       mmx_r2r(por, regs, regd)
/*
 * mmx_i2r(op, imm, reg): emit "op $imm, %reg" as an inline-asm statement.
 *   op  - instruction mnemonic, pasted into the template as text
 *   imm - integer constant expression (passed with the "i" constraint)
 *   reg - bare destination register name (e.g. mm0), pasted as text
 * The statement declares no outputs and no clobbers, so the compiler is
 * not told that reg is modified.
 */
#define mmx_i2r(op, imm, reg) \
    __asm__ volatile (#op " %0, %%" #reg \
                      : /* no outputs */ \
                      : "i" (imm))
/*
 * mmx_m2r(op, mem, reg): emit "op mem, %reg" as an inline-asm statement.
 *   op  - instruction mnemonic, pasted into the template as text
 *   mem - lvalue used as the memory source operand ("m" constraint)
 *   reg - bare destination register name (e.g. mm0), pasted as text
 * The statement declares no outputs and no clobbers, so the compiler is
 * not told that reg is modified.
 */
#define mmx_m2r(op, mem, reg) \
    __asm__ volatile (#op " %0, %%" #reg \
                      : /* no outputs */ \
                      : "m" (mem))
/*
 * mmx_r2m(op, reg, mem): emit "op %reg, mem" as an inline-asm statement.
 *   op  - instruction mnemonic, pasted into the template as text
 *   reg - bare source register name (e.g. mm0), pasted as text
 *   mem - lvalue written by the instruction ("=m" output constraint)
 * reg is not listed as an input, so the compiler has no knowledge of where
 * its value came from.
 */
#define mmx_r2m(op, reg, mem) \
    __asm__ volatile (#op " %%" #reg ", %0" \
                      : "=m" (mem) \
                      : /* no inputs */)
/*
 * mmx_r2r(op, regs, regd): emit "op %regs, %regd" as an inline-asm statement.
 *   op   - instruction mnemonic, pasted into the template as text
 *   regs - bare source register name (e.g. mm0)
 *   regd - bare destination register name (e.g. mm1)
 * The statement has no operand lists at all, which is why the registers are
 * written with a single '%' here.  Nothing tells the compiler that regd
 * is modified.
 */
#define mmx_r2r(op, regs, regd) \
    __asm__ volatile (#op " %" #regs ", %" #regd)
/*
 * emms(): emit the EMMS instruction, which marks the MMX/x87 register file
 * as empty so that floating-point code can run after a block of MMX code.
 * The statement declares no operands and no clobbers.
 */
#define emms() __asm__ volatile ("emms")
80 #endif /* MPLAYER_MMX_H */