vo_glamo: sub.h was moved to sub directory in c9026cb3210205b07e2e068467a18ee40f9259a3
[mplayer/glamo.git] / libmpcodecs / cmmx.h
blob51ffd235bc1e9a9b7d4a9d4f4ca8f623a22f4ee0
/*
 * x86 MMX and MMX2 packed byte operations in portable C.
 * Extra instructions: pdiffub, pcmpzb, psumbw, pcmpgtub
 * Author: Zoltan Hidvegi
 *
 * This file is part of MPlayer.
 *
 * MPlayer is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * MPlayer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
23 #ifndef MPLAYER_CMMX_H
24 #define MPLAYER_CMMX_H
/*
 * Machine word used as a vector of packed unsigned bytes.  All helpers in
 * this header operate on sizeof(cmmx_t) bytes at once.
 */
typedef unsigned long cmmx_t;

/* 0x01 replicated into every byte of a cmmx_t (all-ones divided by 0xff). */
#define ONE_BYTES  (~(cmmx_t)0 / 255)
/* 0x80 replicated into every byte: the top ("sign") bit of each byte lane. */
#define SIGN_BITS  (ONE_BYTES << 7)
/* 0x00ff replicated into every 16-bit lane: selects the low byte of each. */
#define LOWBW_MASK (~(cmmx_t)0 / 257)
32 static inline cmmx_t
33 paddb(cmmx_t a, cmmx_t b)
35 return ((a & ~SIGN_BITS) + (b & ~SIGN_BITS)) ^ ((a^b) & SIGN_BITS);
38 static inline cmmx_t
39 psubb(cmmx_t a, cmmx_t b)
41 return ((a | SIGN_BITS) - (b & ~SIGN_BITS)) ^ (~(a^b) & SIGN_BITS);
44 static inline cmmx_t
45 paddusb(cmmx_t a, cmmx_t b)
47 cmmx_t s = (a & ~SIGN_BITS) + (b & ~SIGN_BITS);
48 cmmx_t abs = (a | b) & SIGN_BITS;
49 cmmx_t c = abs & (s | (a & b));
50 return s | abs | (abs - (c >> 7));
53 static inline cmmx_t
54 paddusb_s(cmmx_t a, cmmx_t b)
56 cmmx_t sum = a+b;
57 cmmx_t ov = sum & SIGN_BITS;
58 return sum + (sum ^ (ov - (ov>>7)));
61 static inline cmmx_t
62 psubusb(cmmx_t a, cmmx_t b)
64 cmmx_t s = (a | SIGN_BITS) - (b & ~SIGN_BITS);
65 cmmx_t anb = a & ~b;
66 cmmx_t c = (anb | (s & ~(a^b))) & SIGN_BITS;
67 return s & ((c & anb) | (c - (c >> 7)));
70 static inline cmmx_t
71 psubusb_s(cmmx_t a, cmmx_t b)
73 cmmx_t d = (a|SIGN_BITS) - b;
74 cmmx_t m = d & SIGN_BITS;
75 return d & (m - (m>>7));
78 static inline cmmx_t
79 pcmpgtub(cmmx_t b, cmmx_t a)
81 cmmx_t s = (a | SIGN_BITS) - (b & ~SIGN_BITS);
82 cmmx_t ret = ((~a & b) | (~s & ~(a ^ b))) & SIGN_BITS;
83 return ret | (ret - (ret >> 7));
86 static inline cmmx_t
87 pdiffub(cmmx_t a, cmmx_t b)
89 cmmx_t xs = (~a ^ b) & SIGN_BITS;
90 cmmx_t s = ((a | SIGN_BITS) - (b & ~SIGN_BITS)) ^ xs;
91 cmmx_t gt = ((~a & b) | (s & xs)) & SIGN_BITS;
92 cmmx_t gt7 = gt >> 7;
93 return (s ^ gt ^ (gt - gt7)) + gt7;
96 static inline cmmx_t
97 pdiffub_s(cmmx_t a, cmmx_t b)
99 cmmx_t d = (a|SIGN_BITS) - b;
100 cmmx_t g = (~d & SIGN_BITS) >> 7;
101 return (d ^ (SIGN_BITS-g)) + g;
104 static inline cmmx_t
105 pmaxub(cmmx_t a, cmmx_t b)
107 return psubusb(a,b) + b;
110 static inline cmmx_t
111 pminub(cmmx_t a, cmmx_t b)
113 return paddusb(a,~b) - ~b;
116 static inline cmmx_t
117 pminub_s(cmmx_t a, cmmx_t b)
119 cmmx_t d = (a|SIGN_BITS) - b;
120 cmmx_t m = ~SIGN_BITS + ((d&SIGN_BITS)>>7);
121 return ((d&m) + b) & ~SIGN_BITS;
124 static inline cmmx_t
125 pavgb(cmmx_t a, cmmx_t b)
127 cmmx_t ao = a & ONE_BYTES;
128 cmmx_t bo = b & ONE_BYTES;
129 return ((a^ao)>>1) + ((b^bo)>>1) + (ao|bo);
132 static inline cmmx_t
133 pavgb_s(cmmx_t a, cmmx_t b)
135 return ((a+b+ONE_BYTES)>>1) & ~SIGN_BITS;
138 static inline cmmx_t
139 p31avgb(cmmx_t a, cmmx_t b)
141 cmmx_t ao = a & (3*ONE_BYTES);
142 cmmx_t bo = b & (3*ONE_BYTES);
143 return 3*((a^ao)>>2) + ((b^bo)>>2) +
144 (((3*ao+bo+2*ONE_BYTES)>>2) & (3*ONE_BYTES));
147 static inline cmmx_t
148 p31avgb_s(cmmx_t a, cmmx_t b)
150 cmmx_t avg = ((a+b)>>1) & ~SIGN_BITS;
151 return pavgb_s(avg, a);
154 static inline unsigned long
155 psumbw(cmmx_t a)
157 cmmx_t t = (a & LOWBW_MASK) + ((a>>8) & LOWBW_MASK);
158 unsigned long ret =
159 (unsigned long)t + (unsigned long)(t >> (4*sizeof(cmmx_t)));
160 if (sizeof(cmmx_t) > 4)
161 ret += ret >> 16;
162 return ret & 0xffff;
165 static inline unsigned long
166 psumbw_s(cmmx_t a)
168 unsigned long ret =
169 (unsigned long)a + (unsigned long)(a >> (4*sizeof(cmmx_t)));
170 if (sizeof(cmmx_t) <= 4)
171 return (ret & 0xff) + ((ret>>8) & 0xff);
172 ret = (ret & 0xff00ff) + ((ret>>8) & 0xff00ff);
173 ret += ret >> 16;
174 return ret & 0xffff;
177 static inline unsigned long
178 psadbw(cmmx_t a, cmmx_t b)
180 return psumbw(pdiffub(a,b));
183 static inline unsigned long
184 psadbw_s(cmmx_t a, cmmx_t b)
186 return psumbw_s(pdiffub_s(a,b));
189 static inline cmmx_t
190 pcmpzb(cmmx_t a)
192 cmmx_t ret = (((a | SIGN_BITS) - ONE_BYTES) | a) & SIGN_BITS;
193 return ~(ret | (ret - (ret >> 7)));
196 static inline cmmx_t
197 pcmpeqb(cmmx_t a, cmmx_t b)
199 return pcmpzb(a ^ b);
202 #endif /* MPLAYER_CMMX_H */