/*
 * x86 MMX and MMX2 packed byte operations in portable C.
 * Extra instructions: pdiffub, pcmpzb, psumbw, pcmpgtub
 * Author: Zoltan Hidvegi
 *
 * This file is part of MPlayer.
 *
 * MPlayer is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * MPlayer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

23 #ifndef MPLAYER_CMMX_H
24 #define MPLAYER_CMMX_H
/*
 * One machine word treated as sizeof(cmmx_t) packed unsigned bytes.
 * Every operation in this header acts on all of those bytes at once.
 */
typedef unsigned long cmmx_t;

/* Word with 0x01 in every byte (all-ones divided by 0xff). */
#define ONE_BYTES (~(cmmx_t)0 / 255)
/* Word with 0x80 in every byte, i.e. the top bit of each packed byte. */
#define SIGN_BITS (ONE_BYTES << 7)
/* Word with 0x00ff in every 16-bit lane (all-ones divided by 0x101). */
#define LOWBW_MASK (~(cmmx_t)0 / 257)
33 paddb(cmmx_t a
, cmmx_t b
)
35 return ((a
& ~SIGN_BITS
) + (b
& ~SIGN_BITS
)) ^ ((a
^b
) & SIGN_BITS
);
39 psubb(cmmx_t a
, cmmx_t b
)
41 return ((a
| SIGN_BITS
) - (b
& ~SIGN_BITS
)) ^ (~(a
^b
) & SIGN_BITS
);
45 paddusb(cmmx_t a
, cmmx_t b
)
47 cmmx_t s
= (a
& ~SIGN_BITS
) + (b
& ~SIGN_BITS
);
48 cmmx_t abs
= (a
| b
) & SIGN_BITS
;
49 cmmx_t c
= abs
& (s
| (a
& b
));
50 return s
| abs
| (abs
- (c
>> 7));
54 paddusb_s(cmmx_t a
, cmmx_t b
)
57 cmmx_t ov
= sum
& SIGN_BITS
;
58 return sum
+ (sum
^ (ov
- (ov
>>7)));
62 psubusb(cmmx_t a
, cmmx_t b
)
64 cmmx_t s
= (a
| SIGN_BITS
) - (b
& ~SIGN_BITS
);
66 cmmx_t c
= (anb
| (s
& ~(a
^b
))) & SIGN_BITS
;
67 return s
& ((c
& anb
) | (c
- (c
>> 7)));
71 psubusb_s(cmmx_t a
, cmmx_t b
)
73 cmmx_t d
= (a
|SIGN_BITS
) - b
;
74 cmmx_t m
= d
& SIGN_BITS
;
75 return d
& (m
- (m
>>7));
79 pcmpgtub(cmmx_t b
, cmmx_t a
)
81 cmmx_t s
= (a
| SIGN_BITS
) - (b
& ~SIGN_BITS
);
82 cmmx_t ret
= ((~a
& b
) | (~s
& ~(a
^ b
))) & SIGN_BITS
;
83 return ret
| (ret
- (ret
>> 7));
87 pdiffub(cmmx_t a
, cmmx_t b
)
89 cmmx_t xs
= (~a
^ b
) & SIGN_BITS
;
90 cmmx_t s
= ((a
| SIGN_BITS
) - (b
& ~SIGN_BITS
)) ^ xs
;
91 cmmx_t gt
= ((~a
& b
) | (s
& xs
)) & SIGN_BITS
;
93 return (s
^ gt
^ (gt
- gt7
)) + gt7
;
97 pdiffub_s(cmmx_t a
, cmmx_t b
)
99 cmmx_t d
= (a
|SIGN_BITS
) - b
;
100 cmmx_t g
= (~d
& SIGN_BITS
) >> 7;
101 return (d
^ (SIGN_BITS
-g
)) + g
;
105 pmaxub(cmmx_t a
, cmmx_t b
)
107 return psubusb(a
,b
) + b
;
111 pminub(cmmx_t a
, cmmx_t b
)
113 return paddusb(a
,~b
) - ~b
;
117 pminub_s(cmmx_t a
, cmmx_t b
)
119 cmmx_t d
= (a
|SIGN_BITS
) - b
;
120 cmmx_t m
= ~SIGN_BITS
+ ((d
&SIGN_BITS
)>>7);
121 return ((d
&m
) + b
) & ~SIGN_BITS
;
125 pavgb(cmmx_t a
, cmmx_t b
)
127 cmmx_t ao
= a
& ONE_BYTES
;
128 cmmx_t bo
= b
& ONE_BYTES
;
129 return ((a
^ao
)>>1) + ((b
^bo
)>>1) + (ao
|bo
);
133 pavgb_s(cmmx_t a
, cmmx_t b
)
135 return ((a
+b
+ONE_BYTES
)>>1) & ~SIGN_BITS
;
139 p31avgb(cmmx_t a
, cmmx_t b
)
141 cmmx_t ao
= a
& (3*ONE_BYTES
);
142 cmmx_t bo
= b
& (3*ONE_BYTES
);
143 return 3*((a
^ao
)>>2) + ((b
^bo
)>>2) +
144 (((3*ao
+bo
+2*ONE_BYTES
)>>2) & (3*ONE_BYTES
));
148 p31avgb_s(cmmx_t a
, cmmx_t b
)
150 cmmx_t avg
= ((a
+b
)>>1) & ~SIGN_BITS
;
151 return pavgb_s(avg
, a
);
154 static inline unsigned long
157 cmmx_t t
= (a
& LOWBW_MASK
) + ((a
>>8) & LOWBW_MASK
);
159 (unsigned long)t
+ (unsigned long)(t
>> (4*sizeof(cmmx_t
)));
160 if (sizeof(cmmx_t
) > 4)
165 static inline unsigned long
169 (unsigned long)a
+ (unsigned long)(a
>> (4*sizeof(cmmx_t
)));
170 if (sizeof(cmmx_t
) <= 4)
171 return (ret
& 0xff) + ((ret
>>8) & 0xff);
172 ret
= (ret
& 0xff00ff) + ((ret
>>8) & 0xff00ff);
177 static inline unsigned long
178 psadbw(cmmx_t a
, cmmx_t b
)
180 return psumbw(pdiffub(a
,b
));
183 static inline unsigned long
184 psadbw_s(cmmx_t a
, cmmx_t b
)
186 return psumbw_s(pdiffub_s(a
,b
));
192 cmmx_t ret
= (((a
| SIGN_BITS
) - ONE_BYTES
) | a
) & SIGN_BITS
;
193 return ~(ret
| (ret
- (ret
>> 7)));
197 pcmpeqb(cmmx_t a
, cmmx_t b
)
199 return pcmpzb(a
^ b
);
202 #endif /* MPLAYER_CMMX_H */