2 * Loongson Multimedia Instruction emulation helpers for QEMU.
4 * Copyright (c) 2011 Richard Henderson <rth@twiddle.net>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
22 #include "exec/helper-proto.h"
25 * If the byte ordering doesn't matter, i.e. all columns are treated
26 * identically, then this union can be used directly. If byte ordering
27 * does matter, we generally ignore dumping to memory.
/* Some byte ordering issues can be mitigated by XORing in the following. */
#ifdef HOST_WORDS_BIGENDIAN
# define BYTE_ORDER_XOR(N)  N
#else
# define BYTE_ORDER_XOR(N)  0
#endif

/* Saturate to the signed / unsigned range of a byte. */
#define SATSB(x)  ((x) < -0x80 ? -0x80 : (x) > 0x7f ? 0x7f : (x))
#define SATUB(x)  ((x) > 0xff ? 0xff : (x))

/* Saturate to the signed / unsigned range of a halfword. */
#define SATSH(x)  ((x) < -0x8000 ? -0x8000 : (x) > 0x7fff ? 0x7fff : (x))
#define SATUH(x)  ((x) > 0xffff ? 0xffff : (x))

/* Saturate to the signed / unsigned range of a word. */
#define SATSW(x) \
    ((x) < -0x80000000ll ? -0x80000000ll : (x) > 0x7fffffff ? 0x7fffffff : (x))
#define SATUW(x)  ((x) > 0xffffffffull ? 0xffffffffull : (x))
/*
 * PADDSB: parallel add of eight signed bytes with saturation.
 * Lane i of the result is sat_s8(fs.b[i] + ft.b[i]).  Implemented with
 * explicit shifts/masks so host byte order is irrelevant.
 */
uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        int r = (int8_t)(fs >> (i * 8)) + (int8_t)(ft >> (i * 8));
        if (r < -0x80) {
            r = -0x80;
        } else if (r > 0x7f) {
            r = 0x7f;
        }
        fd |= (uint64_t)(uint8_t)r << (i * 8);
    }
    return fd;
}
/*
 * PADDUSB: parallel add of eight unsigned bytes with saturation.
 * Lane i of the result is min(fs.b[i] + ft.b[i], 0xff).
 */
uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        int r = (int)((fs >> (i * 8)) & 0xff) + (int)((ft >> (i * 8)) & 0xff);
        if (r > 0xff) {
            r = 0xff;
        }
        fd |= (uint64_t)r << (i * 8);
    }
    return fd;
}
/*
 * PADDSH: parallel add of four signed halfwords with saturation.
 * Lane i of the result is sat_s16(fs.h[i] + ft.h[i]).
 */
uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        int r = (int16_t)(fs >> (i * 16)) + (int16_t)(ft >> (i * 16));
        if (r < -0x8000) {
            r = -0x8000;
        } else if (r > 0x7fff) {
            r = 0x7fff;
        }
        fd |= (uint64_t)(uint16_t)r << (i * 16);
    }
    return fd;
}
/*
 * PADDUSH: parallel add of four unsigned halfwords with saturation.
 * Lane i of the result is min(fs.h[i] + ft.h[i], 0xffff).
 */
uint64_t helper_paddush(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        int r = (int)((fs >> (i * 16)) & 0xffff)
              + (int)((ft >> (i * 16)) & 0xffff);
        if (r > 0xffff) {
            r = 0xffff;
        }
        fd |= (uint64_t)r << (i * 16);
    }
    return fd;
}
/*
 * PADDB: parallel modulo add of eight bytes (wrap-around, no saturation,
 * no carry between lanes).
 */
uint64_t helper_paddb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        uint8_t r = (uint8_t)(fs >> (i * 8)) + (uint8_t)(ft >> (i * 8));
        fd |= (uint64_t)r << (i * 8);
    }
    return fd;
}
/*
 * PADDH: parallel modulo add of four halfwords (wrap-around, no carry
 * between lanes).
 */
uint64_t helper_paddh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        uint16_t r = (uint16_t)(fs >> (i * 16)) + (uint16_t)(ft >> (i * 16));
        fd |= (uint64_t)r << (i * 16);
    }
    return fd;
}
/*
 * PADDW: parallel modulo add of two words (wrap-around, no carry between
 * lanes).
 */
uint64_t helper_paddw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 2; ++i) {
        uint32_t r = (uint32_t)(fs >> (i * 32)) + (uint32_t)(ft >> (i * 32));
        fd |= (uint64_t)r << (i * 32);
    }
    return fd;
}
/*
 * PSUBSB: parallel subtract of eight signed bytes with saturation.
 * Lane i of the result is sat_s8(fs.b[i] - ft.b[i]).
 */
uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        int r = (int8_t)(fs >> (i * 8)) - (int8_t)(ft >> (i * 8));
        if (r < -0x80) {
            r = -0x80;
        } else if (r > 0x7f) {
            r = 0x7f;
        }
        fd |= (uint64_t)(uint8_t)r << (i * 8);
    }
    return fd;
}
/*
 * PSUBUSB: parallel subtract of eight unsigned bytes with saturation.
 * Lane i of the result is max(fs.b[i] - ft.b[i], 0).
 */
uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        int r = (int)((fs >> (i * 8)) & 0xff) - (int)((ft >> (i * 8)) & 0xff);
        if (r < 0) {
            r = 0;
        }
        fd |= (uint64_t)r << (i * 8);
    }
    return fd;
}
/*
 * PSUBSH: parallel subtract of four signed halfwords with saturation.
 * Lane i of the result is sat_s16(fs.h[i] - ft.h[i]).
 */
uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        int r = (int16_t)(fs >> (i * 16)) - (int16_t)(ft >> (i * 16));
        if (r < -0x8000) {
            r = -0x8000;
        } else if (r > 0x7fff) {
            r = 0x7fff;
        }
        fd |= (uint64_t)(uint16_t)r << (i * 16);
    }
    return fd;
}
/*
 * PSUBUSH: parallel subtract of four unsigned halfwords with saturation.
 * Lane i of the result is max(fs.h[i] - ft.h[i], 0).
 */
uint64_t helper_psubush(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        int r = (int)((fs >> (i * 16)) & 0xffff)
              - (int)((ft >> (i * 16)) & 0xffff);
        if (r < 0) {
            r = 0;
        }
        fd |= (uint64_t)r << (i * 16);
    }
    return fd;
}
/*
 * PSUBB: parallel modulo subtract of eight bytes (wrap-around, no borrow
 * between lanes).
 */
uint64_t helper_psubb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        uint8_t r = (uint8_t)(fs >> (i * 8)) - (uint8_t)(ft >> (i * 8));
        fd |= (uint64_t)r << (i * 8);
    }
    return fd;
}
/*
 * PSUBH: parallel modulo subtract of four halfwords (wrap-around, no
 * borrow between lanes).
 */
uint64_t helper_psubh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        uint16_t r = (uint16_t)(fs >> (i * 16)) - (uint16_t)(ft >> (i * 16));
        fd |= (uint64_t)r << (i * 16);
    }
    return fd;
}
/*
 * PSUBW: parallel modulo subtract of two words (wrap-around, no borrow
 * between lanes).
 */
uint64_t helper_psubw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 2; ++i) {
        uint32_t r = (uint32_t)(fs >> (i * 32)) - (uint32_t)(ft >> (i * 32));
        fd |= (uint64_t)r << (i * 32);
    }
    return fd;
}
/*
 * PSHUFH: shuffle halfwords.  Each 2-bit field of ft selects which
 * halfword of fs goes into the corresponding result lane:
 * fd.h[i] = fs.h[(ft >> (2 * i)) & 3].
 */
uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i, ft >>= 2) {
        uint64_t h = (fs >> ((ft & 3) * 16)) & 0xffff;
        fd |= h << (i * 16);
    }
    return fd;
}
/*
 * PACKSSWH: pack four signed words into four signed-saturated halfwords.
 * Result lanes 0-1 come from fs's words, lanes 2-3 from ft's words.
 */
uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    int64_t w[4];
    unsigned i;

    w[0] = (int32_t)(fs >> 0);
    w[1] = (int32_t)(fs >> 32);
    w[2] = (int32_t)(ft >> 0);
    w[3] = (int32_t)(ft >> 32);

    for (i = 0; i < 4; ++i) {
        int64_t t = w[i];
        if (t < -0x8000) {
            t = -0x8000;
        } else if (t > 0x7fff) {
            t = 0x7fff;
        }
        fd |= (uint64_t)(t & 0xffff) << (i * 16);
    }
    return fd;
}
/*
 * PACKSSHB: pack eight signed halfwords into eight signed-saturated bytes.
 * Result bytes 0-3 come from fs's halfwords, bytes 4-7 from ft's.
 */
uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        uint64_t src = (i < 4) ? fs >> (i * 16) : ft >> ((i - 4) * 16);
        int v = (int16_t)src;
        if (v < -0x80) {
            v = -0x80;
        } else if (v > 0x7f) {
            v = 0x7f;
        }
        fd |= (uint64_t)(uint8_t)v << (i * 8);
    }
    return fd;
}
/*
 * PACKUSHB: pack eight signed halfwords into eight unsigned-saturated
 * bytes (bytes 0-3 from fs, 4-7 from ft).
 *
 * Fix: unsigned saturation must clamp negative inputs to 0 (as in the
 * MMX PACKUSWB this instruction mirrors); the previous SATUB-based code
 * let negative halfwords fall through to their low byte.
 */
uint64_t helper_packushb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        uint64_t src = (i < 4) ? fs >> (i * 16) : ft >> ((i - 4) * 16);
        int v = (int16_t)src;
        if (v < 0) {
            v = 0;
        } else if (v > 0xff) {
            v = 0xff;
        }
        fd |= (uint64_t)v << (i * 8);
    }
    return fd;
}
/*
 * PUNPCKLWD: interleave the low words — low word of fs in the low half
 * of the result, low word of ft in the high half.
 */
uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
{
    return (fs & 0xffffffffull) | (ft << 32);
}
/*
 * PUNPCKHWD: interleave the high words — high word of fs in the low
 * half of the result, high word of ft in the high half.
 */
uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
{
    return (fs >> 32) | (ft & ~0xffffffffull);
}
/*
 * PUNPCKLHW: interleave the two low halfwords of fs and ft:
 * fd.h = { fs.h0, ft.h0, fs.h1, ft.h1 }.
 */
uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
{
    return ((fs >> 0) & 0xffffull) << 0
         | ((ft >> 0) & 0xffffull) << 16
         | ((fs >> 16) & 0xffffull) << 32
         | ((ft >> 16) & 0xffffull) << 48;
}
/*
 * PUNPCKHHW: interleave the two high halfwords of fs and ft:
 * fd.h = { fs.h2, ft.h2, fs.h3, ft.h3 }.
 */
uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
{
    return ((fs >> 32) & 0xffffull) << 0
         | ((ft >> 32) & 0xffffull) << 16
         | ((fs >> 48) & 0xffffull) << 32
         | ((ft >> 48) & 0xffffull) << 48;
}
/*
 * PUNPCKLBH: interleave the four low bytes of fs and ft:
 * fd.b = { fs.b0, ft.b0, fs.b1, ft.b1, fs.b2, ft.b2, fs.b3, ft.b3 }.
 */
uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        fd |= ((fs >> (i * 8)) & 0xff) << (i * 16);
        fd |= ((ft >> (i * 8)) & 0xff) << (i * 16 + 8);
    }
    return fd;
}
/*
 * PUNPCKHBH: interleave the four high bytes of fs and ft:
 * fd.b = { fs.b4, ft.b4, fs.b5, ft.b5, fs.b6, ft.b6, fs.b7, ft.b7 }.
 */
uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        fd |= ((fs >> (i * 8 + 32)) & 0xff) << (i * 16);
        fd |= ((ft >> (i * 8 + 32)) & 0xff) << (i * 16 + 8);
    }
    return fd;
}
/*
 * PAVGH: rounded average of four unsigned halfword lanes:
 * fd.h[i] = (fs.h[i] + ft.h[i] + 1) >> 1.
 */
uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        uint32_t a = (fs >> (i * 16)) & 0xffff;
        uint32_t b = (ft >> (i * 16)) & 0xffff;
        fd |= (uint64_t)((a + b + 1) >> 1) << (i * 16);
    }
    return fd;
}
/*
 * PAVGB: rounded average of eight unsigned byte lanes:
 * fd.b[i] = (fs.b[i] + ft.b[i] + 1) >> 1.
 */
uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        uint32_t a = (fs >> (i * 8)) & 0xff;
        uint32_t b = (ft >> (i * 8)) & 0xff;
        fd |= (uint64_t)((a + b + 1) >> 1) << (i * 8);
    }
    return fd;
}
/*
 * PMAXSH: per-lane maximum of four signed halfwords.
 */
uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        int16_t a = (int16_t)(fs >> (i * 16));
        int16_t b = (int16_t)(ft >> (i * 16));
        fd |= (uint64_t)(uint16_t)(a >= b ? a : b) << (i * 16);
    }
    return fd;
}
/*
 * PMINSH: per-lane minimum of four signed halfwords.
 */
uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        int16_t a = (int16_t)(fs >> (i * 16));
        int16_t b = (int16_t)(ft >> (i * 16));
        fd |= (uint64_t)(uint16_t)(a <= b ? a : b) << (i * 16);
    }
    return fd;
}
/*
 * PMAXUB: per-lane maximum of eight unsigned bytes.
 *
 * Fix: the loop previously ran only 4 iterations, leaving the upper
 * four byte lanes untouched; the operation covers all 8 bytes of the
 * 64-bit register.
 */
uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        uint64_t a = (fs >> (i * 8)) & 0xff;
        uint64_t b = (ft >> (i * 8)) & 0xff;
        fd |= (a >= b ? a : b) << (i * 8);
    }
    return fd;
}
/*
 * PMINUB: per-lane minimum of eight unsigned bytes.
 *
 * Fix: the loop previously ran only 4 iterations, leaving the upper
 * four byte lanes untouched; the operation covers all 8 bytes of the
 * 64-bit register.
 */
uint64_t helper_pminub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        uint64_t a = (fs >> (i * 8)) & 0xff;
        uint64_t b = (ft >> (i * 8)) & 0xff;
        fd |= (a <= b ? a : b) << (i * 8);
    }
    return fd;
}
/*
 * PCMPEQW: per-lane equality of two words; a lane becomes all ones when
 * equal, all zeros otherwise.
 */
uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 2; ++i) {
        uint32_t a = (uint32_t)(fs >> (i * 32));
        uint32_t b = (uint32_t)(ft >> (i * 32));
        if (a == b) {
            fd |= 0xffffffffull << (i * 32);
        }
    }
    return fd;
}
/*
 * PCMPGTW: per-lane greater-than of two words; a lane becomes all ones
 * when fs's lane is greater, all zeros otherwise.
 *
 * NOTE(review): the original compares lanes as *unsigned* words; the MMX
 * analogue (PCMPGTD) is a signed compare — behavior preserved here, but
 * worth confirming against the Loongson manual.
 */
uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 2; ++i) {
        uint32_t a = (uint32_t)(fs >> (i * 32));
        uint32_t b = (uint32_t)(ft >> (i * 32));
        if (a > b) {
            fd |= 0xffffffffull << (i * 32);
        }
    }
    return fd;
}
/*
 * PCMPEQH: per-lane equality of four halfwords; a lane becomes all ones
 * when equal, all zeros otherwise.
 */
uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        uint16_t a = (uint16_t)(fs >> (i * 16));
        uint16_t b = (uint16_t)(ft >> (i * 16));
        if (a == b) {
            fd |= 0xffffull << (i * 16);
        }
    }
    return fd;
}
/*
 * PCMPGTH: per-lane greater-than of four halfwords; a lane becomes all
 * ones when fs's lane is greater, all zeros otherwise.
 *
 * NOTE(review): compares lanes as *unsigned* halfwords, matching the
 * original code — the MMX analogue is signed; confirm against the manual.
 */
uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        uint16_t a = (uint16_t)(fs >> (i * 16));
        uint16_t b = (uint16_t)(ft >> (i * 16));
        if (a > b) {
            fd |= 0xffffull << (i * 16);
        }
    }
    return fd;
}
/*
 * PCMPEQB: per-lane equality of eight bytes; a lane becomes all ones
 * when equal, all zeros otherwise.
 */
uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        uint8_t a = (uint8_t)(fs >> (i * 8));
        uint8_t b = (uint8_t)(ft >> (i * 8));
        if (a == b) {
            fd |= 0xffull << (i * 8);
        }
    }
    return fd;
}
/*
 * PCMPGTB: per-lane greater-than of eight bytes; a lane becomes all
 * ones when fs's lane is greater, all zeros otherwise.
 *
 * NOTE(review): compares lanes as *unsigned* bytes, matching the
 * original code — the MMX analogue is signed; confirm against the manual.
 */
uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        uint8_t a = (uint8_t)(fs >> (i * 8));
        uint8_t b = (uint8_t)(ft >> (i * 8));
        if (a > b) {
            fd |= 0xffull << (i * 8);
        }
    }
    return fd;
}
/*
 * PSLLW: logical left shift of both word lanes by ft (low 7 bits).
 * A shift count greater than 31 produces all zeros.
 */
uint64_t helper_psllw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    ft &= 0x7f;
    if (ft > 31) {
        return 0;
    }
    for (i = 0; i < 2; ++i) {
        uint32_t w = (uint32_t)(fs >> (i * 32));
        fd |= (uint64_t)(uint32_t)(w << ft) << (i * 32);
    }
    return fd;
}
/*
 * PSRLW: logical right shift of both word lanes by ft (low 7 bits).
 * A shift count greater than 31 produces all zeros.
 */
uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    ft &= 0x7f;
    if (ft > 31) {
        return 0;
    }
    for (i = 0; i < 2; ++i) {
        uint32_t w = (uint32_t)(fs >> (i * 32));
        fd |= (uint64_t)(w >> ft) << (i * 32);
    }
    return fd;
}
/*
 * PSRAW: arithmetic right shift of both word lanes by ft (low 7 bits).
 * Counts greater than 31 are clamped to 31 (replicating the sign bit).
 */
uint64_t helper_psraw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    ft &= 0x7f;
    if (ft > 31) {
        ft = 31;
    }
    for (i = 0; i < 2; ++i) {
        int32_t w = (int32_t)(fs >> (i * 32));
        fd |= (uint64_t)(uint32_t)(w >> ft) << (i * 32);
    }
    return fd;
}
/*
 * PSLLH: logical left shift of all four halfword lanes by ft (low 7
 * bits).  A shift count greater than 15 produces all zeros.
 */
uint64_t helper_psllh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    ft &= 0x7f;
    if (ft > 15) {
        return 0;
    }
    for (i = 0; i < 4; ++i) {
        uint16_t h = (uint16_t)(fs >> (i * 16));
        fd |= (uint64_t)(uint16_t)(h << ft) << (i * 16);
    }
    return fd;
}
/*
 * PSRLH: logical right shift of all four halfword lanes by ft (low 7
 * bits).  A shift count greater than 15 produces all zeros.
 */
uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    ft &= 0x7f;
    if (ft > 15) {
        return 0;
    }
    for (i = 0; i < 4; ++i) {
        uint16_t h = (uint16_t)(fs >> (i * 16));
        fd |= (uint64_t)(h >> ft) << (i * 16);
    }
    return fd;
}
/*
 * PSRAH: arithmetic right shift of all four halfword lanes by ft (low 7
 * bits).  Counts greater than 15 are clamped to 15 (sign replication).
 */
uint64_t helper_psrah(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    ft &= 0x7f;
    if (ft > 15) {
        ft = 15;
    }
    for (i = 0; i < 4; ++i) {
        int16_t h = (int16_t)(fs >> (i * 16));
        fd |= (uint64_t)(uint16_t)(h >> ft) << (i * 16);
    }
    return fd;
}
/*
 * PMULLH: per-lane signed multiply of four halfwords, keeping the low
 * 16 bits of each 32-bit product.
 */
uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        int32_t a = (int16_t)(fs >> (i * 16));
        int32_t b = (int16_t)(ft >> (i * 16));
        fd |= (uint64_t)(uint16_t)(a * b) << (i * 16);
    }
    return fd;
}
/*
 * PMULHH: per-lane signed multiply of four halfwords, keeping the high
 * 16 bits of each 32-bit product.
 */
uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        int32_t r = (int16_t)(fs >> (i * 16)) * (int32_t)(int16_t)(ft >> (i * 16));
        fd |= (uint64_t)(uint16_t)(r >> 16) << (i * 16);
    }
    return fd;
}
/*
 * PMULHUH: per-lane unsigned multiply of four halfwords, keeping the
 * high 16 bits of each 32-bit product.
 */
uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        uint32_t a = (fs >> (i * 16)) & 0xffff;
        uint32_t b = (ft >> (i * 16)) & 0xffff;
        fd |= (uint64_t)((a * b) >> 16) << (i * 16);
    }
    return fd;
}
/*
 * PMADDHW: signed multiply-add of halfword pairs.  The low result word
 * is fs.h0*ft.h0 + fs.h1*ft.h1; the high word is fs.h2*ft.h2 +
 * fs.h3*ft.h3.  Accumulation is done in uint32_t so the lane sums wrap
 * without signed-overflow UB and the final OR cannot sign-extend.
 */
uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
{
    int32_t s0 = (int16_t)(fs >> 0),  t0 = (int16_t)(ft >> 0);
    int32_t s1 = (int16_t)(fs >> 16), t1 = (int16_t)(ft >> 16);
    int32_t s2 = (int16_t)(fs >> 32), t2 = (int16_t)(ft >> 32);
    int32_t s3 = (int16_t)(fs >> 48), t3 = (int16_t)(ft >> 48);
    uint32_t p0, p1;

    p0 = (uint32_t)(s0 * t0);
    p0 += (uint32_t)(s1 * t1);
    p1 = (uint32_t)(s2 * t2);
    p1 += (uint32_t)(s3 * t3);

    return ((uint64_t)p1 << 32) | p0;
}
/*
 * PASUBUB: per-lane absolute difference of eight unsigned bytes:
 * fd.b[i] = |fs.b[i] - ft.b[i]|.
 */
uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        int r = (int)((fs >> (i * 8)) & 0xff) - (int)((ft >> (i * 8)) & 0xff);
        fd |= (uint64_t)(r < 0 ? -r : r) << (i * 8);
    }
    return fd;
}
/*
 * BIADD: horizontal sum of all eight unsigned bytes of fs (result fits
 * in 11 bits; maximum 8 * 255 = 2040).
 */
uint64_t helper_biadd(uint64_t fs)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        fd += (fs >> (i * 8)) & 0xff;
    }
    return fd;
}
733 uint64_t helper_pmovmskb(uint64_t fs
)
737 fd
|= ((fs
>> 7) & 1) << 0;
738 fd
|= ((fs
>> 15) & 1) << 1;
739 fd
|= ((fs
>> 23) & 1) << 2;
740 fd
|= ((fs
>> 31) & 1) << 3;
741 fd
|= ((fs
>> 39) & 1) << 4;
742 fd
|= ((fs
>> 47) & 1) << 5;
743 fd
|= ((fs
>> 55) & 1) << 6;
744 fd
|= ((fs
>> 63) & 1) << 7;