/*
 * Loongson Multimedia Instruction emulation helpers for QEMU.
 *
 * Copyright (c) 2011  Richard Henderson <rth@twiddle.net>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20 #include "qemu/osdep.h"
22 #include "exec/helper-proto.h"
/* If the byte ordering doesn't matter, i.e. all columns are treated
   identically, then this union can be used directly.  If byte ordering
   does matter, we generally ignore dumping to memory.  */
typedef union {
    uint8_t  ub[8];
    int8_t   sb[8];
    uint16_t uh[4];
    int16_t  sh[4];
    uint32_t uw[2];
    int32_t  sw[2];
    uint64_t d;
} LMIValue;

/* Some byte ordering issues can be mitigated by XORing in the following.  */
#ifdef HOST_WORDS_BIGENDIAN
# define BYTE_ORDER_XOR(N)  N
#else
# define BYTE_ORDER_XOR(N)  0
#endif

/* Saturate x into the signed/unsigned range of a byte, halfword or word.
   Arguments must be simple lvalues/values: each macro evaluates x twice.  */
#define SATSB(x)  (x < -0x80 ? -0x80 : x > 0x7f ? 0x7f : x)
#define SATUB(x)  (x > 0xff ? 0xff : x)

#define SATSH(x)  (x < -0x8000 ? -0x8000 : x > 0x7fff ? 0x7fff : x)
#define SATUH(x)  (x > 0xffff ? 0xffff : x)

#define SATSW(x) \
    (x < -0x80000000ll ? -0x80000000ll : x > 0x7fffffff ? 0x7fffffff : x)
#define SATUW(x)  (x > 0xffffffffull ? 0xffffffffull : x)
54 uint64_t helper_paddsb(uint64_t fs
, uint64_t ft
)
61 for (i
= 0; i
< 8; ++i
) {
62 int r
= vs
.sb
[i
] + vt
.sb
[i
];
68 uint64_t helper_paddusb(uint64_t fs
, uint64_t ft
)
75 for (i
= 0; i
< 8; ++i
) {
76 int r
= vs
.ub
[i
] + vt
.ub
[i
];
82 uint64_t helper_paddsh(uint64_t fs
, uint64_t ft
)
89 for (i
= 0; i
< 4; ++i
) {
90 int r
= vs
.sh
[i
] + vt
.sh
[i
];
96 uint64_t helper_paddush(uint64_t fs
, uint64_t ft
)
103 for (i
= 0; i
< 4; ++i
) {
104 int r
= vs
.uh
[i
] + vt
.uh
[i
];
110 uint64_t helper_paddb(uint64_t fs
, uint64_t ft
)
117 for (i
= 0; i
< 8; ++i
) {
118 vs
.ub
[i
] += vt
.ub
[i
];
123 uint64_t helper_paddh(uint64_t fs
, uint64_t ft
)
130 for (i
= 0; i
< 4; ++i
) {
131 vs
.uh
[i
] += vt
.uh
[i
];
136 uint64_t helper_paddw(uint64_t fs
, uint64_t ft
)
143 for (i
= 0; i
< 2; ++i
) {
144 vs
.uw
[i
] += vt
.uw
[i
];
149 uint64_t helper_psubsb(uint64_t fs
, uint64_t ft
)
156 for (i
= 0; i
< 8; ++i
) {
157 int r
= vs
.sb
[i
] - vt
.sb
[i
];
163 uint64_t helper_psubusb(uint64_t fs
, uint64_t ft
)
170 for (i
= 0; i
< 8; ++i
) {
171 int r
= vs
.ub
[i
] - vt
.ub
[i
];
177 uint64_t helper_psubsh(uint64_t fs
, uint64_t ft
)
184 for (i
= 0; i
< 4; ++i
) {
185 int r
= vs
.sh
[i
] - vt
.sh
[i
];
191 uint64_t helper_psubush(uint64_t fs
, uint64_t ft
)
198 for (i
= 0; i
< 4; ++i
) {
199 int r
= vs
.uh
[i
] - vt
.uh
[i
];
205 uint64_t helper_psubb(uint64_t fs
, uint64_t ft
)
212 for (i
= 0; i
< 8; ++i
) {
213 vs
.ub
[i
] -= vt
.ub
[i
];
218 uint64_t helper_psubh(uint64_t fs
, uint64_t ft
)
225 for (i
= 0; i
< 4; ++i
) {
226 vs
.uh
[i
] -= vt
.uh
[i
];
231 uint64_t helper_psubw(uint64_t fs
, uint64_t ft
)
238 for (i
= 0; i
< 2; ++i
) {
239 vs
.uw
[i
] -= vt
.uw
[i
];
244 uint64_t helper_pshufh(uint64_t fs
, uint64_t ft
)
246 unsigned host
= BYTE_ORDER_XOR(3);
252 for (i
= 0; i
< 4; i
++, ft
>>= 2) {
253 vd
.uh
[i
^ host
] = vs
.uh
[(ft
& 3) ^ host
];
/* PACKSSWH: pack the four 32-bit words of fs (low half of result) and
   ft (high half) into halfwords with signed saturation.  */
uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    int64_t tmp;

    tmp = (int32_t)(fs >> 0);
    tmp = SATSH(tmp);
    fd |= (tmp & 0xffff) << 0;

    tmp = (int32_t)(fs >> 32);
    tmp = SATSH(tmp);
    fd |= (tmp & 0xffff) << 16;

    tmp = (int32_t)(ft >> 0);
    tmp = SATSH(tmp);
    fd |= (tmp & 0xffff) << 32;

    tmp = (int32_t)(ft >> 32);
    tmp = SATSH(tmp);
    fd |= (tmp & 0xffff) << 48;

    return fd;
}
/* PACKSSHB: pack the eight 16-bit halfwords of fs (low four result bytes)
   and ft (high four) into bytes with signed saturation.  */
uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 4; ++i) {
        int16_t tmp = fs >> (i * 16);
        tmp = SATSB(tmp);
        fd |= (uint64_t)(tmp & 0xff) << (i * 8);
    }
    for (i = 0; i < 4; ++i) {
        int16_t tmp = ft >> (i * 16);
        tmp = SATSB(tmp);
        fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32);
    }

    return fd;
}
/* PACKUSHB: pack the eight signed 16-bit halfwords of fs (low four result
   bytes) and ft (high four) into bytes with unsigned saturation.  */
uint64_t helper_packushb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 4; ++i) {
        int16_t tmp = fs >> (i * 16);
        tmp = SATUB(tmp);
        fd |= (uint64_t)(tmp & 0xff) << (i * 8);
    }
    for (i = 0; i < 4; ++i) {
        int16_t tmp = ft >> (i * 16);
        tmp = SATUB(tmp);
        fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32);
    }

    return fd;
}
/* PUNPCKLWD: interleave the low 32-bit words: low word of fs in the low
   half of the result, low word of ft in the high half.  */
uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
{
    return (fs & 0xffffffff) | (ft << 32);
}
/* PUNPCKHWD: interleave the high 32-bit words: high word of fs in the low
   half of the result, high word of ft kept in the high half.  */
uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
{
    return (fs >> 32) | (ft & ~0xffffffffull);
}
330 uint64_t helper_punpcklhw(uint64_t fs
, uint64_t ft
)
332 unsigned host
= BYTE_ORDER_XOR(3);
337 vd
.uh
[0 ^ host
] = vs
.uh
[0 ^ host
];
338 vd
.uh
[1 ^ host
] = vt
.uh
[0 ^ host
];
339 vd
.uh
[2 ^ host
] = vs
.uh
[1 ^ host
];
340 vd
.uh
[3 ^ host
] = vt
.uh
[1 ^ host
];
345 uint64_t helper_punpckhhw(uint64_t fs
, uint64_t ft
)
347 unsigned host
= BYTE_ORDER_XOR(3);
352 vd
.uh
[0 ^ host
] = vs
.uh
[2 ^ host
];
353 vd
.uh
[1 ^ host
] = vt
.uh
[2 ^ host
];
354 vd
.uh
[2 ^ host
] = vs
.uh
[3 ^ host
];
355 vd
.uh
[3 ^ host
] = vt
.uh
[3 ^ host
];
360 uint64_t helper_punpcklbh(uint64_t fs
, uint64_t ft
)
362 unsigned host
= BYTE_ORDER_XOR(7);
367 vd
.ub
[0 ^ host
] = vs
.ub
[0 ^ host
];
368 vd
.ub
[1 ^ host
] = vt
.ub
[0 ^ host
];
369 vd
.ub
[2 ^ host
] = vs
.ub
[1 ^ host
];
370 vd
.ub
[3 ^ host
] = vt
.ub
[1 ^ host
];
371 vd
.ub
[4 ^ host
] = vs
.ub
[2 ^ host
];
372 vd
.ub
[5 ^ host
] = vt
.ub
[2 ^ host
];
373 vd
.ub
[6 ^ host
] = vs
.ub
[3 ^ host
];
374 vd
.ub
[7 ^ host
] = vt
.ub
[3 ^ host
];
379 uint64_t helper_punpckhbh(uint64_t fs
, uint64_t ft
)
381 unsigned host
= BYTE_ORDER_XOR(7);
386 vd
.ub
[0 ^ host
] = vs
.ub
[4 ^ host
];
387 vd
.ub
[1 ^ host
] = vt
.ub
[4 ^ host
];
388 vd
.ub
[2 ^ host
] = vs
.ub
[5 ^ host
];
389 vd
.ub
[3 ^ host
] = vt
.ub
[5 ^ host
];
390 vd
.ub
[4 ^ host
] = vs
.ub
[6 ^ host
];
391 vd
.ub
[5 ^ host
] = vt
.ub
[6 ^ host
];
392 vd
.ub
[6 ^ host
] = vs
.ub
[7 ^ host
];
393 vd
.ub
[7 ^ host
] = vt
.ub
[7 ^ host
];
398 uint64_t helper_pavgh(uint64_t fs
, uint64_t ft
)
405 for (i
= 0; i
< 4; i
++) {
406 vs
.uh
[i
] = (vs
.uh
[i
] + vt
.uh
[i
] + 1) >> 1;
411 uint64_t helper_pavgb(uint64_t fs
, uint64_t ft
)
418 for (i
= 0; i
< 8; i
++) {
419 vs
.ub
[i
] = (vs
.ub
[i
] + vt
.ub
[i
] + 1) >> 1;
424 uint64_t helper_pmaxsh(uint64_t fs
, uint64_t ft
)
431 for (i
= 0; i
< 4; i
++) {
432 vs
.sh
[i
] = (vs
.sh
[i
] >= vt
.sh
[i
] ? vs
.sh
[i
] : vt
.sh
[i
]);
437 uint64_t helper_pminsh(uint64_t fs
, uint64_t ft
)
444 for (i
= 0; i
< 4; i
++) {
445 vs
.sh
[i
] = (vs
.sh
[i
] <= vt
.sh
[i
] ? vs
.sh
[i
] : vt
.sh
[i
]);
450 uint64_t helper_pmaxub(uint64_t fs
, uint64_t ft
)
457 for (i
= 0; i
< 4; i
++) {
458 vs
.ub
[i
] = (vs
.ub
[i
] >= vt
.ub
[i
] ? vs
.ub
[i
] : vt
.ub
[i
]);
463 uint64_t helper_pminub(uint64_t fs
, uint64_t ft
)
470 for (i
= 0; i
< 4; i
++) {
471 vs
.ub
[i
] = (vs
.ub
[i
] <= vt
.ub
[i
] ? vs
.ub
[i
] : vt
.ub
[i
]);
476 uint64_t helper_pcmpeqw(uint64_t fs
, uint64_t ft
)
483 for (i
= 0; i
< 2; i
++) {
484 vs
.uw
[i
] = -(vs
.uw
[i
] == vt
.uw
[i
]);
489 uint64_t helper_pcmpgtw(uint64_t fs
, uint64_t ft
)
496 for (i
= 0; i
< 2; i
++) {
497 vs
.uw
[i
] = -(vs
.uw
[i
] > vt
.uw
[i
]);
502 uint64_t helper_pcmpeqh(uint64_t fs
, uint64_t ft
)
509 for (i
= 0; i
< 4; i
++) {
510 vs
.uh
[i
] = -(vs
.uh
[i
] == vt
.uh
[i
]);
515 uint64_t helper_pcmpgth(uint64_t fs
, uint64_t ft
)
522 for (i
= 0; i
< 4; i
++) {
523 vs
.uh
[i
] = -(vs
.uh
[i
] > vt
.uh
[i
]);
528 uint64_t helper_pcmpeqb(uint64_t fs
, uint64_t ft
)
535 for (i
= 0; i
< 8; i
++) {
536 vs
.ub
[i
] = -(vs
.ub
[i
] == vt
.ub
[i
]);
541 uint64_t helper_pcmpgtb(uint64_t fs
, uint64_t ft
)
548 for (i
= 0; i
< 8; i
++) {
549 vs
.ub
[i
] = -(vs
.ub
[i
] > vt
.ub
[i
]);
554 uint64_t helper_psllw(uint64_t fs
, uint64_t ft
)
564 for (i
= 0; i
< 2; ++i
) {
570 uint64_t helper_psrlw(uint64_t fs
, uint64_t ft
)
580 for (i
= 0; i
< 2; ++i
) {
586 uint64_t helper_psraw(uint64_t fs
, uint64_t ft
)
596 for (i
= 0; i
< 2; ++i
) {
602 uint64_t helper_psllh(uint64_t fs
, uint64_t ft
)
612 for (i
= 0; i
< 4; ++i
) {
618 uint64_t helper_psrlh(uint64_t fs
, uint64_t ft
)
628 for (i
= 0; i
< 4; ++i
) {
634 uint64_t helper_psrah(uint64_t fs
, uint64_t ft
)
644 for (i
= 0; i
< 4; ++i
) {
650 uint64_t helper_pmullh(uint64_t fs
, uint64_t ft
)
657 for (i
= 0; i
< 4; ++i
) {
658 vs
.sh
[i
] *= vt
.sh
[i
];
663 uint64_t helper_pmulhh(uint64_t fs
, uint64_t ft
)
670 for (i
= 0; i
< 4; ++i
) {
671 int32_t r
= vs
.sh
[i
] * vt
.sh
[i
];
677 uint64_t helper_pmulhuh(uint64_t fs
, uint64_t ft
)
684 for (i
= 0; i
< 4; ++i
) {
685 uint32_t r
= vs
.uh
[i
] * vt
.uh
[i
];
691 uint64_t helper_pmaddhw(uint64_t fs
, uint64_t ft
)
693 unsigned host
= BYTE_ORDER_XOR(3);
699 p0
= vs
.sh
[0 ^ host
] * vt
.sh
[0 ^ host
];
700 p0
+= vs
.sh
[1 ^ host
] * vt
.sh
[1 ^ host
];
701 p1
= vs
.sh
[2 ^ host
] * vt
.sh
[2 ^ host
];
702 p1
+= vs
.sh
[3 ^ host
] * vt
.sh
[3 ^ host
];
704 return ((uint64_t)p1
<< 32) | p0
;
707 uint64_t helper_pasubub(uint64_t fs
, uint64_t ft
)
714 for (i
= 0; i
< 8; ++i
) {
715 int r
= vs
.ub
[i
] - vt
.ub
[i
];
716 vs
.ub
[i
] = (r
< 0 ? -r
: r
);
/* BIADD: horizontal sum of the eight unsigned bytes of fs, delivered in
   the low 16 bits of the result (max possible sum 8 * 255 = 2040).  */
uint64_t helper_biadd(uint64_t fs)
{
    unsigned i, fd;

    for (i = fd = 0; i < 8; ++i) {
        fd += (fs >> (i * 8)) & 0xff;
    }
    return fd & 0xffff;
}
/* PMOVMSKB: collect the most-significant bit of each of the eight bytes
   of fs into the low 8 bits of the result (byte 0 -> bit 0, ...).  */
uint64_t helper_pmovmskb(uint64_t fs)
{
    uint64_t fd = 0;

    fd |= ((fs >> 7) & 1) << 0;
    fd |= ((fs >> 15) & 1) << 1;
    fd |= ((fs >> 23) & 1) << 2;
    fd |= ((fs >> 31) & 1) << 3;
    fd |= ((fs >> 39) & 1) << 4;
    fd |= ((fs >> 47) & 1) << 5;
    fd |= ((fs >> 55) & 1) << 6;
    fd |= ((fs >> 63) & 1) << 7;

    return fd;
}