docs: Add a doc about multiple thread compression
[qemu/ar7.git] / target-mips / lmi_helper.c
blobbbfcd59cdb4354409fac6a67d90dfdaa5001db33
1 /*
2 * Loongson Multimedia Instruction emulation helpers for QEMU.
4 * Copyright (c) 2011 Richard Henderson <rth@twiddle.net>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "cpu.h"
21 #include "exec/helper-proto.h"
/* A 64-bit value that can also be viewed as packed 8-, 16- or 32-bit
   lanes.  Array indices follow host memory order, so the lane views are
   only safe to use directly when every lane is processed the same way.
   Helpers for which lane position matters either adjust the index (see
   BYTE_ORDER_XOR) or work on the 64-bit value with shifts. */
typedef union {
    uint8_t ub[8];      /* eight unsigned bytes */
    int8_t sb[8];       /* eight signed bytes */
    uint16_t uh[4];     /* four unsigned halfwords */
    int16_t sh[4];      /* four signed halfwords */
    uint32_t uw[2];     /* two unsigned words */
    int32_t sw[2];      /* two signed words */
    uint64_t d;         /* the whole doubleword */
} LMIValue;
/* Index correction for helpers where lane position matters.  Callers in
   this file pass the highest lane index (3 for halfwords, 7 for bytes)
   and XOR the result into each array index: on a big-endian host this
   reverses the index, on a little-endian host it leaves it unchanged.
   The expansion is parenthesized so that an expression argument cannot
   re-associate with surrounding operators. */
#ifdef HOST_WORDS_BIGENDIAN
# define BYTE_ORDER_XOR(N) (N)
#else
# define BYTE_ORDER_XOR(N) 0
#endif
/* Saturation helpers.  The signed variants clamp to both ends of the
   target range; the unsigned variants clamp the upper end only, so a
   negative argument is passed through unchanged.  Arguments are
   parenthesized so that any expression may be passed, but they are still
   evaluated more than once: do not pass expressions with side effects. */
#define SATSB(x)  ((x) < -0x80 ? -0x80 : (x) > 0x7f ? 0x7f : (x))
#define SATUB(x)  ((x) > 0xff ? 0xff : (x))

#define SATSH(x)  ((x) < -0x8000 ? -0x8000 : (x) > 0x7fff ? 0x7fff : (x))
#define SATUH(x)  ((x) > 0xffff ? 0xffff : (x))

#define SATSW(x) \
    ((x) < -0x80000000ll ? -0x80000000ll : (x) > 0x7fffffff ? 0x7fffffff : (x))
#define SATUW(x)  ((x) > 0xffffffffull ? 0xffffffffull : (x))
/* Add the eight signed bytes of fs and ft lane by lane, clamping each
   sum to the range [-128, 127]. */
uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        int sum = (int8_t)(fs >> shift) + (int8_t)(ft >> shift);

        if (sum > 0x7f) {
            sum = 0x7f;
        } else if (sum < -0x80) {
            sum = -0x80;
        }
        fd |= (uint64_t)(uint8_t)sum << shift;
    }
    return fd;
}
/* Add the eight unsigned bytes of fs and ft lane by lane, clamping each
   sum to 255. */
uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        unsigned int sum = (uint8_t)(fs >> shift) + (uint8_t)(ft >> shift);

        if (sum > 0xff) {
            sum = 0xff;
        }
        fd |= (uint64_t)sum << shift;
    }
    return fd;
}
/* Add the four signed halfwords of fs and ft lane by lane, clamping each
   sum to the range [-32768, 32767]. */
uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int sum = (int16_t)(fs >> shift) + (int16_t)(ft >> shift);

        if (sum > 0x7fff) {
            sum = 0x7fff;
        } else if (sum < -0x8000) {
            sum = -0x8000;
        }
        fd |= (uint64_t)(uint16_t)sum << shift;
    }
    return fd;
}
/* Add the four unsigned halfwords of fs and ft lane by lane, clamping
   each sum to 65535. */
uint64_t helper_paddush(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint32_t sum = (uint32_t)(uint16_t)(fs >> shift)
                     + (uint32_t)(uint16_t)(ft >> shift);

        if (sum > 0xffff) {
            sum = 0xffff;
        }
        fd |= (uint64_t)sum << shift;
    }
    return fd;
}
/* Add the eight bytes of fs and ft lane by lane with wrap-around; no
   carry crosses from one byte into the next. */
uint64_t helper_paddb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint8_t sum = (uint8_t)((fs >> shift) + (ft >> shift));

        fd |= (uint64_t)sum << shift;
    }
    return fd;
}
/* Add the four halfwords of fs and ft lane by lane with wrap-around; no
   carry crosses from one halfword into the next. */
uint64_t helper_paddh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint16_t sum = (uint16_t)((fs >> shift) + (ft >> shift));

        fd |= (uint64_t)sum << shift;
    }
    return fd;
}
/* Add the two words of fs and ft lane by lane with wrap-around; no carry
   crosses from the low word into the high word. */
uint64_t helper_paddw(uint64_t fs, uint64_t ft)
{
    uint32_t lo = (uint32_t)fs + (uint32_t)ft;
    uint32_t hi = (uint32_t)(fs >> 32) + (uint32_t)(ft >> 32);

    return ((uint64_t)hi << 32) | lo;
}
/* Subtract the eight signed bytes of ft from those of fs lane by lane,
   clamping each difference to the range [-128, 127]. */
uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        int diff = (int8_t)(fs >> shift) - (int8_t)(ft >> shift);

        if (diff > 0x7f) {
            diff = 0x7f;
        } else if (diff < -0x80) {
            diff = -0x80;
        }
        fd |= (uint64_t)(uint8_t)diff << shift;
    }
    return fd;
}
/* Subtract the eight unsigned bytes of ft from those of fs lane by lane
   with unsigned saturation: a lane whose difference would be negative
   yields 0. */
uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        unsigned int a = (uint8_t)(fs >> shift);
        unsigned int b = (uint8_t)(ft >> shift);

        /* The difference of two bytes can never exceed 0xff, so the only
           bound that can be hit is the lower one.  Lanes that would go
           negative are left at 0 instead of wrapping around. */
        if (a > b) {
            fd |= (uint64_t)(a - b) << shift;
        }
    }
    return fd;
}
/* Subtract the four signed halfwords of ft from those of fs lane by
   lane, clamping each difference to the range [-32768, 32767]. */
uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int diff = (int16_t)(fs >> shift) - (int16_t)(ft >> shift);

        if (diff > 0x7fff) {
            diff = 0x7fff;
        } else if (diff < -0x8000) {
            diff = -0x8000;
        }
        fd |= (uint64_t)(uint16_t)diff << shift;
    }
    return fd;
}
/* Subtract the four unsigned halfwords of ft from those of fs lane by
   lane with unsigned saturation: a lane whose difference would be
   negative yields 0. */
uint64_t helper_psubush(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint32_t a = (uint16_t)(fs >> shift);
        uint32_t b = (uint16_t)(ft >> shift);

        /* The difference of two halfwords can never exceed 0xffff, so the
           only bound that can be hit is the lower one.  Lanes that would
           go negative are left at 0 instead of wrapping around. */
        if (a > b) {
            fd |= (uint64_t)(a - b) << shift;
        }
    }
    return fd;
}
/* Subtract the eight bytes of ft from those of fs lane by lane with
   wrap-around; no borrow crosses from one byte into the next. */
uint64_t helper_psubb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint8_t diff = (uint8_t)((fs >> shift) - (ft >> shift));

        fd |= (uint64_t)diff << shift;
    }
    return fd;
}
/* Subtract the four halfwords of ft from those of fs lane by lane with
   wrap-around; no borrow crosses from one halfword into the next. */
uint64_t helper_psubh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint16_t diff = (uint16_t)((fs >> shift) - (ft >> shift));

        fd |= (uint64_t)diff << shift;
    }
    return fd;
}
/* Subtract the two words of ft from those of fs lane by lane with
   wrap-around; no borrow crosses from the low word into the high word. */
uint64_t helper_psubw(uint64_t fs, uint64_t ft)
{
    uint32_t lo = (uint32_t)fs - (uint32_t)ft;
    uint32_t hi = (uint32_t)(fs >> 32) - (uint32_t)(ft >> 32);

    return ((uint64_t)hi << 32) | lo;
}
/* Shuffle the halfwords of fs.  Result lane i (lane 0 being the
   least-significant halfword) is the halfword of fs selected by bits
   [2i+1:2i] of ft; only the low 8 bits of ft are used. */
uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int lane;

    for (lane = 0; lane < 4; lane++) {
        unsigned int sel = (unsigned int)(ft >> (2 * lane)) & 3;
        uint64_t half = (fs >> (16 * sel)) & 0xffff;

        fd |= half << (16 * lane);
    }
    return fd;
}
/* Pack four signed words into four signed halfwords with saturation.
   The two words of fs supply result halfwords 0 and 1, the two words of
   ft supply halfwords 2 and 3 (halfword 0 being the least significant);
   each word is clamped to [-32768, 32767] before being narrowed. */
uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
{
    const uint64_t src[2] = { fs, ft };
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 4; ++i) {
        int32_t word = (int32_t)(src[i / 2] >> ((i % 2) * 32));

        if (word > 0x7fff) {
            word = 0x7fff;
        } else if (word < -0x8000) {
            word = -0x8000;
        }
        /* Widen through an unsigned type before shifting: shifting a
           signed 64-bit value into the sign bit is undefined. */
        fd |= (uint64_t)(uint16_t)word << (i * 16);
    }
    return fd;
}
/* Pack eight signed halfwords into eight signed bytes with saturation.
   The halfwords of fs supply result bytes 0-3, those of ft supply bytes
   4-7 (byte 0 being the least significant); each halfword is clamped to
   [-128, 127] before being narrowed. */
uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
{
    const uint64_t src[2] = { fs, ft };
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 8; ++i) {
        int val = (int16_t)(src[i / 4] >> ((i % 4) * 16));

        if (val > 0x7f) {
            val = 0x7f;
        } else if (val < -0x80) {
            val = -0x80;
        }
        fd |= (uint64_t)(val & 0xff) << (i * 8);
    }
    return fd;
}
/* Pack eight halfwords into eight bytes, clamping values above 255.
   The halfwords of fs supply result bytes 0-3, those of ft supply bytes
   4-7 (byte 0 being the least significant).  Each halfword is read as a
   signed value: anything greater than 255 becomes 255, everything else
   contributes its low 8 bits.
   NOTE(review): negative halfwords are therefore not clamped to 0; only
   their low byte is kept.  Confirm this matches the intended saturation
   behaviour of the instruction. */
uint64_t helper_packushb(uint64_t fs, uint64_t ft)
{
    const uint64_t src[2] = { fs, ft };
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 8; ++i) {
        int val = (int16_t)(src[i / 4] >> ((i % 4) * 16));

        if (val > 0xff) {
            val = 0xff;
        }
        fd |= (uint64_t)(val & 0xff) << (i * 8);
    }
    return fd;
}
/* Interleave the low words: the result's low word is the low word of fs
   and its high word is the low word of ft. */
uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
{
    uint64_t lo = (uint32_t)fs;
    uint64_t hi = ft << 32;

    return hi | lo;
}
/* Interleave the high words: the result's low word is the high word of
   fs and its high word is the high word of ft. */
uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
{
    uint64_t lo = fs >> 32;
    uint64_t hi = (ft >> 32) << 32;

    return hi | lo;
}
/* Interleave the two low halfwords of fs and ft.  Counting lane 0 as the
   least-significant halfword, the result lanes are
   { fs[0], ft[0], fs[1], ft[1] }. */
uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int k;

    for (k = 0; k < 2; k++) {
        uint64_t from_s = (fs >> (16 * k)) & 0xffff;
        uint64_t from_t = (ft >> (16 * k)) & 0xffff;

        fd |= from_s << (32 * k);
        fd |= from_t << (32 * k + 16);
    }
    return fd;
}
/* Interleave the two high halfwords of fs and ft.  Counting lane 0 as
   the least-significant halfword, the result lanes are
   { fs[2], ft[2], fs[3], ft[3] }. */
uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int k;

    for (k = 0; k < 2; k++) {
        uint64_t from_s = (fs >> (16 * (k + 2))) & 0xffff;
        uint64_t from_t = (ft >> (16 * (k + 2))) & 0xffff;

        fd |= from_s << (32 * k);
        fd |= from_t << (32 * k + 16);
    }
    return fd;
}
/* Interleave the four low bytes of fs and ft.  Counting byte 0 as the
   least significant, result byte 2k is fs byte k and result byte 2k+1 is
   ft byte k, for k = 0..3. */
uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int k;

    for (k = 0; k < 4; k++) {
        uint64_t from_s = (fs >> (8 * k)) & 0xff;
        uint64_t from_t = (ft >> (8 * k)) & 0xff;

        fd |= from_s << (16 * k);
        fd |= from_t << (16 * k + 8);
    }
    return fd;
}
/* Interleave the four high bytes of fs and ft.  Counting byte 0 as the
   least significant, result byte 2k is fs byte 4+k and result byte 2k+1
   is ft byte 4+k, for k = 0..3. */
uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int k;

    for (k = 0; k < 4; k++) {
        uint64_t from_s = (fs >> (8 * (k + 4))) & 0xff;
        uint64_t from_t = (ft >> (8 * (k + 4))) & 0xff;

        fd |= from_s << (16 * k);
        fd |= from_t << (16 * k + 8);
    }
    return fd;
}
/* Average the four unsigned halfwords of fs and ft lane by lane,
   rounding up: each lane is (a + b + 1) >> 1. */
uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint32_t a = (uint16_t)(fs >> shift);
        uint32_t b = (uint16_t)(ft >> shift);

        fd |= (uint64_t)((a + b + 1) >> 1) << shift;
    }
    return fd;
}
/* Average the eight unsigned bytes of fs and ft lane by lane, rounding
   up: each lane is (a + b + 1) >> 1. */
uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        unsigned int a = (uint8_t)(fs >> shift);
        unsigned int b = (uint8_t)(ft >> shift);

        fd |= (uint64_t)((a + b + 1) >> 1) << shift;
    }
    return fd;
}
/* Lane-wise maximum of the four signed halfwords of fs and ft. */
uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int16_t a = (int16_t)(fs >> shift);
        int16_t b = (int16_t)(ft >> shift);
        int16_t pick = b;

        if (a >= b) {
            pick = a;
        }
        fd |= (uint64_t)(uint16_t)pick << shift;
    }
    return fd;
}
/* Lane-wise minimum of the four signed halfwords of fs and ft. */
uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int16_t a = (int16_t)(fs >> shift);
        int16_t b = (int16_t)(ft >> shift);
        int16_t pick = b;

        if (a <= b) {
            pick = a;
        }
        fd |= (uint64_t)(uint16_t)pick << shift;
    }
    return fd;
}
/* Lane-wise maximum of the eight unsigned bytes of fs and ft. */
uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    /* All eight byte lanes take part, not just the first four. */
    for (shift = 0; shift < 64; shift += 8) {
        uint8_t a = (uint8_t)(fs >> shift);
        uint8_t b = (uint8_t)(ft >> shift);

        fd |= (uint64_t)(a >= b ? a : b) << shift;
    }
    return fd;
}
/* Lane-wise minimum of the eight unsigned bytes of fs and ft. */
uint64_t helper_pminub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    /* All eight byte lanes take part, not just the first four. */
    for (shift = 0; shift < 64; shift += 8) {
        uint8_t a = (uint8_t)(fs >> shift);
        uint8_t b = (uint8_t)(ft >> shift);

        fd |= (uint64_t)(a <= b ? a : b) << shift;
    }
    return fd;
}
/* Compare the two words of fs and ft for equality.  A result word is
   all ones where the lanes match and zero where they differ. */
uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 32) {
        if ((uint32_t)(fs >> shift) == (uint32_t)(ft >> shift)) {
            fd |= (uint64_t)0xffffffffu << shift;
        }
    }
    return fd;
}
/* Compare the two words of fs and ft.  A result word is all ones where
   the fs lane is greater than the ft lane and zero otherwise.
   NOTE(review): the lanes are compared as unsigned values here; confirm
   that this matches the signedness the instruction is meant to have. */
uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 32) {
        if ((uint32_t)(fs >> shift) > (uint32_t)(ft >> shift)) {
            fd |= (uint64_t)0xffffffffu << shift;
        }
    }
    return fd;
}
/* Compare the four halfwords of fs and ft for equality.  A result
   halfword is all ones where the lanes match and zero where they
   differ. */
uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        if ((uint16_t)(fs >> shift) == (uint16_t)(ft >> shift)) {
            fd |= (uint64_t)0xffff << shift;
        }
    }
    return fd;
}
/* Compare the four halfwords of fs and ft.  A result halfword is all
   ones where the fs lane is greater than the ft lane and zero otherwise.
   NOTE(review): the lanes are compared as unsigned values here; confirm
   that this matches the signedness the instruction is meant to have. */
uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        if ((uint16_t)(fs >> shift) > (uint16_t)(ft >> shift)) {
            fd |= (uint64_t)0xffff << shift;
        }
    }
    return fd;
}
/* Compare the eight bytes of fs and ft for equality.  A result byte is
   0xff where the lanes match and zero where they differ. */
uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        if ((uint8_t)(fs >> shift) == (uint8_t)(ft >> shift)) {
            fd |= (uint64_t)0xff << shift;
        }
    }
    return fd;
}
/* Compare the eight bytes of fs and ft.  A result byte is 0xff where
   the fs lane is greater than the ft lane and zero otherwise.
   NOTE(review): the lanes are compared as unsigned values here; confirm
   that this matches the signedness the instruction is meant to have. */
uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        if ((uint8_t)(fs >> shift) > (uint8_t)(ft >> shift)) {
            fd |= (uint64_t)0xff << shift;
        }
    }
    return fd;
}
/* Shift both words of fs left by the same amount.  The amount is the
   low 7 bits of ft; an amount above 31 yields 0. */
uint64_t helper_psllw(uint64_t fs, uint64_t ft)
{
    unsigned int count = (unsigned int)(ft & 0x7f);
    uint32_t lo, hi;

    if (count > 31) {
        return 0;
    }
    lo = (uint32_t)fs << count;
    hi = (uint32_t)(fs >> 32) << count;
    return ((uint64_t)hi << 32) | lo;
}
/* Shift both words of fs right by the same amount, filling with zeros.
   The amount is the low 7 bits of ft; an amount above 31 yields 0. */
uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
{
    unsigned int count = (unsigned int)(ft & 0x7f);
    uint32_t lo, hi;

    if (count > 31) {
        return 0;
    }
    lo = (uint32_t)fs >> count;
    hi = (uint32_t)(fs >> 32) >> count;
    return ((uint64_t)hi << 32) | lo;
}
/* Shift both signed words of fs right by the same amount, replicating
   the sign bit.  The amount is the low 7 bits of ft; an amount above 31
   is treated as 31, so each lane becomes 0 or -1. */
uint64_t helper_psraw(uint64_t fs, uint64_t ft)
{
    unsigned int count = (unsigned int)(ft & 0x7f);
    int32_t lo = (int32_t)(uint32_t)fs;
    int32_t hi = (int32_t)(uint32_t)(fs >> 32);

    if (count > 31) {
        count = 31;
    }
    lo >>= count;
    hi >>= count;
    return ((uint64_t)(uint32_t)hi << 32) | (uint32_t)lo;
}
/* Shift all four halfwords of fs left by the same amount.  The amount
   is the low 7 bits of ft; an amount above 15 yields 0. */
uint64_t helper_psllh(uint64_t fs, uint64_t ft)
{
    unsigned int count = (unsigned int)(ft & 0x7f);
    uint64_t fd = 0;
    unsigned int shift;

    if (count > 15) {
        return 0;
    }
    for (shift = 0; shift < 64; shift += 16) {
        uint32_t lane = (uint16_t)(fs >> shift);

        fd |= (uint64_t)(uint16_t)(lane << count) << shift;
    }
    return fd;
}
/* Shift all four halfwords of fs right by the same amount, filling with
   zeros.  The amount is the low 7 bits of ft; an amount above 15
   yields 0. */
uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
{
    unsigned int count = (unsigned int)(ft & 0x7f);
    uint64_t fd = 0;
    unsigned int shift;

    if (count > 15) {
        return 0;
    }
    for (shift = 0; shift < 64; shift += 16) {
        uint32_t lane = (uint16_t)(fs >> shift);

        fd |= (uint64_t)(lane >> count) << shift;
    }
    return fd;
}
/* Shift all four signed halfwords of fs right by the same amount,
   replicating the sign bit.  The amount is the low 7 bits of ft; an
   amount above 15 is treated as 15, so each lane becomes 0 or -1. */
uint64_t helper_psrah(uint64_t fs, uint64_t ft)
{
    unsigned int count = (unsigned int)(ft & 0x7f);
    uint64_t fd = 0;
    unsigned int shift;

    if (count > 15) {
        count = 15;
    }
    for (shift = 0; shift < 64; shift += 16) {
        int16_t lane = (int16_t)(fs >> shift);

        fd |= (uint64_t)(uint16_t)(lane >> count) << shift;
    }
    return fd;
}
/* Multiply the four signed halfwords of fs and ft lane by lane and keep
   the low 16 bits of each product. */
uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int32_t product = (int16_t)(fs >> shift) * (int16_t)(ft >> shift);

        fd |= (uint64_t)(uint16_t)product << shift;
    }
    return fd;
}
/* Multiply the four signed halfwords of fs and ft lane by lane and keep
   the high 16 bits of each 32-bit product. */
uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int32_t product = (int16_t)(fs >> shift) * (int16_t)(ft >> shift);

        fd |= (uint64_t)(uint16_t)(product >> 16) << shift;
    }
    return fd;
}
/* Multiply the four unsigned halfwords of fs and ft lane by lane and
   keep the high 16 bits of each 32-bit product. */
uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        /* Widen to uint32_t before multiplying: two uint16_t operands
           would be promoted to int, and 0xffff * 0xffff does not fit in
           a 32-bit int. */
        uint32_t a = (uint16_t)(fs >> shift);
        uint32_t b = (uint16_t)(ft >> shift);
        uint32_t product = a * b;

        fd |= (uint64_t)(product >> 16) << shift;
    }
    return fd;
}
/* Multiply the four signed halfwords of fs and ft lane by lane and add
   adjacent products.  Counting lane 0 as the least-significant halfword,
   the result's low word is fs[0]*ft[0] + fs[1]*ft[1] and its high word
   is fs[2]*ft[2] + fs[3]*ft[3]; each sum wraps modulo 2^32. */
uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
{
    uint32_t acc[2] = { 0, 0 };
    unsigned int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t a = (int16_t)(fs >> (16 * lane));
        int32_t b = (int16_t)(ft >> (16 * lane));

        /* Accumulate in unsigned arithmetic so the sum wraps. */
        acc[lane / 2] += (uint32_t)(a * b);
    }
    return ((uint64_t)acc[1] << 32) | acc[0];
}
/* Lane-wise absolute difference of the eight unsigned bytes of fs and
   ft. */
uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        unsigned int a = (uint8_t)(fs >> shift);
        unsigned int b = (uint8_t)(ft >> shift);
        unsigned int gap = a > b ? a - b : b - a;

        fd |= (uint64_t)gap << shift;
    }
    return fd;
}
/* Sum the eight unsigned bytes of fs and return the total masked to
   16 bits. */
uint64_t helper_biadd(uint64_t fs)
{
    unsigned int total = 0;

    /* Consume one byte per iteration until nothing is left. */
    for (; fs != 0; fs >>= 8) {
        total += (unsigned int)(fs & 0xff);
    }
    return total & 0xffff;
}
/* Collect the most-significant bit of each byte of fs into an 8-bit
   mask: bit i of the result is bit 7 of byte i, byte 0 being the least
   significant. */
uint64_t helper_pmovmskb(uint64_t fs)
{
    unsigned int mask = 0;
    unsigned int lane;

    for (lane = 0; lane < 8; lane++) {
        unsigned int msb = (unsigned int)(fs >> (8 * lane + 7)) & 1;

        mask |= msb << lane;
    }
    return mask & 0xff;
}