block: Pass unaligned discard requests to drivers
[qemu/ar7.git] / target-mips / lmi_helper.c
blobfb1245b39d826a7d35d6e6b7eb55b769abb6e532
1 /*
2 * Loongson Multimedia Instruction emulation helpers for QEMU.
4 * Copyright (c) 2011 Richard Henderson <rth@twiddle.net>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/helper-proto.h"
/*
 * A 64-bit LMI register viewed as packed lanes of various widths.
 *
 * When every lane is processed the same way, the lane index does not
 * depend on host byte order and the arrays can be indexed directly.
 * When a specific lane position matters, index through BYTE_ORDER_XOR
 * or work on the 64-bit value with shifts instead.
 */
typedef union {
    uint64_t d;         /* the whole register */
    uint32_t uw[2];     /* two unsigned words */
    int32_t  sw[2];     /* two signed words */
    uint16_t uh[4];     /* four unsigned halfwords */
    int16_t  sh[4];     /* four signed halfwords */
    uint8_t  ub[8];     /* eight unsigned bytes */
    int8_t   sb[8];     /* eight signed bytes */
} LMIValue;
/*
 * Lane-index fix-up for helpers that address specific lanes of LMIValue.
 * On a big-endian host the macro yields N, which callers XOR into the
 * array index; on a little-endian host it yields 0 and the index is used
 * unchanged.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define BYTE_ORDER_XOR(N) N
#else
#define BYTE_ORDER_XOR(N) 0
#endif
/*
 * Saturation helpers.  Each clamps x to the range of the named type:
 *   SATSB/SATSH/SATSW - signed byte / halfword / word (both bounds)
 *   SATUB/SATUH/SATUW - unsigned byte / halfword / word (upper bound only;
 *                       a negative x is returned unchanged)
 *
 * The argument is fully parenthesised so that expressions containing
 * low-precedence operators expand correctly.  It is still evaluated more
 * than once, so do not pass expressions with side effects.
 */
#define SATSB(x)  ((x) < -0x80 ? -0x80 : (x) > 0x7f ? 0x7f : (x))
#define SATUB(x)  ((x) > 0xff ? 0xff : (x))

#define SATSH(x)  ((x) < -0x8000 ? -0x8000 : (x) > 0x7fff ? 0x7fff : (x))
#define SATUH(x)  ((x) > 0xffff ? 0xffff : (x))

#define SATSW(x) \
    ((x) < -0x80000000ll ? -0x80000000ll : (x) > 0x7fffffff ? 0x7fffffff : (x))
#define SATUW(x)  ((x) > 0xffffffffull ? 0xffffffffull : (x))
54 uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
56 LMIValue vs, vt;
57 unsigned int i;
59 vs.d = fs;
60 vt.d = ft;
61 for (i = 0; i < 8; ++i) {
62 int r = vs.sb[i] + vt.sb[i];
63 vs.sb[i] = SATSB(r);
65 return vs.d;
68 uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
70 LMIValue vs, vt;
71 unsigned int i;
73 vs.d = fs;
74 vt.d = ft;
75 for (i = 0; i < 8; ++i) {
76 int r = vs.ub[i] + vt.ub[i];
77 vs.ub[i] = SATUB(r);
79 return vs.d;
82 uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
84 LMIValue vs, vt;
85 unsigned int i;
87 vs.d = fs;
88 vt.d = ft;
89 for (i = 0; i < 4; ++i) {
90 int r = vs.sh[i] + vt.sh[i];
91 vs.sh[i] = SATSH(r);
93 return vs.d;
96 uint64_t helper_paddush(uint64_t fs, uint64_t ft)
98 LMIValue vs, vt;
99 unsigned int i;
101 vs.d = fs;
102 vt.d = ft;
103 for (i = 0; i < 4; ++i) {
104 int r = vs.uh[i] + vt.uh[i];
105 vs.uh[i] = SATUH(r);
107 return vs.d;
110 uint64_t helper_paddb(uint64_t fs, uint64_t ft)
112 LMIValue vs, vt;
113 unsigned int i;
115 vs.d = fs;
116 vt.d = ft;
117 for (i = 0; i < 8; ++i) {
118 vs.ub[i] += vt.ub[i];
120 return vs.d;
123 uint64_t helper_paddh(uint64_t fs, uint64_t ft)
125 LMIValue vs, vt;
126 unsigned int i;
128 vs.d = fs;
129 vt.d = ft;
130 for (i = 0; i < 4; ++i) {
131 vs.uh[i] += vt.uh[i];
133 return vs.d;
136 uint64_t helper_paddw(uint64_t fs, uint64_t ft)
138 LMIValue vs, vt;
139 unsigned int i;
141 vs.d = fs;
142 vt.d = ft;
143 for (i = 0; i < 2; ++i) {
144 vs.uw[i] += vt.uw[i];
146 return vs.d;
149 uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
151 LMIValue vs, vt;
152 unsigned int i;
154 vs.d = fs;
155 vt.d = ft;
156 for (i = 0; i < 8; ++i) {
157 int r = vs.sb[i] - vt.sb[i];
158 vs.sb[i] = SATSB(r);
160 return vs.d;
163 uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
165 LMIValue vs, vt;
166 unsigned int i;
168 vs.d = fs;
169 vt.d = ft;
170 for (i = 0; i < 8; ++i) {
171 int r = vs.ub[i] - vt.ub[i];
172 vs.ub[i] = SATUB(r);
174 return vs.d;
177 uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
179 LMIValue vs, vt;
180 unsigned int i;
182 vs.d = fs;
183 vt.d = ft;
184 for (i = 0; i < 4; ++i) {
185 int r = vs.sh[i] - vt.sh[i];
186 vs.sh[i] = SATSH(r);
188 return vs.d;
191 uint64_t helper_psubush(uint64_t fs, uint64_t ft)
193 LMIValue vs, vt;
194 unsigned int i;
196 vs.d = fs;
197 vt.d = ft;
198 for (i = 0; i < 4; ++i) {
199 int r = vs.uh[i] - vt.uh[i];
200 vs.uh[i] = SATUH(r);
202 return vs.d;
205 uint64_t helper_psubb(uint64_t fs, uint64_t ft)
207 LMIValue vs, vt;
208 unsigned int i;
210 vs.d = fs;
211 vt.d = ft;
212 for (i = 0; i < 8; ++i) {
213 vs.ub[i] -= vt.ub[i];
215 return vs.d;
218 uint64_t helper_psubh(uint64_t fs, uint64_t ft)
220 LMIValue vs, vt;
221 unsigned int i;
223 vs.d = fs;
224 vt.d = ft;
225 for (i = 0; i < 4; ++i) {
226 vs.uh[i] -= vt.uh[i];
228 return vs.d;
231 uint64_t helper_psubw(uint64_t fs, uint64_t ft)
233 LMIValue vs, vt;
234 unsigned int i;
236 vs.d = fs;
237 vt.d = ft;
238 for (i = 0; i < 2; ++i) {
239 vs.uw[i] -= vt.uw[i];
241 return vs.d;
244 uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
246 unsigned host = BYTE_ORDER_XOR(3);
247 LMIValue vd, vs;
248 unsigned i;
250 vs.d = fs;
251 vd.d = 0;
252 for (i = 0; i < 4; i++, ft >>= 2) {
253 vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host];
255 return vd.d;
/*
 * Pack four signed words into four signed halfwords with saturation.
 * The low and high words of fs become result halfwords 0 and 1; the low
 * and high words of ft become halfwords 2 and 3.  Each word is clamped to
 * [-0x8000, 0x7fff] by SATSH before being narrowed.
 */
uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
{
    const uint32_t words[4] = {
        (uint32_t)fs, (uint32_t)(fs >> 32),
        (uint32_t)ft, (uint32_t)(ft >> 32),
    };
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 4; ++i) {
        int64_t tmp = (int32_t)words[i];

        tmp = SATSH(tmp);
        /*
         * Shift as unsigned: a halfword with bit 15 set, moved to the top
         * lane, does not fit in int64_t, and a signed left shift that
         * overflows is undefined behaviour.
         */
        fd |= (uint64_t)(tmp & 0xffff) << (i * 16);
    }
    return fd;
}
/*
 * Pack eight signed halfwords into eight signed bytes with saturation.
 * The four halfwords of fs fill result bytes 0..3 and the four halfwords
 * of ft fill bytes 4..7; each is clamped to [-0x80, 0x7f] by SATSB.
 */
uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
{
    uint64_t packed = 0;

    for (unsigned k = 0; k < 4; k++) {
        int16_t lo = fs >> (k * 16);
        int16_t hi = ft >> (k * 16);

        lo = SATSB(lo);
        hi = SATSB(hi);
        packed |= (uint64_t)(lo & 0xff) << (k * 8);
        packed |= (uint64_t)(hi & 0xff) << (k * 8 + 32);
    }
    return packed;
}
/*
 * Pack eight signed halfwords into eight unsigned bytes with saturation.
 * The four halfwords of fs fill result bytes 0..3 and the four halfwords
 * of ft fill bytes 4..7.  Each halfword is read as signed and clamped to
 * [0, 0xff]: negative inputs become 0, inputs above 0xff become 0xff.
 */
uint64_t helper_packushb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 8; ++i) {
        /* Lanes 0..3 come from fs, lanes 4..7 from ft. */
        uint64_t src = i < 4 ? fs : ft;
        int16_t tmp = (int16_t)(uint16_t)(src >> ((i & 3) * 16));
        uint64_t byte;

        /*
         * Clamp both ends explicitly: an upper-bound-only clamp would
         * turn a negative halfword into its low byte instead of 0.
         */
        if (tmp < 0) {
            byte = 0;
        } else if (tmp > 0xff) {
            byte = 0xff;
        } else {
            byte = (uint64_t)tmp;
        }
        fd |= byte << (i * 8);
    }
    return fd;
}
/*
 * Interleave the low words: the result's low 32 bits are the low word of
 * fs and its high 32 bits are the low word of ft.
 */
uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
{
    const uint64_t low = (uint32_t)fs;
    const uint64_t high = ft << 32;

    return high | low;
}
/*
 * Interleave the high words: the result's low 32 bits are the high word
 * of fs and its high 32 bits are the high word of ft.
 */
uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
{
    const uint64_t low = fs >> 32;
    const uint64_t high = ft & ~0xffffffffull;

    return high | low;
}
330 uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
332 unsigned host = BYTE_ORDER_XOR(3);
333 LMIValue vd, vs, vt;
335 vs.d = fs;
336 vt.d = ft;
337 vd.uh[0 ^ host] = vs.uh[0 ^ host];
338 vd.uh[1 ^ host] = vt.uh[0 ^ host];
339 vd.uh[2 ^ host] = vs.uh[1 ^ host];
340 vd.uh[3 ^ host] = vt.uh[1 ^ host];
342 return vd.d;
345 uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
347 unsigned host = BYTE_ORDER_XOR(3);
348 LMIValue vd, vs, vt;
350 vs.d = fs;
351 vt.d = ft;
352 vd.uh[0 ^ host] = vs.uh[2 ^ host];
353 vd.uh[1 ^ host] = vt.uh[2 ^ host];
354 vd.uh[2 ^ host] = vs.uh[3 ^ host];
355 vd.uh[3 ^ host] = vt.uh[3 ^ host];
357 return vd.d;
360 uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
362 unsigned host = BYTE_ORDER_XOR(7);
363 LMIValue vd, vs, vt;
365 vs.d = fs;
366 vt.d = ft;
367 vd.ub[0 ^ host] = vs.ub[0 ^ host];
368 vd.ub[1 ^ host] = vt.ub[0 ^ host];
369 vd.ub[2 ^ host] = vs.ub[1 ^ host];
370 vd.ub[3 ^ host] = vt.ub[1 ^ host];
371 vd.ub[4 ^ host] = vs.ub[2 ^ host];
372 vd.ub[5 ^ host] = vt.ub[2 ^ host];
373 vd.ub[6 ^ host] = vs.ub[3 ^ host];
374 vd.ub[7 ^ host] = vt.ub[3 ^ host];
376 return vd.d;
379 uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
381 unsigned host = BYTE_ORDER_XOR(7);
382 LMIValue vd, vs, vt;
384 vs.d = fs;
385 vt.d = ft;
386 vd.ub[0 ^ host] = vs.ub[4 ^ host];
387 vd.ub[1 ^ host] = vt.ub[4 ^ host];
388 vd.ub[2 ^ host] = vs.ub[5 ^ host];
389 vd.ub[3 ^ host] = vt.ub[5 ^ host];
390 vd.ub[4 ^ host] = vs.ub[6 ^ host];
391 vd.ub[5 ^ host] = vt.ub[6 ^ host];
392 vd.ub[6 ^ host] = vs.ub[7 ^ host];
393 vd.ub[7 ^ host] = vt.ub[7 ^ host];
395 return vd.d;
398 uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
400 LMIValue vs, vt;
401 unsigned i;
403 vs.d = fs;
404 vt.d = ft;
405 for (i = 0; i < 4; i++) {
406 vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1;
408 return vs.d;
411 uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
413 LMIValue vs, vt;
414 unsigned i;
416 vs.d = fs;
417 vt.d = ft;
418 for (i = 0; i < 8; i++) {
419 vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1;
421 return vs.d;
424 uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
426 LMIValue vs, vt;
427 unsigned i;
429 vs.d = fs;
430 vt.d = ft;
431 for (i = 0; i < 4; i++) {
432 vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
434 return vs.d;
437 uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
439 LMIValue vs, vt;
440 unsigned i;
442 vs.d = fs;
443 vt.d = ft;
444 for (i = 0; i < 4; i++) {
445 vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
447 return vs.d;
450 uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
452 LMIValue vs, vt;
453 unsigned i;
455 vs.d = fs;
456 vt.d = ft;
457 for (i = 0; i < 4; i++) {
458 vs.ub[i] = (vs.ub[i] >= vt.ub[i] ? vs.ub[i] : vt.ub[i]);
460 return vs.d;
463 uint64_t helper_pminub(uint64_t fs, uint64_t ft)
465 LMIValue vs, vt;
466 unsigned i;
468 vs.d = fs;
469 vt.d = ft;
470 for (i = 0; i < 4; i++) {
471 vs.ub[i] = (vs.ub[i] <= vt.ub[i] ? vs.ub[i] : vt.ub[i]);
473 return vs.d;
476 uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
478 LMIValue vs, vt;
479 unsigned i;
481 vs.d = fs;
482 vt.d = ft;
483 for (i = 0; i < 2; i++) {
484 vs.uw[i] = -(vs.uw[i] == vt.uw[i]);
486 return vs.d;
489 uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
491 LMIValue vs, vt;
492 unsigned i;
494 vs.d = fs;
495 vt.d = ft;
496 for (i = 0; i < 2; i++) {
497 vs.uw[i] = -(vs.uw[i] > vt.uw[i]);
499 return vs.d;
502 uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
504 LMIValue vs, vt;
505 unsigned i;
507 vs.d = fs;
508 vt.d = ft;
509 for (i = 0; i < 4; i++) {
510 vs.uh[i] = -(vs.uh[i] == vt.uh[i]);
512 return vs.d;
515 uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
517 LMIValue vs, vt;
518 unsigned i;
520 vs.d = fs;
521 vt.d = ft;
522 for (i = 0; i < 4; i++) {
523 vs.uh[i] = -(vs.uh[i] > vt.uh[i]);
525 return vs.d;
528 uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
530 LMIValue vs, vt;
531 unsigned i;
533 vs.d = fs;
534 vt.d = ft;
535 for (i = 0; i < 8; i++) {
536 vs.ub[i] = -(vs.ub[i] == vt.ub[i]);
538 return vs.d;
541 uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
543 LMIValue vs, vt;
544 unsigned i;
546 vs.d = fs;
547 vt.d = ft;
548 for (i = 0; i < 8; i++) {
549 vs.ub[i] = -(vs.ub[i] > vt.ub[i]);
551 return vs.d;
554 uint64_t helper_psllw(uint64_t fs, uint64_t ft)
556 LMIValue vs;
557 unsigned i;
559 ft &= 0x7f;
560 if (ft > 31) {
561 return 0;
563 vs.d = fs;
564 for (i = 0; i < 2; ++i) {
565 vs.uw[i] <<= ft;
567 return vs.d;
570 uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
572 LMIValue vs;
573 unsigned i;
575 ft &= 0x7f;
576 if (ft > 31) {
577 return 0;
579 vs.d = fs;
580 for (i = 0; i < 2; ++i) {
581 vs.uw[i] >>= ft;
583 return vs.d;
586 uint64_t helper_psraw(uint64_t fs, uint64_t ft)
588 LMIValue vs;
589 unsigned i;
591 ft &= 0x7f;
592 if (ft > 31) {
593 ft = 31;
595 vs.d = fs;
596 for (i = 0; i < 2; ++i) {
597 vs.sw[i] >>= ft;
599 return vs.d;
602 uint64_t helper_psllh(uint64_t fs, uint64_t ft)
604 LMIValue vs;
605 unsigned i;
607 ft &= 0x7f;
608 if (ft > 15) {
609 return 0;
611 vs.d = fs;
612 for (i = 0; i < 4; ++i) {
613 vs.uh[i] <<= ft;
615 return vs.d;
618 uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
620 LMIValue vs;
621 unsigned i;
623 ft &= 0x7f;
624 if (ft > 15) {
625 return 0;
627 vs.d = fs;
628 for (i = 0; i < 4; ++i) {
629 vs.uh[i] >>= ft;
631 return vs.d;
634 uint64_t helper_psrah(uint64_t fs, uint64_t ft)
636 LMIValue vs;
637 unsigned i;
639 ft &= 0x7f;
640 if (ft > 15) {
641 ft = 15;
643 vs.d = fs;
644 for (i = 0; i < 4; ++i) {
645 vs.sh[i] >>= ft;
647 return vs.d;
650 uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
652 LMIValue vs, vt;
653 unsigned i;
655 vs.d = fs;
656 vt.d = ft;
657 for (i = 0; i < 4; ++i) {
658 vs.sh[i] *= vt.sh[i];
660 return vs.d;
663 uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
665 LMIValue vs, vt;
666 unsigned i;
668 vs.d = fs;
669 vt.d = ft;
670 for (i = 0; i < 4; ++i) {
671 int32_t r = vs.sh[i] * vt.sh[i];
672 vs.sh[i] = r >> 16;
674 return vs.d;
677 uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
679 LMIValue vs, vt;
680 unsigned i;
682 vs.d = fs;
683 vt.d = ft;
684 for (i = 0; i < 4; ++i) {
685 uint32_t r = vs.uh[i] * vt.uh[i];
686 vs.uh[i] = r >> 16;
688 return vs.d;
691 uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
693 unsigned host = BYTE_ORDER_XOR(3);
694 LMIValue vs, vt;
695 uint32_t p0, p1;
697 vs.d = fs;
698 vt.d = ft;
699 p0 = vs.sh[0 ^ host] * vt.sh[0 ^ host];
700 p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host];
701 p1 = vs.sh[2 ^ host] * vt.sh[2 ^ host];
702 p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host];
704 return ((uint64_t)p1 << 32) | p0;
707 uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
709 LMIValue vs, vt;
710 unsigned i;
712 vs.d = fs;
713 vt.d = ft;
714 for (i = 0; i < 8; ++i) {
715 int r = vs.ub[i] - vt.ub[i];
716 vs.ub[i] = (r < 0 ? -r : r);
718 return vs.d;
/*
 * Horizontal byte sum: adds the eight bytes of fs together and returns
 * the total masked to 16 bits.
 */
uint64_t helper_biadd(uint64_t fs)
{
    unsigned total = 0;

    /* Consume fs one byte at a time; zero bytes contribute nothing. */
    while (fs != 0) {
        total += fs & 0xff;
        fs >>= 8;
    }
    return total & 0xffff;
}
/*
 * Byte sign mask: bit k of the result (k = 0..7) is the most significant
 * bit of byte k of fs.  All higher result bits are zero.
 */
uint64_t helper_pmovmskb(uint64_t fs)
{
    unsigned mask = 0;

    for (unsigned k = 0; k < 8; k++) {
        unsigned top_bit = (fs >> (8 * k + 7)) & 1;

        mask |= top_bit << k;
    }
    return mask & 0xff;
}