Merge remote-tracking branch 'remotes/mjt/tags/pull-trivial-patches-2015-04-04' into...
[qemu.git] / target-ppc / int_helper.c
blob4c2b71c7080a5e46142dcf724e2c4f72e3cc5d88
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "cpu.h"
20 #include "qemu/host-utils.h"
21 #include "exec/helper-proto.h"
22 #include "qemu/aes.h"
24 #include "helper_regs.h"
25 /*****************************************************************************/
26 /* Fixed point operations helpers */
28 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
29 uint32_t oe)
31 uint64_t rt = 0;
32 int overflow = 0;
34 uint64_t dividend = (uint64_t)ra << 32;
35 uint64_t divisor = (uint32_t)rb;
37 if (unlikely(divisor == 0)) {
38 overflow = 1;
39 } else {
40 rt = dividend / divisor;
41 overflow = rt > UINT32_MAX;
44 if (unlikely(overflow)) {
45 rt = 0; /* Undefined */
48 if (oe) {
49 if (unlikely(overflow)) {
50 env->so = env->ov = 1;
51 } else {
52 env->ov = 0;
56 return (target_ulong)rt;
59 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
60 uint32_t oe)
62 int64_t rt = 0;
63 int overflow = 0;
65 int64_t dividend = (int64_t)ra << 32;
66 int64_t divisor = (int64_t)((int32_t)rb);
68 if (unlikely((divisor == 0) ||
69 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
70 overflow = 1;
71 } else {
72 rt = dividend / divisor;
73 overflow = rt != (int32_t)rt;
76 if (unlikely(overflow)) {
77 rt = 0; /* Undefined */
80 if (oe) {
81 if (unlikely(overflow)) {
82 env->so = env->ov = 1;
83 } else {
84 env->ov = 0;
88 return (target_ulong)rt;
91 #if defined(TARGET_PPC64)
93 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
95 uint64_t rt = 0;
96 int overflow = 0;
98 overflow = divu128(&rt, &ra, rb);
100 if (unlikely(overflow)) {
101 rt = 0; /* Undefined */
104 if (oe) {
105 if (unlikely(overflow)) {
106 env->so = env->ov = 1;
107 } else {
108 env->ov = 0;
112 return rt;
115 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
117 int64_t rt = 0;
118 int64_t ra = (int64_t)rau;
119 int64_t rb = (int64_t)rbu;
120 int overflow = divs128(&rt, &ra, rb);
122 if (unlikely(overflow)) {
123 rt = 0; /* Undefined */
126 if (oe) {
128 if (unlikely(overflow)) {
129 env->so = env->ov = 1;
130 } else {
131 env->ov = 0;
135 return rt;
138 #endif
141 target_ulong helper_cntlzw(target_ulong t)
143 return clz32(t);
146 #if defined(TARGET_PPC64)
147 target_ulong helper_cntlzd(target_ulong t)
149 return clz64(t);
151 #endif
153 #if defined(TARGET_PPC64)
/*
 * Bit permute: each of the 8 bytes of rs is a bit selector.  For byte k
 * (counting from the least significant byte), a selector below 64 picks
 * bit (63 - selector) of rb and copies it into bit k of the result;
 * selectors of 64 or more contribute a zero bit.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t gathered = 0;
    int k;

    for (k = 0; k < 8; k++, rs >>= 8) {
        const int selector = rs & 0xFF;

        if (selector >= 64) {
            continue;
        }
        if ((rb >> (63 - selector)) & 1) {
            gathered |= 1 << k;
        }
    }
    return gathered;
}
171 #endif
173 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
175 target_ulong mask = 0xff;
176 target_ulong ra = 0;
177 int i;
179 for (i = 0; i < sizeof(target_ulong); i++) {
180 if ((rs & mask) == (rb & mask)) {
181 ra |= mask;
183 mask <<= 8;
185 return ra;
188 /* shift right arithmetic helper */
189 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
190 target_ulong shift)
192 int32_t ret;
194 if (likely(!(shift & 0x20))) {
195 if (likely((uint32_t)shift != 0)) {
196 shift &= 0x1f;
197 ret = (int32_t)value >> shift;
198 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
199 env->ca = 0;
200 } else {
201 env->ca = 1;
203 } else {
204 ret = (int32_t)value;
205 env->ca = 0;
207 } else {
208 ret = (int32_t)value >> 31;
209 env->ca = (ret != 0);
211 return (target_long)ret;
214 #if defined(TARGET_PPC64)
215 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
216 target_ulong shift)
218 int64_t ret;
220 if (likely(!(shift & 0x40))) {
221 if (likely((uint64_t)shift != 0)) {
222 shift &= 0x3f;
223 ret = (int64_t)value >> shift;
224 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
225 env->ca = 0;
226 } else {
227 env->ca = 1;
229 } else {
230 ret = (int64_t)value;
231 env->ca = 0;
233 } else {
234 ret = (int64_t)value >> 63;
235 env->ca = (ret != 0);
237 return ret;
239 #endif
241 #if defined(TARGET_PPC64)
242 target_ulong helper_popcntb(target_ulong val)
244 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
245 0x5555555555555555ULL);
246 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
247 0x3333333333333333ULL);
248 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
249 0x0f0f0f0f0f0f0f0fULL);
250 return val;
253 target_ulong helper_popcntw(target_ulong val)
255 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
256 0x5555555555555555ULL);
257 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
258 0x3333333333333333ULL);
259 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
260 0x0f0f0f0f0f0f0f0fULL);
261 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
262 0x00ff00ff00ff00ffULL);
263 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
264 0x0000ffff0000ffffULL);
265 return val;
268 target_ulong helper_popcntd(target_ulong val)
270 return ctpop64(val);
272 #else
273 target_ulong helper_popcntb(target_ulong val)
275 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
276 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
277 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
278 return val;
281 target_ulong helper_popcntw(target_ulong val)
283 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
284 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
285 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
286 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
287 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
288 return val;
290 #endif
292 /*****************************************************************************/
293 /* PowerPC 601 specific instructions (POWER bridge) */
294 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
296 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
298 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
299 (int32_t)arg2 == 0) {
300 env->spr[SPR_MQ] = 0;
301 return INT32_MIN;
302 } else {
303 env->spr[SPR_MQ] = tmp % arg2;
304 return tmp / (int32_t)arg2;
308 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
309 target_ulong arg2)
311 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
313 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
314 (int32_t)arg2 == 0) {
315 env->so = env->ov = 1;
316 env->spr[SPR_MQ] = 0;
317 return INT32_MIN;
318 } else {
319 env->spr[SPR_MQ] = tmp % arg2;
320 tmp /= (int32_t)arg2;
321 if ((int32_t)tmp != tmp) {
322 env->so = env->ov = 1;
323 } else {
324 env->ov = 0;
326 return tmp;
330 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
331 target_ulong arg2)
333 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
334 (int32_t)arg2 == 0) {
335 env->spr[SPR_MQ] = 0;
336 return INT32_MIN;
337 } else {
338 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
339 return (int32_t)arg1 / (int32_t)arg2;
343 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
344 target_ulong arg2)
346 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
347 (int32_t)arg2 == 0) {
348 env->so = env->ov = 1;
349 env->spr[SPR_MQ] = 0;
350 return INT32_MIN;
351 } else {
352 env->ov = 0;
353 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
354 return (int32_t)arg1 / (int32_t)arg2;
358 /*****************************************************************************/
359 /* 602 specific instructions */
360 /* mfrom is the most crazy instruction ever seen, imho ! */
361 /* Real implementation uses a ROM table. Do the same */
/* Extremely decomposed:
 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
366 #if !defined(CONFIG_USER_ONLY)
367 target_ulong helper_602_mfrom(target_ulong arg)
369 if (likely(arg < 602)) {
370 #include "mfrom_table.c"
371 return mfrom_ROM_table[arg];
372 } else {
373 return 0;
376 #endif
378 /*****************************************************************************/
379 /* Altivec extension helpers */
/*
 * Host-endianness dependent vector indexing.
 *
 * HI_IDX / LO_IDX: index of the high / low member of an element pair.
 * AVRB(i) / AVRW(i): byte / word member selectors, mirrored on
 * little-endian hosts.
 * VECTOR_FOR_INORDER_I(index, element): loop header walking r->element
 * upwards on big-endian hosts and downwards on little-endian hosts; it
 * expects a variable named "r" in scope and a signed index variable.
 */
#ifdef HOST_WORDS_BIGENDIAN

#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)

#else /* little-endian host */

#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15-(i)]
#define AVRW(i) u32[3-(i)]
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)

#endif
/*
 * Saturating conversion helpers.
 *
 * SATCVT generates cvt<from><to>(x, sat): x is clamped to [min, max] and
 * converted to to_type; *sat is set to 1 whenever clamping happened and is
 * left untouched otherwise.
 * SATCVTU is the variant for unsigned sources, where only the upper bound
 * needs checking.
 */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x < (from_type)min) {                               \
            *sat = 1;                                           \
            return min;                                         \
        }                                                       \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }

/* signed -> narrower signed */
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

/* unsigned -> narrower unsigned */
SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)

/* signed -> narrower unsigned */
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
443 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
445 int i, j = (sh & 0xf);
447 VECTOR_FOR_INORDER_I(i, u8) {
448 r->u8[i] = j++;
452 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
454 int i, j = 0x10 - (sh & 0xf);
456 VECTOR_FOR_INORDER_I(i, u8) {
457 r->u8[i] = j++;
461 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
463 #if defined(HOST_WORDS_BIGENDIAN)
464 env->vscr = r->u32[3];
465 #else
466 env->vscr = r->u32[0];
467 #endif
468 set_flush_to_zero(vscr_nj, &env->vec_status);
471 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
473 int i;
475 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
476 r->u32[i] = ~a->u32[i] < b->u32[i];
480 #define VARITH_DO(name, op, element) \
481 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
483 int i; \
485 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
486 r->element[i] = a->element[i] op b->element[i]; \
489 #define VARITH(suffix, element) \
490 VARITH_DO(add##suffix, +, element) \
491 VARITH_DO(sub##suffix, -, element)
492 VARITH(ubm, u8)
493 VARITH(uhm, u16)
494 VARITH(uwm, u32)
495 VARITH(udm, u64)
496 VARITH_DO(muluwm, *, u32)
497 #undef VARITH_DO
498 #undef VARITH
500 #define VARITHFP(suffix, func) \
501 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
502 ppc_avr_t *b) \
504 int i; \
506 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
507 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
510 VARITHFP(addfp, float32_add)
511 VARITHFP(subfp, float32_sub)
512 VARITHFP(minfp, float32_min)
513 VARITHFP(maxfp, float32_max)
514 #undef VARITHFP
516 #define VARITHFPFMA(suffix, type) \
517 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
518 ppc_avr_t *b, ppc_avr_t *c) \
520 int i; \
521 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
522 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
523 type, &env->vec_status); \
526 VARITHFPFMA(maddfp, 0);
527 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
528 #undef VARITHFPFMA
530 #define VARITHSAT_CASE(type, op, cvt, element) \
532 type result = (type)a->element[i] op (type)b->element[i]; \
533 r->element[i] = cvt(result, &sat); \
536 #define VARITHSAT_DO(name, op, optype, cvt, element) \
537 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
538 ppc_avr_t *b) \
540 int sat = 0; \
541 int i; \
543 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
544 switch (sizeof(r->element[0])) { \
545 case 1: \
546 VARITHSAT_CASE(optype, op, cvt, element); \
547 break; \
548 case 2: \
549 VARITHSAT_CASE(optype, op, cvt, element); \
550 break; \
551 case 4: \
552 VARITHSAT_CASE(optype, op, cvt, element); \
553 break; \
556 if (sat) { \
557 env->vscr |= (1 << VSCR_SAT); \
560 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
561 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
562 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
563 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
564 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
565 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
566 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
567 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
568 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
569 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
570 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
571 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
572 #undef VARITHSAT_CASE
573 #undef VARITHSAT_DO
574 #undef VARITHSAT_SIGNED
575 #undef VARITHSAT_UNSIGNED
577 #define VAVG_DO(name, element, etype) \
578 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
580 int i; \
582 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
583 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
584 r->element[i] = x >> 1; \
588 #define VAVG(type, signed_element, signed_type, unsigned_element, \
589 unsigned_type) \
590 VAVG_DO(avgs##type, signed_element, signed_type) \
591 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
592 VAVG(b, s8, int16_t, u8, uint16_t)
593 VAVG(h, s16, int32_t, u16, uint32_t)
594 VAVG(w, s32, int64_t, u32, uint64_t)
595 #undef VAVG_DO
596 #undef VAVG
598 #define VCF(suffix, cvt, element) \
599 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
600 ppc_avr_t *b, uint32_t uim) \
602 int i; \
604 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
605 float32 t = cvt(b->element[i], &env->vec_status); \
606 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
609 VCF(ux, uint32_to_float32, u32)
610 VCF(sx, int32_to_float32, s32)
611 #undef VCF
613 #define VCMP_DO(suffix, compare, element, record) \
614 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
615 ppc_avr_t *a, ppc_avr_t *b) \
617 uint64_t ones = (uint64_t)-1; \
618 uint64_t all = ones; \
619 uint64_t none = 0; \
620 int i; \
622 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
623 uint64_t result = (a->element[i] compare b->element[i] ? \
624 ones : 0x0); \
625 switch (sizeof(a->element[0])) { \
626 case 8: \
627 r->u64[i] = result; \
628 break; \
629 case 4: \
630 r->u32[i] = result; \
631 break; \
632 case 2: \
633 r->u16[i] = result; \
634 break; \
635 case 1: \
636 r->u8[i] = result; \
637 break; \
639 all &= result; \
640 none |= result; \
642 if (record) { \
643 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
646 #define VCMP(suffix, compare, element) \
647 VCMP_DO(suffix, compare, element, 0) \
648 VCMP_DO(suffix##_dot, compare, element, 1)
649 VCMP(equb, ==, u8)
650 VCMP(equh, ==, u16)
651 VCMP(equw, ==, u32)
652 VCMP(equd, ==, u64)
653 VCMP(gtub, >, u8)
654 VCMP(gtuh, >, u16)
655 VCMP(gtuw, >, u32)
656 VCMP(gtud, >, u64)
657 VCMP(gtsb, >, s8)
658 VCMP(gtsh, >, s16)
659 VCMP(gtsw, >, s32)
660 VCMP(gtsd, >, s64)
661 #undef VCMP_DO
662 #undef VCMP
664 #define VCMPFP_DO(suffix, compare, order, record) \
665 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
666 ppc_avr_t *a, ppc_avr_t *b) \
668 uint32_t ones = (uint32_t)-1; \
669 uint32_t all = ones; \
670 uint32_t none = 0; \
671 int i; \
673 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
674 uint32_t result; \
675 int rel = float32_compare_quiet(a->f[i], b->f[i], \
676 &env->vec_status); \
677 if (rel == float_relation_unordered) { \
678 result = 0; \
679 } else if (rel compare order) { \
680 result = ones; \
681 } else { \
682 result = 0; \
684 r->u32[i] = result; \
685 all &= result; \
686 none |= result; \
688 if (record) { \
689 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
692 #define VCMPFP(suffix, compare, order) \
693 VCMPFP_DO(suffix, compare, order, 0) \
694 VCMPFP_DO(suffix##_dot, compare, order, 1)
695 VCMPFP(eqfp, ==, float_relation_equal)
696 VCMPFP(gefp, !=, float_relation_less)
697 VCMPFP(gtfp, ==, float_relation_greater)
698 #undef VCMPFP_DO
699 #undef VCMPFP
701 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
702 ppc_avr_t *a, ppc_avr_t *b, int record)
704 int i;
705 int all_in = 0;
707 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
708 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
709 if (le_rel == float_relation_unordered) {
710 r->u32[i] = 0xc0000000;
711 all_in = 1;
712 } else {
713 float32 bneg = float32_chs(b->f[i]);
714 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
715 int le = le_rel != float_relation_greater;
716 int ge = ge_rel != float_relation_less;
718 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
719 all_in |= (!le | !ge);
722 if (record) {
723 env->crf[6] = (all_in == 0) << 1;
727 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
729 vcmpbfp_internal(env, r, a, b, 0);
732 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
733 ppc_avr_t *b)
735 vcmpbfp_internal(env, r, a, b, 1);
738 #define VCT(suffix, satcvt, element) \
739 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
740 ppc_avr_t *b, uint32_t uim) \
742 int i; \
743 int sat = 0; \
744 float_status s = env->vec_status; \
746 set_float_rounding_mode(float_round_to_zero, &s); \
747 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
748 if (float32_is_any_nan(b->f[i])) { \
749 r->element[i] = 0; \
750 } else { \
751 float64 t = float32_to_float64(b->f[i], &s); \
752 int64_t j; \
754 t = float64_scalbn(t, uim, &s); \
755 j = float64_to_int64(t, &s); \
756 r->element[i] = satcvt(j, &sat); \
759 if (sat) { \
760 env->vscr |= (1 << VSCR_SAT); \
763 VCT(uxs, cvtsduw, u32)
764 VCT(sxs, cvtsdsw, s32)
765 #undef VCT
767 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
768 ppc_avr_t *b, ppc_avr_t *c)
770 int sat = 0;
771 int i;
773 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
774 int32_t prod = a->s16[i] * b->s16[i];
775 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
777 r->s16[i] = cvtswsh(t, &sat);
780 if (sat) {
781 env->vscr |= (1 << VSCR_SAT);
785 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
786 ppc_avr_t *b, ppc_avr_t *c)
788 int sat = 0;
789 int i;
791 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
792 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
793 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
794 r->s16[i] = cvtswsh(t, &sat);
797 if (sat) {
798 env->vscr |= (1 << VSCR_SAT);
802 #define VMINMAX_DO(name, compare, element) \
803 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
805 int i; \
807 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
808 if (a->element[i] compare b->element[i]) { \
809 r->element[i] = b->element[i]; \
810 } else { \
811 r->element[i] = a->element[i]; \
815 #define VMINMAX(suffix, element) \
816 VMINMAX_DO(min##suffix, >, element) \
817 VMINMAX_DO(max##suffix, <, element)
818 VMINMAX(sb, s8)
819 VMINMAX(sh, s16)
820 VMINMAX(sw, s32)
821 VMINMAX(sd, s64)
822 VMINMAX(ub, u8)
823 VMINMAX(uh, u16)
824 VMINMAX(uw, u32)
825 VMINMAX(ud, u64)
826 #undef VMINMAX_DO
827 #undef VMINMAX
829 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
831 int i;
833 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
834 int32_t prod = a->s16[i] * b->s16[i];
835 r->s16[i] = (int16_t) (prod + c->s16[i]);
839 #define VMRG_DO(name, element, highp) \
840 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
842 ppc_avr_t result; \
843 int i; \
844 size_t n_elems = ARRAY_SIZE(r->element); \
846 for (i = 0; i < n_elems / 2; i++) { \
847 if (highp) { \
848 result.element[i*2+HI_IDX] = a->element[i]; \
849 result.element[i*2+LO_IDX] = b->element[i]; \
850 } else { \
851 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
852 b->element[n_elems - i - 1]; \
853 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
854 a->element[n_elems - i - 1]; \
857 *r = result; \
859 #if defined(HOST_WORDS_BIGENDIAN)
860 #define MRGHI 0
861 #define MRGLO 1
862 #else
863 #define MRGHI 1
864 #define MRGLO 0
865 #endif
866 #define VMRG(suffix, element) \
867 VMRG_DO(mrgl##suffix, element, MRGHI) \
868 VMRG_DO(mrgh##suffix, element, MRGLO)
869 VMRG(b, u8)
870 VMRG(h, u16)
871 VMRG(w, u32)
872 #undef VMRG_DO
873 #undef VMRG
874 #undef MRGHI
875 #undef MRGLO
877 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
878 ppc_avr_t *b, ppc_avr_t *c)
880 int32_t prod[16];
881 int i;
883 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
884 prod[i] = (int32_t)a->s8[i] * b->u8[i];
887 VECTOR_FOR_INORDER_I(i, s32) {
888 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
889 prod[4 * i + 2] + prod[4 * i + 3];
893 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
894 ppc_avr_t *b, ppc_avr_t *c)
896 int32_t prod[8];
897 int i;
899 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
900 prod[i] = a->s16[i] * b->s16[i];
903 VECTOR_FOR_INORDER_I(i, s32) {
904 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
908 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
909 ppc_avr_t *b, ppc_avr_t *c)
911 int32_t prod[8];
912 int i;
913 int sat = 0;
915 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
916 prod[i] = (int32_t)a->s16[i] * b->s16[i];
919 VECTOR_FOR_INORDER_I(i, s32) {
920 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
922 r->u32[i] = cvtsdsw(t, &sat);
925 if (sat) {
926 env->vscr |= (1 << VSCR_SAT);
930 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
931 ppc_avr_t *b, ppc_avr_t *c)
933 uint16_t prod[16];
934 int i;
936 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
937 prod[i] = a->u8[i] * b->u8[i];
940 VECTOR_FOR_INORDER_I(i, u32) {
941 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
942 prod[4 * i + 2] + prod[4 * i + 3];
946 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
947 ppc_avr_t *b, ppc_avr_t *c)
949 uint32_t prod[8];
950 int i;
952 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
953 prod[i] = a->u16[i] * b->u16[i];
956 VECTOR_FOR_INORDER_I(i, u32) {
957 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
961 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
962 ppc_avr_t *b, ppc_avr_t *c)
964 uint32_t prod[8];
965 int i;
966 int sat = 0;
968 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
969 prod[i] = a->u16[i] * b->u16[i];
972 VECTOR_FOR_INORDER_I(i, s32) {
973 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
975 r->u32[i] = cvtuduw(t, &sat);
978 if (sat) {
979 env->vscr |= (1 << VSCR_SAT);
983 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
984 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
986 int i; \
988 VECTOR_FOR_INORDER_I(i, prod_element) { \
989 if (evenp) { \
990 r->prod_element[i] = \
991 (cast)a->mul_element[i * 2 + HI_IDX] * \
992 (cast)b->mul_element[i * 2 + HI_IDX]; \
993 } else { \
994 r->prod_element[i] = \
995 (cast)a->mul_element[i * 2 + LO_IDX] * \
996 (cast)b->mul_element[i * 2 + LO_IDX]; \
1000 #define VMUL(suffix, mul_element, prod_element, cast) \
1001 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1002 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1003 VMUL(sb, s8, s16, int16_t)
1004 VMUL(sh, s16, s32, int32_t)
1005 VMUL(sw, s32, s64, int64_t)
1006 VMUL(ub, u8, u16, uint16_t)
1007 VMUL(uh, u16, u32, uint32_t)
1008 VMUL(uw, u32, u64, uint64_t)
1009 #undef VMUL_DO
1010 #undef VMUL
1012 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1013 ppc_avr_t *c)
1015 ppc_avr_t result;
1016 int i;
1018 VECTOR_FOR_INORDER_I(i, u8) {
1019 int s = c->u8[i] & 0x1f;
1020 #if defined(HOST_WORDS_BIGENDIAN)
1021 int index = s & 0xf;
1022 #else
1023 int index = 15 - (s & 0xf);
1024 #endif
1026 if (s & 0x10) {
1027 result.u8[i] = b->u8[index];
1028 } else {
1029 result.u8[i] = a->u8[index];
1032 *r = result;
1035 #if defined(HOST_WORDS_BIGENDIAN)
1036 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1037 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1038 #else
1039 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1040 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1041 #endif
1043 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1045 int i;
1046 uint64_t perm = 0;
1048 VECTOR_FOR_INORDER_I(i, u8) {
1049 int index = VBPERMQ_INDEX(b, i);
1051 if (index < 128) {
1052 uint64_t mask = (1ull << (63-(index & 0x3F)));
1053 if (a->u64[VBPERMQ_DW(index)] & mask) {
1054 perm |= (0x8000 >> i);
1059 r->u64[HI_IDX] = perm;
1060 r->u64[LO_IDX] = 0;
1063 #undef VBPERMQ_INDEX
1064 #undef VBPERMQ_DW
1066 static const uint64_t VGBBD_MASKS[256] = {
1067 0x0000000000000000ull, /* 00 */
1068 0x0000000000000080ull, /* 01 */
1069 0x0000000000008000ull, /* 02 */
1070 0x0000000000008080ull, /* 03 */
1071 0x0000000000800000ull, /* 04 */
1072 0x0000000000800080ull, /* 05 */
1073 0x0000000000808000ull, /* 06 */
1074 0x0000000000808080ull, /* 07 */
1075 0x0000000080000000ull, /* 08 */
1076 0x0000000080000080ull, /* 09 */
1077 0x0000000080008000ull, /* 0A */
1078 0x0000000080008080ull, /* 0B */
1079 0x0000000080800000ull, /* 0C */
1080 0x0000000080800080ull, /* 0D */
1081 0x0000000080808000ull, /* 0E */
1082 0x0000000080808080ull, /* 0F */
1083 0x0000008000000000ull, /* 10 */
1084 0x0000008000000080ull, /* 11 */
1085 0x0000008000008000ull, /* 12 */
1086 0x0000008000008080ull, /* 13 */
1087 0x0000008000800000ull, /* 14 */
1088 0x0000008000800080ull, /* 15 */
1089 0x0000008000808000ull, /* 16 */
1090 0x0000008000808080ull, /* 17 */
1091 0x0000008080000000ull, /* 18 */
1092 0x0000008080000080ull, /* 19 */
1093 0x0000008080008000ull, /* 1A */
1094 0x0000008080008080ull, /* 1B */
1095 0x0000008080800000ull, /* 1C */
1096 0x0000008080800080ull, /* 1D */
1097 0x0000008080808000ull, /* 1E */
1098 0x0000008080808080ull, /* 1F */
1099 0x0000800000000000ull, /* 20 */
1100 0x0000800000000080ull, /* 21 */
1101 0x0000800000008000ull, /* 22 */
1102 0x0000800000008080ull, /* 23 */
1103 0x0000800000800000ull, /* 24 */
1104 0x0000800000800080ull, /* 25 */
1105 0x0000800000808000ull, /* 26 */
1106 0x0000800000808080ull, /* 27 */
1107 0x0000800080000000ull, /* 28 */
1108 0x0000800080000080ull, /* 29 */
1109 0x0000800080008000ull, /* 2A */
1110 0x0000800080008080ull, /* 2B */
1111 0x0000800080800000ull, /* 2C */
1112 0x0000800080800080ull, /* 2D */
1113 0x0000800080808000ull, /* 2E */
1114 0x0000800080808080ull, /* 2F */
1115 0x0000808000000000ull, /* 30 */
1116 0x0000808000000080ull, /* 31 */
1117 0x0000808000008000ull, /* 32 */
1118 0x0000808000008080ull, /* 33 */
1119 0x0000808000800000ull, /* 34 */
1120 0x0000808000800080ull, /* 35 */
1121 0x0000808000808000ull, /* 36 */
1122 0x0000808000808080ull, /* 37 */
1123 0x0000808080000000ull, /* 38 */
1124 0x0000808080000080ull, /* 39 */
1125 0x0000808080008000ull, /* 3A */
1126 0x0000808080008080ull, /* 3B */
1127 0x0000808080800000ull, /* 3C */
1128 0x0000808080800080ull, /* 3D */
1129 0x0000808080808000ull, /* 3E */
1130 0x0000808080808080ull, /* 3F */
1131 0x0080000000000000ull, /* 40 */
1132 0x0080000000000080ull, /* 41 */
1133 0x0080000000008000ull, /* 42 */
1134 0x0080000000008080ull, /* 43 */
1135 0x0080000000800000ull, /* 44 */
1136 0x0080000000800080ull, /* 45 */
1137 0x0080000000808000ull, /* 46 */
1138 0x0080000000808080ull, /* 47 */
1139 0x0080000080000000ull, /* 48 */
1140 0x0080000080000080ull, /* 49 */
1141 0x0080000080008000ull, /* 4A */
1142 0x0080000080008080ull, /* 4B */
1143 0x0080000080800000ull, /* 4C */
1144 0x0080000080800080ull, /* 4D */
1145 0x0080000080808000ull, /* 4E */
1146 0x0080000080808080ull, /* 4F */
1147 0x0080008000000000ull, /* 50 */
1148 0x0080008000000080ull, /* 51 */
1149 0x0080008000008000ull, /* 52 */
1150 0x0080008000008080ull, /* 53 */
1151 0x0080008000800000ull, /* 54 */
1152 0x0080008000800080ull, /* 55 */
1153 0x0080008000808000ull, /* 56 */
1154 0x0080008000808080ull, /* 57 */
1155 0x0080008080000000ull, /* 58 */
1156 0x0080008080000080ull, /* 59 */
1157 0x0080008080008000ull, /* 5A */
1158 0x0080008080008080ull, /* 5B */
1159 0x0080008080800000ull, /* 5C */
1160 0x0080008080800080ull, /* 5D */
1161 0x0080008080808000ull, /* 5E */
1162 0x0080008080808080ull, /* 5F */
1163 0x0080800000000000ull, /* 60 */
1164 0x0080800000000080ull, /* 61 */
1165 0x0080800000008000ull, /* 62 */
1166 0x0080800000008080ull, /* 63 */
1167 0x0080800000800000ull, /* 64 */
1168 0x0080800000800080ull, /* 65 */
1169 0x0080800000808000ull, /* 66 */
1170 0x0080800000808080ull, /* 67 */
1171 0x0080800080000000ull, /* 68 */
1172 0x0080800080000080ull, /* 69 */
1173 0x0080800080008000ull, /* 6A */
1174 0x0080800080008080ull, /* 6B */
1175 0x0080800080800000ull, /* 6C */
1176 0x0080800080800080ull, /* 6D */
1177 0x0080800080808000ull, /* 6E */
1178 0x0080800080808080ull, /* 6F */
1179 0x0080808000000000ull, /* 70 */
1180 0x0080808000000080ull, /* 71 */
1181 0x0080808000008000ull, /* 72 */
1182 0x0080808000008080ull, /* 73 */
1183 0x0080808000800000ull, /* 74 */
1184 0x0080808000800080ull, /* 75 */
1185 0x0080808000808000ull, /* 76 */
1186 0x0080808000808080ull, /* 77 */
1187 0x0080808080000000ull, /* 78 */
1188 0x0080808080000080ull, /* 79 */
1189 0x0080808080008000ull, /* 7A */
1190 0x0080808080008080ull, /* 7B */
1191 0x0080808080800000ull, /* 7C */
1192 0x0080808080800080ull, /* 7D */
1193 0x0080808080808000ull, /* 7E */
1194 0x0080808080808080ull, /* 7F */
1195 0x8000000000000000ull, /* 80 */
1196 0x8000000000000080ull, /* 81 */
1197 0x8000000000008000ull, /* 82 */
1198 0x8000000000008080ull, /* 83 */
1199 0x8000000000800000ull, /* 84 */
1200 0x8000000000800080ull, /* 85 */
1201 0x8000000000808000ull, /* 86 */
1202 0x8000000000808080ull, /* 87 */
1203 0x8000000080000000ull, /* 88 */
1204 0x8000000080000080ull, /* 89 */
1205 0x8000000080008000ull, /* 8A */
1206 0x8000000080008080ull, /* 8B */
1207 0x8000000080800000ull, /* 8C */
1208 0x8000000080800080ull, /* 8D */
1209 0x8000000080808000ull, /* 8E */
1210 0x8000000080808080ull, /* 8F */
1211 0x8000008000000000ull, /* 90 */
1212 0x8000008000000080ull, /* 91 */
1213 0x8000008000008000ull, /* 92 */
1214 0x8000008000008080ull, /* 93 */
1215 0x8000008000800000ull, /* 94 */
1216 0x8000008000800080ull, /* 95 */
1217 0x8000008000808000ull, /* 96 */
1218 0x8000008000808080ull, /* 97 */
1219 0x8000008080000000ull, /* 98 */
1220 0x8000008080000080ull, /* 99 */
1221 0x8000008080008000ull, /* 9A */
1222 0x8000008080008080ull, /* 9B */
1223 0x8000008080800000ull, /* 9C */
1224 0x8000008080800080ull, /* 9D */
1225 0x8000008080808000ull, /* 9E */
1226 0x8000008080808080ull, /* 9F */
1227 0x8000800000000000ull, /* A0 */
1228 0x8000800000000080ull, /* A1 */
1229 0x8000800000008000ull, /* A2 */
1230 0x8000800000008080ull, /* A3 */
1231 0x8000800000800000ull, /* A4 */
1232 0x8000800000800080ull, /* A5 */
1233 0x8000800000808000ull, /* A6 */
1234 0x8000800000808080ull, /* A7 */
1235 0x8000800080000000ull, /* A8 */
1236 0x8000800080000080ull, /* A9 */
1237 0x8000800080008000ull, /* AA */
1238 0x8000800080008080ull, /* AB */
1239 0x8000800080800000ull, /* AC */
1240 0x8000800080800080ull, /* AD */
1241 0x8000800080808000ull, /* AE */
1242 0x8000800080808080ull, /* AF */
1243 0x8000808000000000ull, /* B0 */
1244 0x8000808000000080ull, /* B1 */
1245 0x8000808000008000ull, /* B2 */
1246 0x8000808000008080ull, /* B3 */
1247 0x8000808000800000ull, /* B4 */
1248 0x8000808000800080ull, /* B5 */
1249 0x8000808000808000ull, /* B6 */
1250 0x8000808000808080ull, /* B7 */
1251 0x8000808080000000ull, /* B8 */
1252 0x8000808080000080ull, /* B9 */
1253 0x8000808080008000ull, /* BA */
1254 0x8000808080008080ull, /* BB */
1255 0x8000808080800000ull, /* BC */
1256 0x8000808080800080ull, /* BD */
1257 0x8000808080808000ull, /* BE */
1258 0x8000808080808080ull, /* BF */
1259 0x8080000000000000ull, /* C0 */
1260 0x8080000000000080ull, /* C1 */
1261 0x8080000000008000ull, /* C2 */
1262 0x8080000000008080ull, /* C3 */
1263 0x8080000000800000ull, /* C4 */
1264 0x8080000000800080ull, /* C5 */
1265 0x8080000000808000ull, /* C6 */
1266 0x8080000000808080ull, /* C7 */
1267 0x8080000080000000ull, /* C8 */
1268 0x8080000080000080ull, /* C9 */
1269 0x8080000080008000ull, /* CA */
1270 0x8080000080008080ull, /* CB */
1271 0x8080000080800000ull, /* CC */
1272 0x8080000080800080ull, /* CD */
1273 0x8080000080808000ull, /* CE */
1274 0x8080000080808080ull, /* CF */
1275 0x8080008000000000ull, /* D0 */
1276 0x8080008000000080ull, /* D1 */
1277 0x8080008000008000ull, /* D2 */
1278 0x8080008000008080ull, /* D3 */
1279 0x8080008000800000ull, /* D4 */
1280 0x8080008000800080ull, /* D5 */
1281 0x8080008000808000ull, /* D6 */
1282 0x8080008000808080ull, /* D7 */
1283 0x8080008080000000ull, /* D8 */
1284 0x8080008080000080ull, /* D9 */
1285 0x8080008080008000ull, /* DA */
1286 0x8080008080008080ull, /* DB */
1287 0x8080008080800000ull, /* DC */
1288 0x8080008080800080ull, /* DD */
1289 0x8080008080808000ull, /* DE */
1290 0x8080008080808080ull, /* DF */
1291 0x8080800000000000ull, /* E0 */
1292 0x8080800000000080ull, /* E1 */
1293 0x8080800000008000ull, /* E2 */
1294 0x8080800000008080ull, /* E3 */
1295 0x8080800000800000ull, /* E4 */
1296 0x8080800000800080ull, /* E5 */
1297 0x8080800000808000ull, /* E6 */
1298 0x8080800000808080ull, /* E7 */
1299 0x8080800080000000ull, /* E8 */
1300 0x8080800080000080ull, /* E9 */
1301 0x8080800080008000ull, /* EA */
1302 0x8080800080008080ull, /* EB */
1303 0x8080800080800000ull, /* EC */
1304 0x8080800080800080ull, /* ED */
1305 0x8080800080808000ull, /* EE */
1306 0x8080800080808080ull, /* EF */
1307 0x8080808000000000ull, /* F0 */
1308 0x8080808000000080ull, /* F1 */
1309 0x8080808000008000ull, /* F2 */
1310 0x8080808000008080ull, /* F3 */
1311 0x8080808000800000ull, /* F4 */
1312 0x8080808000800080ull, /* F5 */
1313 0x8080808000808000ull, /* F6 */
1314 0x8080808000808080ull, /* F7 */
1315 0x8080808080000000ull, /* F8 */
1316 0x8080808080000080ull, /* F9 */
1317 0x8080808080008000ull, /* FA */
1318 0x8080808080008080ull, /* FB */
1319 0x8080808080800000ull, /* FC */
1320 0x8080808080800080ull, /* FD */
1321 0x8080808080808000ull, /* FE */
1322 0x8080808080808080ull, /* FF */
1325 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1327 int i;
1328 uint64_t t[2] = { 0, 0 };
1330 VECTOR_FOR_INORDER_I(i, u8) {
1331 #if defined(HOST_WORDS_BIGENDIAN)
1332 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1333 #else
1334 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1335 #endif
1338 r->u64[0] = t[0];
1339 r->u64[1] = t[1];
1342 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1343 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1345 int i, j; \
1346 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1348 VECTOR_FOR_INORDER_I(i, srcfld) { \
1349 prod[i] = 0; \
1350 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1351 if (a->srcfld[i] & (1ull<<j)) { \
1352 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1357 VECTOR_FOR_INORDER_I(i, trgfld) { \
1358 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1362 PMSUM(vpmsumb, u8, u16, uint16_t)
1363 PMSUM(vpmsumh, u16, u32, uint32_t)
1364 PMSUM(vpmsumw, u32, u64, uint64_t)
1366 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1369 #ifdef CONFIG_INT128
1370 int i, j;
1371 __uint128_t prod[2];
1373 VECTOR_FOR_INORDER_I(i, u64) {
1374 prod[i] = 0;
1375 for (j = 0; j < 64; j++) {
1376 if (a->u64[i] & (1ull<<j)) {
1377 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1382 r->u128 = prod[0] ^ prod[1];
1384 #else
1385 int i, j;
1386 ppc_avr_t prod[2];
1388 VECTOR_FOR_INORDER_I(i, u64) {
1389 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1390 for (j = 0; j < 64; j++) {
1391 if (a->u64[i] & (1ull<<j)) {
1392 ppc_avr_t bshift;
1393 if (j == 0) {
1394 bshift.u64[HI_IDX] = 0;
1395 bshift.u64[LO_IDX] = b->u64[i];
1396 } else {
1397 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1398 bshift.u64[LO_IDX] = b->u64[i] << j;
1400 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1401 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1406 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1407 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1408 #endif
1412 #if defined(HOST_WORDS_BIGENDIAN)
1413 #define PKBIG 1
1414 #else
1415 #define PKBIG 0
1416 #endif
1417 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1419 int i, j;
1420 ppc_avr_t result;
1421 #if defined(HOST_WORDS_BIGENDIAN)
1422 const ppc_avr_t *x[2] = { a, b };
1423 #else
1424 const ppc_avr_t *x[2] = { b, a };
1425 #endif
1427 VECTOR_FOR_INORDER_I(i, u64) {
1428 VECTOR_FOR_INORDER_I(j, u32) {
1429 uint32_t e = x[i]->u32[j];
1431 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1432 ((e >> 6) & 0x3e0) |
1433 ((e >> 3) & 0x1f));
1436 *r = result;
1439 #define VPK(suffix, from, to, cvt, dosat) \
1440 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1441 ppc_avr_t *a, ppc_avr_t *b) \
1443 int i; \
1444 int sat = 0; \
1445 ppc_avr_t result; \
1446 ppc_avr_t *a0 = PKBIG ? a : b; \
1447 ppc_avr_t *a1 = PKBIG ? b : a; \
1449 VECTOR_FOR_INORDER_I(i, from) { \
1450 result.to[i] = cvt(a0->from[i], &sat); \
1451 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1453 *r = result; \
1454 if (dosat && sat) { \
1455 env->vscr |= (1 << VSCR_SAT); \
1458 #define I(x, y) (x)
1459 VPK(shss, s16, s8, cvtshsb, 1)
1460 VPK(shus, s16, u8, cvtshub, 1)
1461 VPK(swss, s32, s16, cvtswsh, 1)
1462 VPK(swus, s32, u16, cvtswuh, 1)
1463 VPK(sdss, s64, s32, cvtsdsw, 1)
1464 VPK(sdus, s64, u32, cvtsduw, 1)
1465 VPK(uhus, u16, u8, cvtuhub, 1)
1466 VPK(uwus, u32, u16, cvtuwuh, 1)
1467 VPK(udus, u64, u32, cvtuduw, 1)
1468 VPK(uhum, u16, u8, I, 0)
1469 VPK(uwum, u32, u16, I, 0)
1470 VPK(udum, u64, u32, I, 0)
1471 #undef I
1472 #undef VPK
1473 #undef PKBIG
1475 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1477 int i;
1479 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1480 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1484 #define VRFI(suffix, rounding) \
1485 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1486 ppc_avr_t *b) \
1488 int i; \
1489 float_status s = env->vec_status; \
1491 set_float_rounding_mode(rounding, &s); \
1492 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1493 r->f[i] = float32_round_to_int (b->f[i], &s); \
1496 VRFI(n, float_round_nearest_even)
1497 VRFI(m, float_round_down)
1498 VRFI(p, float_round_up)
1499 VRFI(z, float_round_to_zero)
1500 #undef VRFI
1502 #define VROTATE(suffix, element, mask) \
1503 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1505 int i; \
1507 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1508 unsigned int shift = b->element[i] & mask; \
1509 r->element[i] = (a->element[i] << shift) | \
1510 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1513 VROTATE(b, u8, 0x7)
1514 VROTATE(h, u16, 0xF)
1515 VROTATE(w, u32, 0x1F)
1516 VROTATE(d, u64, 0x3F)
1517 #undef VROTATE
1519 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1521 int i;
1523 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1524 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1526 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1530 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1531 ppc_avr_t *c)
1533 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1534 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1537 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1539 int i;
1541 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1542 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1546 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1548 int i;
1550 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1551 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1555 /* The specification says that the results are undefined if all of the
1556 * shift counts are not identical. We check to make sure that they are
1557 * to conform to what real hardware appears to do. */
1558 #define VSHIFT(suffix, leftp) \
1559 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1561 int shift = b->u8[LO_IDX*15] & 0x7; \
1562 int doit = 1; \
1563 int i; \
1565 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1566 doit = doit && ((b->u8[i] & 0x7) == shift); \
1568 if (doit) { \
1569 if (shift == 0) { \
1570 *r = *a; \
1571 } else if (leftp) { \
1572 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1574 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1575 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1576 } else { \
1577 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1579 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1580 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1584 VSHIFT(l, 1)
1585 VSHIFT(r, 0)
1586 #undef VSHIFT
1588 #define VSL(suffix, element, mask) \
1589 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1591 int i; \
1593 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1594 unsigned int shift = b->element[i] & mask; \
1596 r->element[i] = a->element[i] << shift; \
1599 VSL(b, u8, 0x7)
1600 VSL(h, u16, 0x0F)
1601 VSL(w, u32, 0x1F)
1602 VSL(d, u64, 0x3F)
1603 #undef VSL
1605 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1607 int sh = shift & 0xf;
1608 int i;
1609 ppc_avr_t result;
1611 #if defined(HOST_WORDS_BIGENDIAN)
1612 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1613 int index = sh + i;
1614 if (index > 0xf) {
1615 result.u8[i] = b->u8[index - 0x10];
1616 } else {
1617 result.u8[i] = a->u8[index];
1620 #else
1621 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1622 int index = (16 - sh) + i;
1623 if (index > 0xf) {
1624 result.u8[i] = a->u8[index - 0x10];
1625 } else {
1626 result.u8[i] = b->u8[index];
1629 #endif
1630 *r = result;
1633 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1635 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1637 #if defined(HOST_WORDS_BIGENDIAN)
1638 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1639 memset(&r->u8[16-sh], 0, sh);
1640 #else
1641 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1642 memset(&r->u8[0], 0, sh);
1643 #endif
1646 /* Experimental testing shows that hardware masks the immediate. */
1647 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1648 #if defined(HOST_WORDS_BIGENDIAN)
1649 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1650 #else
1651 #define SPLAT_ELEMENT(element) \
1652 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1653 #endif
1654 #define VSPLT(suffix, element) \
1655 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1657 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1658 int i; \
1660 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1661 r->element[i] = s; \
1664 VSPLT(b, u8)
1665 VSPLT(h, u16)
1666 VSPLT(w, u32)
1667 #undef VSPLT
1668 #undef SPLAT_ELEMENT
1669 #undef _SPLAT_MASKED
1671 #define VSPLTI(suffix, element, splat_type) \
1672 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1674 splat_type x = (int8_t)(splat << 3) >> 3; \
1675 int i; \
1677 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1678 r->element[i] = x; \
1681 VSPLTI(b, s8, int8_t)
1682 VSPLTI(h, s16, int16_t)
1683 VSPLTI(w, s32, int32_t)
1684 #undef VSPLTI
1686 #define VSR(suffix, element, mask) \
1687 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1689 int i; \
1691 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1692 unsigned int shift = b->element[i] & mask; \
1693 r->element[i] = a->element[i] >> shift; \
1696 VSR(ab, s8, 0x7)
1697 VSR(ah, s16, 0xF)
1698 VSR(aw, s32, 0x1F)
1699 VSR(ad, s64, 0x3F)
1700 VSR(b, u8, 0x7)
1701 VSR(h, u16, 0xF)
1702 VSR(w, u32, 0x1F)
1703 VSR(d, u64, 0x3F)
1704 #undef VSR
1706 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1708 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1710 #if defined(HOST_WORDS_BIGENDIAN)
1711 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1712 memset(&r->u8[0], 0, sh);
1713 #else
1714 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1715 memset(&r->u8[16 - sh], 0, sh);
1716 #endif
1719 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1721 int i;
1723 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1724 r->u32[i] = a->u32[i] >= b->u32[i];
1728 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1730 int64_t t;
1731 int i, upper;
1732 ppc_avr_t result;
1733 int sat = 0;
1735 #if defined(HOST_WORDS_BIGENDIAN)
1736 upper = ARRAY_SIZE(r->s32)-1;
1737 #else
1738 upper = 0;
1739 #endif
1740 t = (int64_t)b->s32[upper];
1741 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1742 t += a->s32[i];
1743 result.s32[i] = 0;
1745 result.s32[upper] = cvtsdsw(t, &sat);
1746 *r = result;
1748 if (sat) {
1749 env->vscr |= (1 << VSCR_SAT);
1753 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1755 int i, j, upper;
1756 ppc_avr_t result;
1757 int sat = 0;
1759 #if defined(HOST_WORDS_BIGENDIAN)
1760 upper = 1;
1761 #else
1762 upper = 0;
1763 #endif
1764 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1765 int64_t t = (int64_t)b->s32[upper + i * 2];
1767 result.u64[i] = 0;
1768 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1769 t += a->s32[2 * i + j];
1771 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
1774 *r = result;
1775 if (sat) {
1776 env->vscr |= (1 << VSCR_SAT);
1780 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1782 int i, j;
1783 int sat = 0;
1785 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1786 int64_t t = (int64_t)b->s32[i];
1788 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1789 t += a->s8[4 * i + j];
1791 r->s32[i] = cvtsdsw(t, &sat);
1794 if (sat) {
1795 env->vscr |= (1 << VSCR_SAT);
1799 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1801 int sat = 0;
1802 int i;
1804 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1805 int64_t t = (int64_t)b->s32[i];
1807 t += a->s16[2 * i] + a->s16[2 * i + 1];
1808 r->s32[i] = cvtsdsw(t, &sat);
1811 if (sat) {
1812 env->vscr |= (1 << VSCR_SAT);
1816 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1818 int i, j;
1819 int sat = 0;
1821 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1822 uint64_t t = (uint64_t)b->u32[i];
1824 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1825 t += a->u8[4 * i + j];
1827 r->u32[i] = cvtuduw(t, &sat);
1830 if (sat) {
1831 env->vscr |= (1 << VSCR_SAT);
1835 #if defined(HOST_WORDS_BIGENDIAN)
1836 #define UPKHI 1
1837 #define UPKLO 0
1838 #else
1839 #define UPKHI 0
1840 #define UPKLO 1
1841 #endif
1842 #define VUPKPX(suffix, hi) \
1843 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1845 int i; \
1846 ppc_avr_t result; \
1848 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1849 uint16_t e = b->u16[hi ? i : i+4]; \
1850 uint8_t a = (e >> 15) ? 0xff : 0; \
1851 uint8_t r = (e >> 10) & 0x1f; \
1852 uint8_t g = (e >> 5) & 0x1f; \
1853 uint8_t b = e & 0x1f; \
1855 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1857 *r = result; \
1859 VUPKPX(lpx, UPKLO)
1860 VUPKPX(hpx, UPKHI)
1861 #undef VUPKPX
1863 #define VUPK(suffix, unpacked, packee, hi) \
1864 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1866 int i; \
1867 ppc_avr_t result; \
1869 if (hi) { \
1870 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1871 result.unpacked[i] = b->packee[i]; \
1873 } else { \
1874 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1875 i++) { \
1876 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1879 *r = result; \
1881 VUPK(hsb, s16, s8, UPKHI)
1882 VUPK(hsh, s32, s16, UPKHI)
1883 VUPK(hsw, s64, s32, UPKHI)
1884 VUPK(lsb, s16, s8, UPKLO)
1885 VUPK(lsh, s32, s16, UPKLO)
1886 VUPK(lsw, s64, s32, UPKLO)
1887 #undef VUPK
1888 #undef UPKHI
1889 #undef UPKLO
1891 #define VGENERIC_DO(name, element) \
1892 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1894 int i; \
1896 VECTOR_FOR_INORDER_I(i, element) { \
1897 r->element[i] = name(b->element[i]); \
1901 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1902 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1903 #define clzw(v) clz32((v))
1904 #define clzd(v) clz64((v))
1906 VGENERIC_DO(clzb, u8)
1907 VGENERIC_DO(clzh, u16)
1908 VGENERIC_DO(clzw, u32)
1909 VGENERIC_DO(clzd, u64)
1911 #undef clzb
1912 #undef clzh
1913 #undef clzw
1914 #undef clzd
1916 #define popcntb(v) ctpop8(v)
1917 #define popcnth(v) ctpop16(v)
1918 #define popcntw(v) ctpop32(v)
1919 #define popcntd(v) ctpop64(v)
1921 VGENERIC_DO(popcntb, u8)
1922 VGENERIC_DO(popcnth, u16)
1923 VGENERIC_DO(popcntw, u32)
1924 VGENERIC_DO(popcntd, u64)
1926 #undef popcntb
1927 #undef popcnth
1928 #undef popcntw
1929 #undef popcntd
1931 #undef VGENERIC_DO
/*
 * Initializer for a ppc_avr_t holding the 128-bit value 1: the low
 * doubleword is 1 and the high doubleword is 0, whichever host index each
 * of them lives at.
 */
#define QW_ONE { .u64 = { [HI_IDX] = 0, [LO_IDX] = 1 } }
1939 #ifndef CONFIG_INT128
1941 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1943 t->u64[0] = ~a.u64[0];
1944 t->u64[1] = ~a.u64[1];
1947 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1949 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
1950 return -1;
1951 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
1952 return 1;
1953 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
1954 return -1;
1955 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
1956 return 1;
1957 } else {
1958 return 0;
1962 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1964 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1965 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1966 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1969 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1971 ppc_avr_t not_a;
1972 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1973 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1974 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1975 avr_qw_not(&not_a, a);
1976 return avr_qw_cmpu(not_a, b) < 0;
1979 #endif
1981 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1983 #ifdef CONFIG_INT128
1984 r->u128 = a->u128 + b->u128;
1985 #else
1986 avr_qw_add(r, *a, *b);
1987 #endif
1990 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1992 #ifdef CONFIG_INT128
1993 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1994 #else
1996 if (c->u64[LO_IDX] & 1) {
1997 ppc_avr_t tmp;
1999 tmp.u64[HI_IDX] = 0;
2000 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2001 avr_qw_add(&tmp, *a, tmp);
2002 avr_qw_add(r, tmp, *b);
2003 } else {
2004 avr_qw_add(r, *a, *b);
2006 #endif
2009 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2011 #ifdef CONFIG_INT128
2012 r->u128 = (~a->u128 < b->u128);
2013 #else
2014 ppc_avr_t not_a;
2016 avr_qw_not(&not_a, *a);
2018 r->u64[HI_IDX] = 0;
2019 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2020 #endif
2023 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2025 #ifdef CONFIG_INT128
2026 int carry_out = (~a->u128 < b->u128);
2027 if (!carry_out && (c->u128 & 1)) {
2028 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2029 ((a->u128 != 0) || (b->u128 != 0));
2031 r->u128 = carry_out;
2032 #else
2034 int carry_in = c->u64[LO_IDX] & 1;
2035 int carry_out = 0;
2036 ppc_avr_t tmp;
2038 carry_out = avr_qw_addc(&tmp, *a, *b);
2040 if (!carry_out && carry_in) {
2041 ppc_avr_t one = QW_ONE;
2042 carry_out = avr_qw_addc(&tmp, tmp, one);
2044 r->u64[HI_IDX] = 0;
2045 r->u64[LO_IDX] = carry_out;
2046 #endif
2049 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2051 #ifdef CONFIG_INT128
2052 r->u128 = a->u128 - b->u128;
2053 #else
2054 ppc_avr_t tmp;
2055 ppc_avr_t one = QW_ONE;
2057 avr_qw_not(&tmp, *b);
2058 avr_qw_add(&tmp, *a, tmp);
2059 avr_qw_add(r, tmp, one);
2060 #endif
2063 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2065 #ifdef CONFIG_INT128
2066 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2067 #else
2068 ppc_avr_t tmp, sum;
2070 avr_qw_not(&tmp, *b);
2071 avr_qw_add(&sum, *a, tmp);
2073 tmp.u64[HI_IDX] = 0;
2074 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2075 avr_qw_add(r, sum, tmp);
2076 #endif
2079 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2081 #ifdef CONFIG_INT128
2082 r->u128 = (~a->u128 < ~b->u128) ||
2083 (a->u128 + ~b->u128 == (__uint128_t)-1);
2084 #else
2085 int carry = (avr_qw_cmpu(*a, *b) > 0);
2086 if (!carry) {
2087 ppc_avr_t tmp;
2088 avr_qw_not(&tmp, *b);
2089 avr_qw_add(&tmp, *a, tmp);
2090 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2092 r->u64[HI_IDX] = 0;
2093 r->u64[LO_IDX] = carry;
2094 #endif
2097 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2099 #ifdef CONFIG_INT128
2100 r->u128 =
2101 (~a->u128 < ~b->u128) ||
2102 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2103 #else
2104 int carry_in = c->u64[LO_IDX] & 1;
2105 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2106 if (!carry_out && carry_in) {
2107 ppc_avr_t tmp;
2108 avr_qw_not(&tmp, *b);
2109 avr_qw_add(&tmp, *a, tmp);
2110 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2113 r->u64[HI_IDX] = 0;
2114 r->u64[LO_IDX] = carry_out;
2115 #endif
/*
 * Sign codes stored in digit 0 (the low nibble of the lowest-order byte)
 * of a packed BCD value.  The *_PREF codes are the ones generated for
 * results; the *_ALT codes are only accepted on input.
 */
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE

/*
 * Host byte index of the byte holding BCD digit n (two digits per byte).
 * The argument is parenthesised so that expressions such as
 * BCD_DIG_BYTE(i + 1) expand correctly.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
#else
#define BCD_DIG_BYTE(n) ((n) / 2)
#endif
2131 static int bcd_get_sgn(ppc_avr_t *bcd)
2133 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2134 case BCD_PLUS_PREF_1:
2135 case BCD_PLUS_PREF_2:
2136 case BCD_PLUS_ALT_1:
2137 case BCD_PLUS_ALT_2:
2139 return 1;
2142 case BCD_NEG_PREF:
2143 case BCD_NEG_ALT:
2145 return -1;
2148 default:
2150 return 0;
2155 static int bcd_preferred_sgn(int sgn, int ps)
2157 if (sgn >= 0) {
2158 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2159 } else {
2160 return BCD_NEG_PREF;
2164 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2166 uint8_t result;
2167 if (n & 1) {
2168 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2169 } else {
2170 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2173 if (unlikely(result > 9)) {
2174 *invalid = true;
2176 return result;
2179 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2181 if (n & 1) {
2182 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2183 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2184 } else {
2185 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2186 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2190 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2192 int i;
2193 int invalid = 0;
2194 for (i = 31; i > 0; i--) {
2195 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2196 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2197 if (unlikely(invalid)) {
2198 return 0; /* doesn't matter */
2199 } else if (dig_a > dig_b) {
2200 return 1;
2201 } else if (dig_a < dig_b) {
2202 return -1;
2206 return 0;
2209 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2210 int *overflow)
2212 int carry = 0;
2213 int i;
2214 int is_zero = 1;
2215 for (i = 1; i <= 31; i++) {
2216 uint8_t digit = bcd_get_digit(a, i, invalid) +
2217 bcd_get_digit(b, i, invalid) + carry;
2218 is_zero &= (digit == 0);
2219 if (digit > 9) {
2220 carry = 1;
2221 digit -= 10;
2222 } else {
2223 carry = 0;
2226 bcd_put_digit(t, digit, i);
2228 if (unlikely(*invalid)) {
2229 return -1;
2233 *overflow = carry;
2234 return is_zero;
2237 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2238 int *overflow)
2240 int carry = 0;
2241 int i;
2242 int is_zero = 1;
2243 for (i = 1; i <= 31; i++) {
2244 uint8_t digit = bcd_get_digit(a, i, invalid) -
2245 bcd_get_digit(b, i, invalid) + carry;
2246 is_zero &= (digit == 0);
2247 if (digit & 0x80) {
2248 carry = -1;
2249 digit += 10;
2250 } else {
2251 carry = 0;
2254 bcd_put_digit(t, digit, i);
2256 if (unlikely(*invalid)) {
2257 return -1;
2261 *overflow = carry;
2262 return is_zero;
2265 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2268 int sgna = bcd_get_sgn(a);
2269 int sgnb = bcd_get_sgn(b);
2270 int invalid = (sgna == 0) || (sgnb == 0);
2271 int overflow = 0;
2272 int zero = 0;
2273 uint32_t cr = 0;
2274 ppc_avr_t result = { .u64 = { 0, 0 } };
2276 if (!invalid) {
2277 if (sgna == sgnb) {
2278 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2279 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2280 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2281 } else if (bcd_cmp_mag(a, b) > 0) {
2282 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2283 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2284 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2285 } else {
2286 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2287 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2288 cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2292 if (unlikely(invalid)) {
2293 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2294 cr = 1 << CRF_SO;
2295 } else if (overflow) {
2296 cr |= 1 << CRF_SO;
2297 } else if (zero) {
2298 cr = 1 << CRF_EQ;
2301 *r = result;
2303 return cr;
2306 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2308 ppc_avr_t bcopy = *b;
2309 int sgnb = bcd_get_sgn(b);
2310 if (sgnb < 0) {
2311 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2312 } else if (sgnb > 0) {
2313 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2315 /* else invalid ... defer to bcdadd code for proper handling */
2317 return helper_bcdadd(r, a, &bcopy, ps);
2320 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2322 int i;
2323 VECTOR_FOR_INORDER_I(i, u8) {
2324 r->u8[i] = AES_sbox[a->u8[i]];
2328 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2330 int i;
2332 VECTOR_FOR_INORDER_I(i, u32) {
2333 r->AVRW(i) = b->AVRW(i) ^
2334 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2335 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2336 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2337 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2341 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2343 int i;
2345 VECTOR_FOR_INORDER_I(i, u8) {
2346 r->AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
2350 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2352 /* This differs from what is written in ISA V2.07. The RTL is */
2353 /* incorrect and will be fixed in V2.07B. */
2354 int i;
2355 ppc_avr_t tmp;
2357 VECTOR_FOR_INORDER_I(i, u8) {
2358 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2361 VECTOR_FOR_INORDER_I(i, u32) {
2362 r->AVRW(i) =
2363 AES_imc[tmp.AVRB(4*i + 0)][0] ^
2364 AES_imc[tmp.AVRB(4*i + 1)][1] ^
2365 AES_imc[tmp.AVRB(4*i + 2)][2] ^
2366 AES_imc[tmp.AVRB(4*i + 3)][3];
2370 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2372 int i;
2374 VECTOR_FOR_INORDER_I(i, u8) {
2375 r->AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
2379 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
2380 #if defined(HOST_WORDS_BIGENDIAN)
2381 #define EL_IDX(i) (i)
2382 #else
2383 #define EL_IDX(i) (3 - (i))
2384 #endif
2386 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2388 int st = (st_six & 0x10) != 0;
2389 int six = st_six & 0xF;
2390 int i;
2392 VECTOR_FOR_INORDER_I(i, u32) {
2393 if (st == 0) {
2394 if ((six & (0x8 >> i)) == 0) {
2395 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2396 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2397 (a->u32[EL_IDX(i)] >> 3);
2398 } else { /* six.bit[i] == 1 */
2399 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2400 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2401 (a->u32[EL_IDX(i)] >> 10);
2403 } else { /* st == 1 */
2404 if ((six & (0x8 >> i)) == 0) {
2405 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2406 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2407 ROTRu32(a->u32[EL_IDX(i)], 22);
2408 } else { /* six.bit[i] == 1 */
2409 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2410 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2411 ROTRu32(a->u32[EL_IDX(i)], 25);
2417 #undef ROTRu32
2418 #undef EL_IDX
2420 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
2421 #if defined(HOST_WORDS_BIGENDIAN)
2422 #define EL_IDX(i) (i)
2423 #else
2424 #define EL_IDX(i) (1 - (i))
2425 #endif
2427 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2429 int st = (st_six & 0x10) != 0;
2430 int six = st_six & 0xF;
2431 int i;
2433 VECTOR_FOR_INORDER_I(i, u64) {
2434 if (st == 0) {
2435 if ((six & (0x8 >> (2*i))) == 0) {
2436 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2437 ROTRu64(a->u64[EL_IDX(i)], 8) ^
2438 (a->u64[EL_IDX(i)] >> 7);
2439 } else { /* six.bit[2*i] == 1 */
2440 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2441 ROTRu64(a->u64[EL_IDX(i)], 61) ^
2442 (a->u64[EL_IDX(i)] >> 6);
2444 } else { /* st == 1 */
2445 if ((six & (0x8 >> (2*i))) == 0) {
2446 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2447 ROTRu64(a->u64[EL_IDX(i)], 34) ^
2448 ROTRu64(a->u64[EL_IDX(i)], 39);
2449 } else { /* six.bit[2*i] == 1 */
2450 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2451 ROTRu64(a->u64[EL_IDX(i)], 18) ^
2452 ROTRu64(a->u64[EL_IDX(i)], 41);
2458 #undef ROTRu64
2459 #undef EL_IDX
2461 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2463 int i;
2464 VECTOR_FOR_INORDER_I(i, u8) {
2465 int indexA = c->u8[i] >> 4;
2466 int indexB = c->u8[i] & 0xF;
2467 #if defined(HOST_WORDS_BIGENDIAN)
2468 r->u8[i] = a->u8[indexA] ^ b->u8[indexB];
2469 #else
2470 r->u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
2471 #endif
2475 #undef VECTOR_FOR_INORDER_I
2476 #undef HI_IDX
2477 #undef LO_IDX
/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker: hbrev[n] is nibble n with its four
 * bits in reverse order. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of a byte: swap the nibbles and reverse each. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return (uint8_t)(hbrev[val >> 4] | (hbrev[val & 0xF] << 4));
}

/*
 * Reverse the bit order of a 32-bit word: reverse each byte and swap the
 * byte positions.  Each reversed byte is widened to uint32_t before it is
 * shifted; shifting the promoted int left by 24 would overflow into the
 * sign bit whenever the byte's top bit is set.
 */
static inline uint32_t word_reverse(uint32_t val)
{
    return (uint32_t)byte_reverse(val >> 24) |
           ((uint32_t)byte_reverse(val >> 16) << 8) |
           ((uint32_t)byte_reverse(val >> 8) << 16) |
           ((uint32_t)byte_reverse(val) << 24);
}
2498 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
2499 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2501 uint32_t a, b, d, mask;
2503 mask = UINT32_MAX >> (32 - MASKBITS);
2504 a = arg1 & mask;
2505 b = arg2 & mask;
2506 d = word_reverse(1 + word_reverse(a | ~b));
2507 return (arg1 & ~mask) | (d & b);
/*
 * Count leading sign bits: the number of leading ones when bit 31 of val
 * is set, otherwise the number of leading zeros.
 */
uint32_t helper_cntlsw32(uint32_t val)
{
    const uint32_t probe = (val & 0x80000000) ? ~val : val;

    return clz32(probe);
}
/* Count the leading zero bits of val (thin wrapper around clz32()). */
uint32_t helper_cntlzw32(uint32_t val)
{
    const uint32_t leading = clz32(val);

    return leading;
}
2524 /* 440 specific */
2525 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2526 target_ulong low, uint32_t update_Rc)
2528 target_ulong mask;
2529 int i;
2531 i = 1;
2532 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2533 if ((high & mask) == 0) {
2534 if (update_Rc) {
2535 env->crf[0] = 0x4;
2537 goto done;
2539 i++;
2541 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2542 if ((low & mask) == 0) {
2543 if (update_Rc) {
2544 env->crf[0] = 0x8;
2546 goto done;
2548 i++;
2550 i = 8;
2551 if (update_Rc) {
2552 env->crf[0] = 0x2;
2554 done:
2555 env->xer = (env->xer & ~0x7F) | i;
2556 if (update_Rc) {
2557 env->crf[0] |= xer_so;
2559 return i;