vhost: vhost-kernel: enable vq reset feature
[qemu/kevin.git] / target / ppc / int_helper.c
blobd97a7f1f28b7697012f23448fccbc9a14854a465
/*
 *  PowerPC integer and vector emulation helpers for QEMU.
 *
 *  Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
31 #include "tcg/tcg-gvec-desc.h"
33 #include "helper_regs.h"
34 /*****************************************************************************/
35 /* Fixed point operations helpers */
37 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
39 if (unlikely(ov)) {
40 env->so = env->ov = env->ov32 = 1;
41 } else {
42 env->ov = env->ov32 = 0;
46 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
47 uint32_t oe)
49 uint64_t rt = 0;
50 int overflow = 0;
52 uint64_t dividend = (uint64_t)ra << 32;
53 uint64_t divisor = (uint32_t)rb;
55 if (unlikely(divisor == 0)) {
56 overflow = 1;
57 } else {
58 rt = dividend / divisor;
59 overflow = rt > UINT32_MAX;
62 if (unlikely(overflow)) {
63 rt = 0; /* Undefined */
66 if (oe) {
67 helper_update_ov_legacy(env, overflow);
70 return (target_ulong)rt;
73 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
74 uint32_t oe)
76 int64_t rt = 0;
77 int overflow = 0;
79 int64_t dividend = (int64_t)ra << 32;
80 int64_t divisor = (int64_t)((int32_t)rb);
82 if (unlikely((divisor == 0) ||
83 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
84 overflow = 1;
85 } else {
86 rt = dividend / divisor;
87 overflow = rt != (int32_t)rt;
90 if (unlikely(overflow)) {
91 rt = 0; /* Undefined */
94 if (oe) {
95 helper_update_ov_legacy(env, overflow);
98 return (target_ulong)rt;
101 #if defined(TARGET_PPC64)
103 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
105 uint64_t rt = 0;
106 int overflow = 0;
108 if (unlikely(rb == 0 || ra >= rb)) {
109 overflow = 1;
110 rt = 0; /* Undefined */
111 } else {
112 divu128(&rt, &ra, rb);
115 if (oe) {
116 helper_update_ov_legacy(env, overflow);
119 return rt;
122 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
124 uint64_t rt = 0;
125 int64_t ra = (int64_t)rau;
126 int64_t rb = (int64_t)rbu;
127 int overflow = 0;
129 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
130 overflow = 1;
131 rt = 0; /* Undefined */
132 } else {
133 divs128(&rt, &ra, rb);
136 if (oe) {
137 helper_update_ov_legacy(env, overflow);
140 return rt;
143 #endif
146 #if defined(TARGET_PPC64)
147 /* if x = 0xab, returns 0xababababababababa */
148 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
151 * subtract 1 from each byte, and with inverse, check if MSB is set at each
152 * byte.
153 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
154 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
156 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
158 /* When you XOR the pattern and there is a match, that byte will be zero */
159 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
161 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
163 return hasvalue(rb, ra) ? CRF_GT : 0;
166 #undef pattern
167 #undef haszero
168 #undef hasvalue
171 * Return a random number.
173 uint64_t helper_darn32(void)
175 Error *err = NULL;
176 uint32_t ret;
178 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
179 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
180 error_get_pretty(err));
181 error_free(err);
182 return -1;
185 return ret;
188 uint64_t helper_darn64(void)
190 Error *err = NULL;
191 uint64_t ret;
193 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
194 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
195 error_get_pretty(err));
196 error_free(err);
197 return -1;
200 return ret;
/*
 * Bit permute doubleword.
 *
 * Each of the eight bytes of rs is used as a bit index.  For byte number n
 * (counted from the least significant byte), bit n of the result is set
 * when the index is below 64 and rb has PPC_BIT(index) set.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t result = 0;

    for (int n = 0; n < 8; n++) {
        const int sel = (rs >> (n * 8)) & 0xFF;

        if (sel >= 64) {
            continue; /* out-of-range selectors contribute a 0 bit */
        }
        if (rb & PPC_BIT(sel)) {
            result |= 1 << n;
        }
    }

    return result;
}
219 #endif
221 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
223 target_ulong mask = 0xff;
224 target_ulong ra = 0;
225 int i;
227 for (i = 0; i < sizeof(target_ulong); i++) {
228 if ((rs & mask) == (rb & mask)) {
229 ra |= mask;
231 mask <<= 8;
233 return ra;
236 /* shift right arithmetic helper */
237 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
238 target_ulong shift)
240 int32_t ret;
242 if (likely(!(shift & 0x20))) {
243 if (likely((uint32_t)shift != 0)) {
244 shift &= 0x1f;
245 ret = (int32_t)value >> shift;
246 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
247 env->ca32 = env->ca = 0;
248 } else {
249 env->ca32 = env->ca = 1;
251 } else {
252 ret = (int32_t)value;
253 env->ca32 = env->ca = 0;
255 } else {
256 ret = (int32_t)value >> 31;
257 env->ca32 = env->ca = (ret != 0);
259 return (target_long)ret;
262 #if defined(TARGET_PPC64)
263 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
264 target_ulong shift)
266 int64_t ret;
268 if (likely(!(shift & 0x40))) {
269 if (likely((uint64_t)shift != 0)) {
270 shift &= 0x3f;
271 ret = (int64_t)value >> shift;
272 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
273 env->ca32 = env->ca = 0;
274 } else {
275 env->ca32 = env->ca = 1;
277 } else {
278 ret = (int64_t)value;
279 env->ca32 = env->ca = 0;
281 } else {
282 ret = (int64_t)value >> 63;
283 env->ca32 = env->ca = (ret != 0);
285 return ret;
287 #endif
289 #if defined(TARGET_PPC64)
290 target_ulong helper_popcntb(target_ulong val)
292 /* Note that we don't fold past bytes */
293 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
294 0x5555555555555555ULL);
295 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
296 0x3333333333333333ULL);
297 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
298 0x0f0f0f0f0f0f0f0fULL);
299 return val;
302 target_ulong helper_popcntw(target_ulong val)
304 /* Note that we don't fold past words. */
305 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
306 0x5555555555555555ULL);
307 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
308 0x3333333333333333ULL);
309 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
310 0x0f0f0f0f0f0f0f0fULL);
311 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
312 0x00ff00ff00ff00ffULL);
313 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
314 0x0000ffff0000ffffULL);
315 return val;
317 #else
318 target_ulong helper_popcntb(target_ulong val)
320 /* Note that we don't fold past bytes */
321 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
322 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
323 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
324 return val;
326 #endif
/*
 * Centrifuge doubleword: the bits of @src selected by 0 bits of @mask are
 * gathered into 'left', the bits selected by 1 bits into 'right', and the
 * two groups are combined into the result.
 *
 * All intermediate values are 64 bits wide regardless of the target word
 * size, so they are held in uint64_t rather than target_ulong.
 */
uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
     * ctz or cto, we negate the mask at the end of the loop.
     */
    uint64_t m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false; /* tracks if we are processing zeros or ones */

    if (mask == 0 || mask == -1) {
        return src;
    }

    /* Processes the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extracts 'n' trailing bits of src and put them on the leading 'n'
         * bits of 'right' or 'left', pushing down the previously extracted
         * values.
         */
        m = (1ULL << n) - 1; /* unsigned: avoids a signed shift into bit 63 */
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discards the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is kept
         * the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, right was ror'ed ctpop(mask) times. To put it back in place,
     * we'll shift it more 64-ctpop(mask) times.
     */
    if (bit) {
        n = ctpop64(mask);
    } else {
        n = 64 - ctpop64(mask);
    }

    return left | (right >> n);
}
/*
 * Parallel bits deposit doubleword: successive low-order bits of @src
 * (bit 0, 1, 2, ...) are placed at the positions of the set bits of @mask,
 * taken from the least significant set bit upwards.  All other result bits
 * are zero.
 */
uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
{
    uint64_t result = 0;
    int src_bit = 0;

    if (mask == -1) {
        return src; /* every position selected: identity */
    }

    while (mask != 0) {
        const int dst_bit = ctz64(mask);

        mask &= mask - 1; /* clear the lowest set bit */
        result |= ((src >> src_bit) & 1) << dst_bit;
        src_bit++;
    }

    return result;
}
/*
 * Parallel bits extract doubleword: the bits of @src found at the positions
 * of the set bits of @mask (from the least significant set bit upwards) are
 * packed into the low-order bits of the result.  All other result bits are
 * zero.
 */
uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
{
    uint64_t result = 0;
    int dst_bit = 0;

    if (mask == -1) {
        return src; /* every position selected: identity */
    }

    while (mask != 0) {
        const int src_bit = ctz64(mask);

        mask &= mask - 1; /* clear the lowest set bit */
        result |= ((src >> src_bit) & 1) << dst_bit;
        dst_bit++;
    }

    return result;
}
426 /*****************************************************************************/
427 /* Altivec extension helpers */
/*
 * Loop header that walks the elements of r->element: upwards from index 0
 * on big-endian hosts, downwards from the last index on little-endian
 * hosts.  'index' must be a signed variable, since the descending form
 * terminates on index < 0.  The macro refers to a variable named 'r' in the
 * enclosing scope.
 */
#if HOST_BIG_ENDIAN
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif
/*
 * Saturating arithmetic helpers.
 *
 * SATCVT(from, to, from_type, to_type, min, max) defines
 *     to_type cvt<from><to>(from_type x, int *sat)
 * which clamps x into [min, max] and stores 1 to *sat when clamping took
 * place (*sat is never cleared).  SATCVTU is the variant for unsigned
 * sources, where only the upper bound needs checking.
 */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x < (from_type)min) {                               \
            *sat = 1;                                           \
            return min;                                         \
        }                                                       \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }
/* signed -> narrower signed */
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

/* unsigned -> narrower unsigned */
SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
/* signed -> narrower unsigned */
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
/* Write @vscr to the VSCR by delegating to ppc_store_vscr(). */
void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    ppc_store_vscr(env, vscr);
}

/* Read the VSCR by delegating to ppc_get_vscr(). */
uint32_t helper_mfvscr(CPUPPCState *env)
{
    return ppc_get_vscr(env);
}

/* Mark saturation as having occurred by making env->vscr_sat non-zero. */
static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary.  */
    env->vscr_sat.u32[0] = 1;
}
495 /* vprtybq */
496 void helper_VPRTYBQ(ppc_avr_t *r, ppc_avr_t *b, uint32_t v)
498 uint64_t res = b->u64[0] ^ b->u64[1];
499 res ^= res >> 32;
500 res ^= res >> 16;
501 res ^= res >> 8;
502 r->VsrD(1) = res & 1;
503 r->VsrD(0) = 0;
/*
 * VARITHFP(suffix, func) defines helper_v<suffix>(), which applies the
 * two-operand softfloat routine 'func' to each pair of float32 elements of
 * a and b, using env->vec_status, and stores the results in r.
 */
#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP
/*
 * VARITHFPFMA(suffix, type) defines helper_v<suffix>(), which computes a
 * fused multiply-add per float32 element via
 * float32_muladd(a, c, b, type, &env->vec_status); 'type' carries the
 * float_muladd_* negation flags.
 */
#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA
/*
 * Saturating vector add/subtract helpers.
 *
 * VARITHSAT_CASE computes one element: the operands are widened to 'type',
 * combined with 'op', and narrowed back through the saturating converter
 * 'cvt', which raises the local 'sat' flag when it clamps.
 *
 * VARITHSAT_DO defines helper_v<name>(); once all elements are processed,
 * vscr_sat->u32[0] is set to 1 if any element saturated.  'desc' is unused.
 */
#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
                        ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            VARITHSAT_CASE(optype, op, cvt, element);                   \
        }                                                               \
        if (sat) {                                                      \
            vscr_sat->u32[0] = 1;                                       \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
/*
 * The unsigned variants use the signed->unsigned converters: the widened
 * unsigned result is passed to a converter taking the signed type of the
 * same width, so a wrapped-around difference arrives as a negative value
 * and is clamped to 0.
 */
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED
/*
 * VAVG(name, element, etype) defines helper_<name>(), the rounded-up
 * average of each element pair: (a + b + 1) >> 1.  The sum is formed in the
 * wider type 'etype' so the intermediate value cannot overflow.  'v' is
 * unused.
 */
#define VAVG(name, element, etype)                                          \
    void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
    {                                                                       \
        int i;                                                              \
                                                                            \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;      \
            r->element[i] = x >> 1;                                         \
        }                                                                   \
    }

VAVG(VAVGSB, s8, int16_t)
VAVG(VAVGUB, u8, uint16_t)
VAVG(VAVGSH, s16, int32_t)
VAVG(VAVGUH, u16, uint32_t)
VAVG(VAVGSW, s32, int64_t)
VAVG(VAVGUW, u32, uint64_t)
#undef VAVG
/*
 * Defines helper_<name>(), which stores |a - b| for each unsigned element
 * pair: the smaller operand is always subtracted from the larger one, so
 * the difference never wraps.  'v' is unused.
 */
#define VABSDU(name, element)                                           \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
                        (a->element[i] - b->element[i]) :               \
                        (b->element[i] - a->element[i]);                \
    }                                                                   \
}

/*
 * VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
VABSDU(VABSDUB, u8)
VABSDU(VABSDUH, u16)
VABSDU(VABSDUW, u32)
#undef VABSDU
/*
 * VCF(suffix, cvt, element) defines helper_vcf<suffix>(): each integer
 * element of b is converted to float32 with 'cvt' and then scaled by
 * 2^-uim with float32_scalbn().
 */
#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF
/*
 * VCMPNEZ(NAME, ELEM) defines helper_<NAME>(): each result element is all
 * ones when either source element is zero or the two source elements
 * differ, and zero otherwise.  'desc' is unused.
 */
#define VCMPNEZ(NAME, ELEM) \
void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
{                                                                           \
    for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) {                         \
        t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) ||             \
                      (a->ELEM[i] != b->ELEM[i])) ? -1 : 0;                 \
    }                                                                       \
}
VCMPNEZ(VCMPNEZB, u8)
VCMPNEZ(VCMPNEZH, u16)
VCMPNEZ(VCMPNEZW, u32)
#undef VCMPNEZ
/*
 * Vector float32 compares.
 *
 * VCMPFP_DO defines helper_vcmp<suffix>().  Each element pair is compared
 * quietly; the result element is all ones when the relation satisfies
 * "rel <compare> <order>" and zero otherwise (unordered always yields zero).
 * When 'record' is non-zero, CR field 6 is written: bit 3 is set when every
 * element compared true, bit 1 when none did.
 *
 * VCMPFP instantiates both the plain and the record ("_dot") form.
 */
#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            uint32_t result;                                            \
            FloatRelation rel =                                         \
                float32_compare_quiet(a->f32[i], b->f32[i],             \
                                      &env->vec_status);                \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
/* "greater or equal" is expressed as "not less" */
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP
680 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
681 ppc_avr_t *a, ppc_avr_t *b, int record)
683 int i;
684 int all_in = 0;
686 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
687 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
688 &env->vec_status);
689 if (le_rel == float_relation_unordered) {
690 r->u32[i] = 0xc0000000;
691 all_in = 1;
692 } else {
693 float32 bneg = float32_chs(b->f32[i]);
694 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
695 &env->vec_status);
696 int le = le_rel != float_relation_greater;
697 int ge = ge_rel != float_relation_less;
699 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
700 all_in |= (!le | !ge);
703 if (record) {
704 env->crf[6] = (all_in == 0) << 1;
/* vcmpbfp: bounds compare without updating CR field 6. */
void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

/* vcmpbfp.: bounds compare that also records the summary in CR field 6. */
void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}
/*
 * VCT(suffix, satcvt, element) defines helper_vct<suffix>(): each float32
 * element of b is scaled by 2^uim and converted to an integer with
 * round-to-zero, then narrowed with the saturating converter 'satcvt'.
 * NaN inputs produce 0.  The conversion works on a local copy of
 * env->vec_status, so the rounding-mode change does not touch env.  The
 * VSCR saturation flag is set if any element saturated.
 */
#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT
/*
 * Signature of the per-word dot-product kernels used by xviger():
 * (word of a, word of b, product mask) -> partial sum.
 */
typedef int64_t do_ger(uint32_t, uint32_t, uint32_t);

/*
 * Rank-8 kernel: sum of the products of the eight signed 4-bit fields of
 * @a and @b.  Field n takes part only when bit n of @mask is set.
 */
static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
{
    int64_t acc = 0;

    for (int n = 0; n < 8; n++) {
        if ((mask >> n) & 1) {
            acc += (int64_t)sextract32(a, 4 * n, 4) * sextract32(b, 4 * n, 4);
        }
    }
    return acc;
}
/*
 * Rank-4 kernel: sum of the products of the four 8-bit fields of @a
 * (signed) and @b (unsigned).  Field n takes part only when bit n of @mask
 * is set.
 */
static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask)
{
    int64_t acc = 0;

    for (int n = 0; n < 4; n++) {
        if ((mask >> n) & 1) {
            acc += sextract32(a, 8 * n, 8) * (int64_t)extract32(b, 8 * n, 8);
        }
    }
    return acc;
}
/*
 * Rank-2 kernel: sum of the products of the two signed 16-bit fields of @a
 * and @b.  Field n takes part only when bit n of @mask is set.
 */
static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
{
    int64_t acc = 0;

    for (int n = 0; n < 2; n++) {
        if ((mask >> n) & 1) {
            acc += (int64_t)sextract32(a, 16 * n, 16) *
                   sextract32(b, 16 * n, 16);
        }
    }
    return acc;
}
784 static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t *at,
785 uint32_t mask, bool sat, bool acc, do_ger ger)
787 uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
788 xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
789 ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
790 uint8_t xmsk_bit, ymsk_bit;
791 int64_t psum;
792 int i, j;
793 for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
794 for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
795 if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
796 psum = ger(a->VsrW(i), b->VsrW(j), pmsk);
797 if (acc) {
798 psum += at[i].VsrSW(j);
800 if (sat && psum > INT32_MAX) {
801 set_vscr_sat(env);
802 at[i].VsrSW(j) = INT32_MAX;
803 } else if (sat && psum < INT32_MIN) {
804 set_vscr_sat(env);
805 at[i].VsrSW(j) = INT32_MIN;
806 } else {
807 at[i].VsrSW(j) = (int32_t) psum;
809 } else {
810 at[i].VsrSW(j) = 0;
/*
 * Integer GER entry points.  Each one forwards to xviger() with a fixed
 * (saturate, accumulate, kernel) combination:
 *
 *   XVI4GER8     no sat, no acc, ger_rank8
 *   XVI4GER8PP   no sat, acc,    ger_rank8
 *   XVI8GER4     no sat, no acc, ger_rank4
 *   XVI8GER4PP   no sat, acc,    ger_rank4
 *   XVI8GER4SPP  sat,    acc,    ger_rank4
 *   XVI16GER2    no sat, no acc, ger_rank2
 *   XVI16GER2S   sat,    no acc, ger_rank2
 *   XVI16GER2PP  no sat, acc,    ger_rank2
 *   XVI16GER2SPP sat,    acc,    ger_rank2
 */
QEMU_FLATTEN
void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                     ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, false, ger_rank8);
}

QEMU_FLATTEN
void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                       ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, true, ger_rank8);
}

QEMU_FLATTEN
void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                     ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, false, ger_rank4);
}

QEMU_FLATTEN
void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                       ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, true, ger_rank4);
}

QEMU_FLATTEN
void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                        ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, true, true, ger_rank4);
}

QEMU_FLATTEN
void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                      ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, false, ger_rank2);
}

QEMU_FLATTEN
void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                       ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, true, false, ger_rank2);
}

QEMU_FLATTEN
void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                        ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, false, true, ger_rank2);
}

QEMU_FLATTEN
void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
                         ppc_acc_t *at, uint32_t mask)
{
    xviger(env, a, b, at, mask, true, true, ger_rank2);
}
879 target_ulong helper_vclzlsbb(ppc_avr_t *r)
881 target_ulong count = 0;
882 int i;
883 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
884 if (r->VsrB(i) & 0x01) {
885 break;
887 count++;
889 return count;
892 target_ulong helper_vctzlsbb(ppc_avr_t *r)
894 target_ulong count = 0;
895 int i;
896 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
897 if (r->VsrB(i) & 0x01) {
898 break;
900 count++;
902 return count;
905 void helper_VMHADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
906 ppc_avr_t *b, ppc_avr_t *c)
908 int sat = 0;
909 int i;
911 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
912 int32_t prod = a->s16[i] * b->s16[i];
913 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
915 r->s16[i] = cvtswsh(t, &sat);
918 if (sat) {
919 set_vscr_sat(env);
923 void helper_VMHRADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
924 ppc_avr_t *b, ppc_avr_t *c)
926 int sat = 0;
927 int i;
929 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
930 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
931 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
932 r->s16[i] = cvtswsh(t, &sat);
935 if (sat) {
936 set_vscr_sat(env);
940 void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
941 uint32_t v)
943 int i;
945 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
946 int32_t prod = a->s16[i] * b->s16[i];
947 r->s16[i] = (int16_t) (prod + c->s16[i]);
/*
 * Vector merge helpers.
 *
 * VMRG_DO defines helper_v<name>(), which interleaves 'half' elements of a
 * and b starting at element 'ofs': result[2i] = a[i + ofs],
 * result[2i + 1] = b[i + ofs].  The result is built in a temporary so r may
 * alias a or b.
 *
 * VMRG instantiates the "low" form (ofs = half; 'half' is the local
 * variable of the generated function) and the "high" form (ofs = 0).
 */
#define VMRG_DO(name, element, access, ofs)                                  \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
    {                                                                        \
        ppc_avr_t result;                                                    \
        int i, half = ARRAY_SIZE(r->element) / 2;                            \
                                                                             \
        for (i = 0; i < half; i++) {                                         \
            result.access(i * 2 + 0) = a->access(i + ofs);                   \
            result.access(i * 2 + 1) = b->access(i + ofs);                   \
        }                                                                    \
        *r = result;                                                         \
    }

#define VMRG(suffix, element, access)          \
    VMRG_DO(mrgl##suffix, element, access, half)   \
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG
973 void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
975 int32_t prod[16];
976 int i;
978 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
979 prod[i] = (int32_t)a->s8[i] * b->u8[i];
982 VECTOR_FOR_INORDER_I(i, s32) {
983 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
984 prod[4 * i + 2] + prod[4 * i + 3];
988 void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
990 int32_t prod[8];
991 int i;
993 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
994 prod[i] = a->s16[i] * b->s16[i];
997 VECTOR_FOR_INORDER_I(i, s32) {
998 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1002 void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1003 ppc_avr_t *b, ppc_avr_t *c)
1005 int32_t prod[8];
1006 int i;
1007 int sat = 0;
1009 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1010 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1013 VECTOR_FOR_INORDER_I(i, s32) {
1014 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1016 r->u32[i] = cvtsdsw(t, &sat);
1019 if (sat) {
1020 set_vscr_sat(env);
1024 void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1026 uint16_t prod[16];
1027 int i;
1029 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1030 prod[i] = a->u8[i] * b->u8[i];
1033 VECTOR_FOR_INORDER_I(i, u32) {
1034 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1035 prod[4 * i + 2] + prod[4 * i + 3];
1039 void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1041 uint32_t prod[8];
1042 int i;
1044 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1045 prod[i] = a->u16[i] * b->u16[i];
1048 VECTOR_FOR_INORDER_I(i, u32) {
1049 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1053 void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1054 ppc_avr_t *b, ppc_avr_t *c)
1056 uint32_t prod[8];
1057 int i;
1058 int sat = 0;
1060 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1061 prod[i] = a->u16[i] * b->u16[i];
1064 VECTOR_FOR_INORDER_I(i, s32) {
1065 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1067 r->u32[i] = cvtuduw(t, &sat);
1070 if (sat) {
1071 set_vscr_sat(env);
/*
 * Vector multiply even/odd helpers.
 *
 * VMUL_DO_EVN defines helper_V<name>(), which multiplies the even-numbered
 * source elements (0, 2, 4, ...) of a and b; VMUL_DO_ODD does the same for
 * the odd-numbered ones (1, 3, 5, ...).  Operands are converted to 'cast'
 * before multiplying, and product i / 2 is stored through the double-width
 * accessor 'prod_access'.
 */
#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
    void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
                                     (cast)b->mul_access(i);            \
        }                                                               \
    }

#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
    void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
                                     (cast)b->mul_access(i + 1);        \
        }                                                               \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast)       \
    VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast)  \
    VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
VMUL(SB, s8, VsrSB, VsrSH, int16_t)
VMUL(SH, s16, VsrSH, VsrSW, int32_t)
VMUL(SW, s32, VsrSW, VsrSD, int64_t)
VMUL(UB, u8, VsrB, VsrH, uint16_t)
VMUL(UH, u16, VsrH, VsrW, uint32_t)
VMUL(UW, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL
1110 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
1111 target_ulong uim)
1113 int i, idx;
1114 ppc_vsr_t tmp = { .u64 = {0, 0} };
1116 for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
1117 if ((pcv->VsrB(i) >> 5) == uim) {
1118 idx = pcv->VsrB(i) & 0x1f;
1119 if (idx < ARRAY_SIZE(t->u8)) {
1120 tmp.VsrB(i) = s0->VsrB(idx);
1121 } else {
1122 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
1127 *t = tmp;
1130 void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1132 Int128 neg1 = int128_makes64(-1);
1133 Int128 int128_min = int128_make128(0, INT64_MIN);
1134 if (likely(int128_nz(b->s128) &&
1135 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
1136 t->s128 = int128_divs(a->s128, b->s128);
1137 } else {
1138 t->s128 = a->s128; /* Undefined behavior */
1142 void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1144 if (int128_nz(b->s128)) {
1145 t->s128 = int128_divu(a->s128, b->s128);
1146 } else {
1147 t->s128 = a->s128; /* Undefined behavior */
1151 void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1153 int i;
1154 int64_t high;
1155 uint64_t low;
1156 for (i = 0; i < 2; i++) {
1157 high = a->s64[i];
1158 low = 0;
1159 if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) {
1160 t->s64[i] = a->s64[i]; /* Undefined behavior */
1161 } else {
1162 divs128(&low, &high, b->s64[i]);
1163 t->s64[i] = low;
1168 void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1170 int i;
1171 uint64_t high, low;
1172 for (i = 0; i < 2; i++) {
1173 high = a->u64[i];
1174 low = 0;
1175 if (unlikely(!b->u64[i])) {
1176 t->u64[i] = a->u64[i]; /* Undefined behavior */
1177 } else {
1178 divu128(&low, &high, b->u64[i]);
1179 t->u64[i] = low;
1184 void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1186 Int128 high, low;
1187 Int128 int128_min = int128_make128(0, INT64_MIN);
1188 Int128 neg1 = int128_makes64(-1);
1190 high = a->s128;
1191 low = int128_zero();
1192 if (unlikely(!int128_nz(b->s128) ||
1193 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) {
1194 t->s128 = a->s128; /* Undefined behavior */
1195 } else {
1196 divs256(&low, &high, b->s128);
1197 t->s128 = low;
1201 void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1203 Int128 high, low;
1205 high = a->s128;
1206 low = int128_zero();
1207 if (unlikely(!int128_nz(b->s128))) {
1208 t->s128 = a->s128; /* Undefined behavior */
1209 } else {
1210 divu256(&low, &high, b->s128);
1211 t->s128 = low;
1215 void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1217 Int128 neg1 = int128_makes64(-1);
1218 Int128 int128_min = int128_make128(0, INT64_MIN);
1219 if (likely(int128_nz(b->s128) &&
1220 (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
1221 t->s128 = int128_rems(a->s128, b->s128);
1222 } else {
1223 t->s128 = int128_zero(); /* Undefined behavior */
1227 void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1229 if (likely(int128_nz(b->s128))) {
1230 t->s128 = int128_remu(a->s128, b->s128);
1231 } else {
1232 t->s128 = int128_zero(); /* Undefined behavior */
1236 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1238 ppc_avr_t result;
1239 int i;
1241 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1242 int s = c->VsrB(i) & 0x1f;
1243 int index = s & 0xf;
1245 if (s & 0x10) {
1246 result.VsrB(i) = b->VsrB(index);
1247 } else {
1248 result.VsrB(i) = a->VsrB(index);
1251 *r = result;
1254 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1256 ppc_avr_t result;
1257 int i;
1259 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1260 int s = c->VsrB(i) & 0x1f;
1261 int index = 15 - (s & 0xf);
1263 if (s & 0x10) {
1264 result.VsrB(i) = a->VsrB(index);
1265 } else {
1266 result.VsrB(i) = b->VsrB(index);
1269 *r = result;
1272 #define XXGENPCV_BE_EXP(NAME, SZ) \
1273 void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1275 ppc_vsr_t tmp; \
1277 /* Initialize tmp with the result of an all-zeros mask */ \
1278 tmp.VsrD(0) = 0x1011121314151617; \
1279 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \
1281 /* Iterate over the most significant byte of each element */ \
1282 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1283 if (b->VsrB(i) & 0x80) { \
1284 /* Update each byte of the element */ \
1285 for (int k = 0; k < SZ; k++) { \
1286 tmp.VsrB(i + k) = j + k; \
1288 j += SZ; \
1292 *t = tmp; \
1295 #define XXGENPCV_BE_COMP(NAME, SZ) \
1296 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1298 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1300 /* Iterate over the most significant byte of each element */ \
1301 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1302 if (b->VsrB(i) & 0x80) { \
1303 /* Update each byte of the element */ \
1304 for (int k = 0; k < SZ; k++) { \
1305 tmp.VsrB(j + k) = i + k; \
1307 j += SZ; \
1311 *t = tmp; \
/*
 * Defines helper_<NAME>_le_exp.  Mirror image of the _be_exp helper: the
 * default result is the byte sequence 0x1F..0x10, elements of b are
 * visited starting from the last one, and the counter values are stored
 * into a selected element in reverse byte order.
 */
#define XXGENPCV_LE_EXP(NAME, SZ)                                       \
void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b)     \
{                                                                       \
    ppc_vsr_t out;                                                      \
    int next = 0;                                                       \
                                                                        \
    /* Value produced when no element of the mask is selected */        \
    out.VsrD(0) = 0x1F1E1D1C1B1A1918;                                   \
    out.VsrD(1) = 0x1716151413121110;                                   \
                                                                        \
    for (int pos = 0; pos < ARRAY_SIZE(b->u8); pos += SZ) {             \
        /* First byte of the element, counting from the far end */      \
        const int first = ARRAY_SIZE(b->u8) - pos - SZ;                 \
                                                                        \
        if (!(b->VsrB(first) & 0x80)) {                                 \
            continue;                                                   \
        }                                                               \
        for (int off = 0; off < SZ; off++) {                            \
            out.VsrB(first + (SZ - 1 - off)) = next + off;              \
        }                                                               \
        next += SZ;                                                     \
    }                                                                   \
                                                                        \
    *t = out;                                                           \
}
/*
 * Defines helper_<NAME>_le_comp.  Mirror image of the _be_comp helper:
 * elements of b are visited starting from the last one, and the indices
 * of each selected element are stored, in reverse byte order, into the
 * result starting from its last element and moving towards byte 0.
 */
#define XXGENPCV_LE_COMP(NAME, SZ)                                      \
void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)    \
{                                                                       \
    ppc_vsr_t out = { .u64 = { 0, 0 } };                                \
    int next = 0;                                                       \
                                                                        \
    for (int pos = 0; pos < ARRAY_SIZE(b->u8); pos += SZ) {             \
        /* Only the top bit of the element's first byte is tested */    \
        if (!(b->VsrB(ARRAY_SIZE(b->u8) - pos - SZ) & 0x80)) {          \
            continue;                                                   \
        }                                                               \
        /* Destination element, counted from the far end */             \
        const int dst = ARRAY_SIZE(b->u8) - next - SZ;                  \
                                                                        \
        for (int off = 0; off < SZ; off++) {                            \
            out.VsrB(dst + (SZ - 1 - off)) = pos + off;                 \
        }                                                               \
        next += SZ;                                                     \
    }                                                                   \
                                                                        \
    *t = out;                                                           \
}
1360 #define XXGENPCV(NAME, SZ) \
1361 XXGENPCV_BE_EXP(NAME, SZ) \
1362 XXGENPCV_BE_COMP(NAME, SZ) \
1363 XXGENPCV_LE_EXP(NAME, SZ) \
1364 XXGENPCV_LE_COMP(NAME, SZ) \
1366 XXGENPCV(XXGENPCVBM, 1)
1367 XXGENPCV(XXGENPCVHM, 2)
1368 XXGENPCV(XXGENPCVWM, 4)
1369 XXGENPCV(XXGENPCVDM, 8)
1371 #undef XXGENPCV_BE_EXP
1372 #undef XXGENPCV_BE_COMP
1373 #undef XXGENPCV_LE_EXP
1374 #undef XXGENPCV_LE_COMP
1375 #undef XXGENPCV
/*
 * Host-endianness adapters for the vbpermd/vbpermq helpers.
 *
 * VBPERMQ_INDEX(avr, i)  - byte i of avr, numbered independently of host order
 * VBPERMD_INDEX(i)       - host u64[] slot used for doubleword i
 * VBPERMQ_DW(index)      - host u64[] slot that holds bit 'index' (0..127)
 *
 * Every macro argument is parenthesized in the expansion so that
 * expression arguments (e.g. "i + 1") are grouped as the caller wrote them.
 */
#if HOST_BIG_ENDIAN
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - (i))
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#endif
/* Bit 'index' of doubleword i of avr, where index 0 is the top bit. */
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->VsrD(i), 63 - (index), 1))
1389 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1391 int i, j;
1392 ppc_avr_t result = { .u64 = { 0, 0 } };
1393 VECTOR_FOR_INORDER_I(i, u64) {
1394 for (j = 0; j < 8; j++) {
1395 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1396 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1397 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1401 *r = result;
1404 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1406 int i;
1407 uint64_t perm = 0;
1409 VECTOR_FOR_INORDER_I(i, u8) {
1410 int index = VBPERMQ_INDEX(b, i);
1412 if (index < 128) {
1413 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1414 if (a->u64[VBPERMQ_DW(index)] & mask) {
1415 perm |= (0x8000 >> i);
1420 r->VsrD(0) = perm;
1421 r->VsrD(1) = 0;
1424 #undef VBPERMQ_INDEX
1425 #undef VBPERMQ_DW
1427 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1428 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1430 int i, j; \
1431 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1433 VECTOR_FOR_INORDER_I(i, srcfld) { \
1434 prod[i] = 0; \
1435 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1436 if (a->srcfld[i] & (1ull << j)) { \
1437 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1442 VECTOR_FOR_INORDER_I(i, trgfld) { \
1443 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1447 PMSUM(vpmsumb, u8, u16, uint16_t)
1448 PMSUM(vpmsumh, u16, u32, uint32_t)
1449 PMSUM(vpmsumw, u32, u64, uint64_t)
1451 void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1453 int i, j;
1454 Int128 tmp, prod[2] = {int128_zero(), int128_zero()};
1456 for (j = 0; j < 64; j++) {
1457 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1458 if (a->VsrD(i) & (1ull << j)) {
1459 tmp = int128_make64(b->VsrD(i));
1460 tmp = int128_lshift(tmp, j);
1461 prod[i] = int128_xor(prod[i], tmp);
1466 r->s128 = int128_xor(prod[0], prod[1]);
1469 #if HOST_BIG_ENDIAN
1470 #define PKBIG 1
1471 #else
1472 #define PKBIG 0
1473 #endif
1474 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1476 int i, j;
1477 ppc_avr_t result;
1478 #if HOST_BIG_ENDIAN
1479 const ppc_avr_t *x[2] = { a, b };
1480 #else
1481 const ppc_avr_t *x[2] = { b, a };
1482 #endif
1484 VECTOR_FOR_INORDER_I(i, u64) {
1485 VECTOR_FOR_INORDER_I(j, u32) {
1486 uint32_t e = x[i]->u32[j];
1488 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1489 ((e >> 6) & 0x3e0) |
1490 ((e >> 3) & 0x1f));
1493 *r = result;
1496 #define VPK(suffix, from, to, cvt, dosat) \
1497 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1498 ppc_avr_t *a, ppc_avr_t *b) \
1500 int i; \
1501 int sat = 0; \
1502 ppc_avr_t result; \
1503 ppc_avr_t *a0 = PKBIG ? a : b; \
1504 ppc_avr_t *a1 = PKBIG ? b : a; \
1506 VECTOR_FOR_INORDER_I(i, from) { \
1507 result.to[i] = cvt(a0->from[i], &sat); \
1508 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1510 *r = result; \
1511 if (dosat && sat) { \
1512 set_vscr_sat(env); \
1515 #define I(x, y) (x)
1516 VPK(shss, s16, s8, cvtshsb, 1)
1517 VPK(shus, s16, u8, cvtshub, 1)
1518 VPK(swss, s32, s16, cvtswsh, 1)
1519 VPK(swus, s32, u16, cvtswuh, 1)
1520 VPK(sdss, s64, s32, cvtsdsw, 1)
1521 VPK(sdus, s64, u32, cvtsduw, 1)
1522 VPK(uhus, u16, u8, cvtuhub, 1)
1523 VPK(uwus, u32, u16, cvtuwuh, 1)
1524 VPK(udus, u64, u32, cvtuduw, 1)
1525 VPK(uhum, u16, u8, I, 0)
1526 VPK(uwum, u32, u16, I, 0)
1527 VPK(udum, u64, u32, I, 0)
1528 #undef I
1529 #undef VPK
1530 #undef PKBIG
1532 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1534 int i;
1536 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1537 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1541 #define VRFI(suffix, rounding) \
1542 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1543 ppc_avr_t *b) \
1545 int i; \
1546 float_status s = env->vec_status; \
1548 set_float_rounding_mode(rounding, &s); \
1549 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1550 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1553 VRFI(n, float_round_nearest_even)
1554 VRFI(m, float_round_down)
1555 VRFI(p, float_round_up)
1556 VRFI(z, float_round_to_zero)
1557 #undef VRFI
1559 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1561 int i;
1563 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1564 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1566 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1570 #define VRLMI(name, size, element, insert) \
1571 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
1573 int i; \
1574 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1575 uint##size##_t src1 = a->element[i]; \
1576 uint##size##_t src2 = b->element[i]; \
1577 uint##size##_t src3 = r->element[i]; \
1578 uint##size##_t begin, end, shift, mask, rot_val; \
1580 shift = extract##size(src2, 0, 6); \
1581 end = extract##size(src2, 8, 6); \
1582 begin = extract##size(src2, 16, 6); \
1583 rot_val = rol##size(src1, shift); \
1584 mask = mask_u##size(begin, end); \
1585 if (insert) { \
1586 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1587 } else { \
1588 r->element[i] = (rot_val & mask); \
1593 VRLMI(VRLDMI, 64, u64, 1);
1594 VRLMI(VRLWMI, 32, u32, 1);
1595 VRLMI(VRLDNM, 64, u64, 0);
1596 VRLMI(VRLWNM, 32, u32, 0);
1598 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1600 int i;
1602 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1603 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1607 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1609 int i;
1611 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1612 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1616 #define VEXTU_X_DO(name, size, left) \
1617 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1619 int index = (a & 0xf) * 8; \
1620 if (left) { \
1621 index = 128 - index - size; \
1623 return int128_getlo(int128_rshift(b->s128, index)) & \
1624 MAKE_64BIT_MASK(0, size); \
1626 VEXTU_X_DO(vextublx, 8, 1)
1627 VEXTU_X_DO(vextuhlx, 16, 1)
1628 VEXTU_X_DO(vextuwlx, 32, 1)
1629 VEXTU_X_DO(vextubrx, 8, 0)
1630 VEXTU_X_DO(vextuhrx, 16, 0)
1631 VEXTU_X_DO(vextuwrx, 32, 0)
1632 #undef VEXTU_X_DO
1634 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1636 int i;
1637 unsigned int shift, bytes, size;
1639 size = ARRAY_SIZE(r->u8);
1640 for (i = 0; i < size; i++) {
1641 shift = b->VsrB(i) & 0x7; /* extract shift value */
1642 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1643 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1644 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1648 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1650 int i;
1651 unsigned int shift, bytes;
1654 * Use reverse order, as destination and source register can be
1655 * same. Its being modified in place saving temporary, reverse
1656 * order will guarantee that computed result is not fed back.
1658 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1659 shift = b->VsrB(i) & 0x7; /* extract shift value */
1660 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1661 /* extract adjacent bytes */
1662 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1666 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1668 int sh = shift & 0xf;
1669 int i;
1670 ppc_avr_t result;
1672 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1673 int index = sh + i;
1674 if (index > 0xf) {
1675 result.VsrB(i) = b->VsrB(index - 0x10);
1676 } else {
1677 result.VsrB(i) = a->VsrB(index);
1680 *r = result;
1683 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1685 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1687 #if HOST_BIG_ENDIAN
1688 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1689 memset(&r->u8[16 - sh], 0, sh);
1690 #else
1691 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1692 memset(&r->u8[0], 0, sh);
1693 #endif
1696 #if HOST_BIG_ENDIAN
1697 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1698 #else
1699 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1700 #endif
1702 #define VINSX(SUFFIX, TYPE) \
1703 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1704 uint64_t val, target_ulong index) \
1706 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1707 target_long idx = index; \
1709 if (idx < 0 || idx > maxidx) { \
1710 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1711 qemu_log_mask(LOG_GUEST_ERROR, \
1712 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1713 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1714 } else { \
1715 TYPE src = val; \
1716 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1719 VINSX(B, uint8_t)
1720 VINSX(H, uint16_t)
1721 VINSX(W, uint32_t)
1722 VINSX(D, uint64_t)
1723 #undef ELEM_ADDR
1724 #undef VINSX
1725 #if HOST_BIG_ENDIAN
1726 #define VEXTDVLX(NAME, SIZE) \
1727 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1728 target_ulong index) \
1730 const target_long idx = index; \
1731 ppc_avr_t tmp[2] = { *a, *b }; \
1732 memset(t, 0, sizeof(*t)); \
1733 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1734 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1735 } else { \
1736 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1737 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1738 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1741 #else
1742 #define VEXTDVLX(NAME, SIZE) \
1743 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1744 target_ulong index) \
1746 const target_long idx = index; \
1747 ppc_avr_t tmp[2] = { *b, *a }; \
1748 memset(t, 0, sizeof(*t)); \
1749 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1750 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1751 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1752 } else { \
1753 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1754 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1755 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1758 #endif
1759 VEXTDVLX(VEXTDUBVLX, 1)
1760 VEXTDVLX(VEXTDUHVLX, 2)
1761 VEXTDVLX(VEXTDUWVLX, 4)
1762 VEXTDVLX(VEXTDDVLX, 8)
1763 #undef VEXTDVLX
1764 #if HOST_BIG_ENDIAN
1765 #define VEXTRACT(suffix, element) \
1766 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1768 uint32_t es = sizeof(r->element[0]); \
1769 memmove(&r->u8[8 - es], &b->u8[index], es); \
1770 memset(&r->u8[8], 0, 8); \
1771 memset(&r->u8[0], 0, 8 - es); \
1773 #else
1774 #define VEXTRACT(suffix, element) \
1775 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1777 uint32_t es = sizeof(r->element[0]); \
1778 uint32_t s = (16 - index) - es; \
1779 memmove(&r->u8[8], &b->u8[s], es); \
1780 memset(&r->u8[0], 0, 8); \
1781 memset(&r->u8[8 + es], 0, 8 - es); \
1783 #endif
1784 VEXTRACT(ub, u8)
1785 VEXTRACT(uh, u16)
1786 VEXTRACT(uw, u32)
1787 VEXTRACT(d, u64)
1788 #undef VEXTRACT
1790 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
1791 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
1793 int i, idx, crf = 0; \
1795 for (i = 0; i < NUM_ELEMS; i++) { \
1796 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1797 if (b->Vsr##ELEM(idx)) { \
1798 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
1799 } else { \
1800 crf = 0b0010; \
1801 break; \
1805 for (; i < NUM_ELEMS; i++) { \
1806 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1807 t->Vsr##ELEM(idx) = 0; \
1810 return crf; \
1812 VSTRI(VSTRIBL, B, 16, true)
1813 VSTRI(VSTRIBR, B, 16, false)
1814 VSTRI(VSTRIHL, H, 8, true)
1815 VSTRI(VSTRIHR, H, 8, false)
1816 #undef VSTRI
1818 void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
1820 ppc_vsr_t t = { };
1821 size_t es = sizeof(uint32_t);
1822 uint32_t ext_index;
1823 int i;
1825 ext_index = index;
1826 for (i = 0; i < es; i++, ext_index++) {
1827 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1830 *xt = t;
1833 void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
1835 ppc_vsr_t t = *xt;
1836 size_t es = sizeof(uint32_t);
1837 int ins_index, i = 0;
1839 ins_index = index;
1840 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1841 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1844 *xt = t;
1847 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
1848 uint32_t desc)
1851 * Instead of processing imm bit-by-bit, we'll skip the computation of
1852 * conjunctions whose corresponding bit is unset.
1854 int bit, imm = simd_data(desc);
1855 Int128 conj, disj = int128_zero();
1857 /* Iterate over set bits from the least to the most significant bit */
1858 while (imm) {
1860 * Get the next bit to be processed with ctz64. Invert the result of
1861 * ctz64 to match the indexing used by PowerISA.
1863 bit = 7 - ctzl(imm);
1864 if (bit & 0x4) {
1865 conj = a->s128;
1866 } else {
1867 conj = int128_not(a->s128);
1869 if (bit & 0x2) {
1870 conj = int128_and(conj, b->s128);
1871 } else {
1872 conj = int128_and(conj, int128_not(b->s128));
1874 if (bit & 0x1) {
1875 conj = int128_and(conj, c->s128);
1876 } else {
1877 conj = int128_and(conj, int128_not(c->s128));
1879 disj = int128_or(disj, conj);
1881 /* Unset the least significant bit that is set */
1882 imm &= imm - 1;
1885 t->s128 = disj;
1888 #define XXBLEND(name, sz) \
1889 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1890 ppc_avr_t *c, uint32_t desc) \
1892 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1893 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1894 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1897 XXBLEND(B, 8)
1898 XXBLEND(H, 16)
1899 XXBLEND(W, 32)
1900 XXBLEND(D, 64)
1901 #undef XXBLEND
1903 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1905 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1907 #if HOST_BIG_ENDIAN
1908 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1909 memset(&r->u8[0], 0, sh);
1910 #else
1911 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1912 memset(&r->u8[16 - sh], 0, sh);
1913 #endif
1916 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1918 int64_t t;
1919 int i, upper;
1920 ppc_avr_t result;
1921 int sat = 0;
1923 upper = ARRAY_SIZE(r->s32) - 1;
1924 t = (int64_t)b->VsrSW(upper);
1925 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1926 t += a->VsrSW(i);
1927 result.VsrSW(i) = 0;
1929 result.VsrSW(upper) = cvtsdsw(t, &sat);
1930 *r = result;
1932 if (sat) {
1933 set_vscr_sat(env);
1937 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1939 int i, j, upper;
1940 ppc_avr_t result;
1941 int sat = 0;
1943 upper = 1;
1944 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1945 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1947 result.VsrD(i) = 0;
1948 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1949 t += a->VsrSW(2 * i + j);
1951 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1954 *r = result;
1955 if (sat) {
1956 set_vscr_sat(env);
1960 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1962 int i, j;
1963 int sat = 0;
1965 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1966 int64_t t = (int64_t)b->s32[i];
1968 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1969 t += a->s8[4 * i + j];
1971 r->s32[i] = cvtsdsw(t, &sat);
1974 if (sat) {
1975 set_vscr_sat(env);
1979 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1981 int sat = 0;
1982 int i;
1984 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1985 int64_t t = (int64_t)b->s32[i];
1987 t += a->s16[2 * i] + a->s16[2 * i + 1];
1988 r->s32[i] = cvtsdsw(t, &sat);
1991 if (sat) {
1992 set_vscr_sat(env);
1996 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1998 int i, j;
1999 int sat = 0;
2001 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2002 uint64_t t = (uint64_t)b->u32[i];
2004 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2005 t += a->u8[4 * i + j];
2007 r->u32[i] = cvtuduw(t, &sat);
2010 if (sat) {
2011 set_vscr_sat(env);
2015 #if HOST_BIG_ENDIAN
2016 #define UPKHI 1
2017 #define UPKLO 0
2018 #else
2019 #define UPKHI 0
2020 #define UPKLO 1
2021 #endif
2022 #define VUPKPX(suffix, hi) \
2023 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2025 int i; \
2026 ppc_avr_t result; \
2028 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2029 uint16_t e = b->u16[hi ? i : i + 4]; \
2030 uint8_t a = (e >> 15) ? 0xff : 0; \
2031 uint8_t r = (e >> 10) & 0x1f; \
2032 uint8_t g = (e >> 5) & 0x1f; \
2033 uint8_t b = e & 0x1f; \
2035 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2037 *r = result; \
2039 VUPKPX(lpx, UPKLO)
2040 VUPKPX(hpx, UPKHI)
2041 #undef VUPKPX
2043 #define VUPK(suffix, unpacked, packee, hi) \
2044 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2046 int i; \
2047 ppc_avr_t result; \
2049 if (hi) { \
2050 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2051 result.unpacked[i] = b->packee[i]; \
2053 } else { \
2054 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2055 i++) { \
2056 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2059 *r = result; \
2061 VUPK(hsb, s16, s8, UPKHI)
2062 VUPK(hsh, s32, s16, UPKHI)
2063 VUPK(hsw, s64, s32, UPKHI)
2064 VUPK(lsb, s16, s8, UPKLO)
2065 VUPK(lsh, s32, s16, UPKLO)
2066 VUPK(lsw, s64, s32, UPKLO)
2067 #undef VUPK
2068 #undef UPKHI
2069 #undef UPKLO
2071 #define VGENERIC_DO(name, element) \
2072 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2074 int i; \
2076 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2077 r->element[i] = name(b->element[i]); \
2081 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2082 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2084 VGENERIC_DO(clzb, u8)
2085 VGENERIC_DO(clzh, u16)
2087 #undef clzb
2088 #undef clzh
2090 #define ctzb(v) ((v) ? ctz32(v) : 8)
2091 #define ctzh(v) ((v) ? ctz32(v) : 16)
2092 #define ctzw(v) ctz32((v))
2093 #define ctzd(v) ctz64((v))
2095 VGENERIC_DO(ctzb, u8)
2096 VGENERIC_DO(ctzh, u16)
2097 VGENERIC_DO(ctzw, u32)
2098 VGENERIC_DO(ctzd, u64)
2100 #undef ctzb
2101 #undef ctzh
2102 #undef ctzw
2103 #undef ctzd
2105 #define popcntb(v) ctpop8(v)
2106 #define popcnth(v) ctpop16(v)
2107 #define popcntw(v) ctpop32(v)
2108 #define popcntd(v) ctpop64(v)
2110 VGENERIC_DO(popcntb, u8)
2111 VGENERIC_DO(popcnth, u16)
2112 VGENERIC_DO(popcntw, u32)
2113 VGENERIC_DO(popcntd, u64)
2115 #undef popcntb
2116 #undef popcnth
2117 #undef popcntw
2118 #undef popcntd
2120 #undef VGENERIC_DO
2122 void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2124 r->s128 = int128_add(a->s128, b->s128);
2127 void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2129 r->s128 = int128_add(int128_add(a->s128, b->s128),
2130 int128_make64(int128_getlo(c->s128) & 1));
2133 void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2135 r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128);
2136 r->VsrD(0) = 0;
2139 void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2141 bool carry_out = int128_ult(int128_not(a->s128), b->s128),
2142 carry_in = int128_getlo(c->s128) & 1;
2144 if (!carry_out && carry_in) {
2145 carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) &&
2146 int128_eq(int128_add(a->s128, b->s128), int128_makes64(-1));
2149 r->VsrD(0) = 0;
2150 r->VsrD(1) = carry_out;
2153 void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2155 r->s128 = int128_sub(a->s128, b->s128);
2158 void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2160 r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)),
2161 int128_make64(int128_getlo(c->s128) & 1));
2164 void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2166 Int128 tmp = int128_not(b->s128);
2168 r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) ||
2169 int128_eq(int128_add(a->s128, tmp), int128_makes64(-1));
2170 r->VsrD(0) = 0;
2173 void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2175 Int128 tmp = int128_not(b->s128);
2176 bool carry_out = int128_ult(int128_not(a->s128), tmp),
2177 carry_in = int128_getlo(c->s128) & 1;
2179 r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp),
2180 int128_makes64(-1)));
2181 r->VsrD(0) = 0;
/*
 * Sign codes held in nibble 0 of a packed decimal value.  bcd_get_sgn()
 * accepts all six; bcd_preferred_sgn() only ever produces the *_PREF ones.
 */
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_PLUS_ALT_2  0xE
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB

/* Sign characters of the "national" format: ASCII '+' and '-' */
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

/* VsrB() index of the byte that contains nibble n (nibble 0 is in byte 15) */
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2195 static int bcd_get_sgn(ppc_avr_t *bcd)
2197 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2198 case BCD_PLUS_PREF_1:
2199 case BCD_PLUS_PREF_2:
2200 case BCD_PLUS_ALT_1:
2201 case BCD_PLUS_ALT_2:
2203 return 1;
2206 case BCD_NEG_PREF:
2207 case BCD_NEG_ALT:
2209 return -1;
2212 default:
2214 return 0;
2219 static int bcd_preferred_sgn(int sgn, int ps)
2221 if (sgn >= 0) {
2222 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2223 } else {
2224 return BCD_NEG_PREF;
2228 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2230 uint8_t result;
2231 if (n & 1) {
2232 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2233 } else {
2234 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2237 if (unlikely(result > 9)) {
2238 *invalid = true;
2240 return result;
2243 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2245 if (n & 1) {
2246 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2247 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2248 } else {
2249 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2250 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2254 static bool bcd_is_valid(ppc_avr_t *bcd)
2256 int i;
2257 int invalid = 0;
2259 if (bcd_get_sgn(bcd) == 0) {
2260 return false;
2263 for (i = 1; i < 32; i++) {
2264 bcd_get_digit(bcd, i, &invalid);
2265 if (unlikely(invalid)) {
2266 return false;
2269 return true;
2272 static int bcd_cmp_zero(ppc_avr_t *bcd)
2274 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2275 return CRF_EQ;
2276 } else {
2277 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2281 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2283 return reg->VsrH(7 - n);
2286 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2288 reg->VsrH(7 - n) = val;
2291 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2293 int i;
2294 int invalid = 0;
2295 for (i = 31; i > 0; i--) {
2296 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2297 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2298 if (unlikely(invalid)) {
2299 return 0; /* doesn't matter */
2300 } else if (dig_a > dig_b) {
2301 return 1;
2302 } else if (dig_a < dig_b) {
2303 return -1;
2307 return 0;
2310 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2311 int *overflow)
2313 int carry = 0;
2314 int i;
2315 int is_zero = 1;
2317 for (i = 1; i <= 31; i++) {
2318 uint8_t digit = bcd_get_digit(a, i, invalid) +
2319 bcd_get_digit(b, i, invalid) + carry;
2320 is_zero &= (digit == 0);
2321 if (digit > 9) {
2322 carry = 1;
2323 digit -= 10;
2324 } else {
2325 carry = 0;
2328 bcd_put_digit(t, digit, i);
2331 *overflow = carry;
2332 return is_zero;
2335 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2336 int *overflow)
2338 int carry = 0;
2339 int i;
2341 for (i = 1; i <= 31; i++) {
2342 uint8_t digit = bcd_get_digit(a, i, invalid) -
2343 bcd_get_digit(b, i, invalid) + carry;
2344 if (digit & 0x80) {
2345 carry = -1;
2346 digit += 10;
2347 } else {
2348 carry = 0;
2351 bcd_put_digit(t, digit, i);
2354 *overflow = carry;
2357 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2360 int sgna = bcd_get_sgn(a);
2361 int sgnb = bcd_get_sgn(b);
2362 int invalid = (sgna == 0) || (sgnb == 0);
2363 int overflow = 0;
2364 int zero = 0;
2365 uint32_t cr = 0;
2366 ppc_avr_t result = { .u64 = { 0, 0 } };
2368 if (!invalid) {
2369 if (sgna == sgnb) {
2370 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2371 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2372 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2373 } else {
2374 int magnitude = bcd_cmp_mag(a, b);
2375 if (magnitude > 0) {
2376 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2377 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2378 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2379 } else if (magnitude < 0) {
2380 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2381 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2382 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2383 } else {
2384 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2385 cr = CRF_EQ;
2390 if (unlikely(invalid)) {
2391 result.VsrD(0) = result.VsrD(1) = -1;
2392 cr = CRF_SO;
2393 } else if (overflow) {
2394 cr |= CRF_SO;
2395 } else if (zero) {
2396 cr |= CRF_EQ;
2399 *r = result;
2401 return cr;
2404 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2406 ppc_avr_t bcopy = *b;
2407 int sgnb = bcd_get_sgn(b);
2408 if (sgnb < 0) {
2409 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2410 } else if (sgnb > 0) {
2411 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2413 /* else invalid ... defer to bcdadd code for proper handling */
2415 return helper_bcdadd(r, a, &bcopy, ps);
2418 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2420 int i;
2421 int cr = 0;
2422 uint16_t national = 0;
2423 uint16_t sgnb = get_national_digit(b, 0);
2424 ppc_avr_t ret = { .u64 = { 0, 0 } };
2425 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2427 for (i = 1; i < 8; i++) {
2428 national = get_national_digit(b, i);
2429 if (unlikely(national < 0x30 || national > 0x39)) {
2430 invalid = 1;
2431 break;
2434 bcd_put_digit(&ret, national & 0xf, i);
2437 if (sgnb == NATIONAL_PLUS) {
2438 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2439 } else {
2440 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2443 cr = bcd_cmp_zero(&ret);
2445 if (unlikely(invalid)) {
2446 cr = CRF_SO;
2449 *r = ret;
2451 return cr;
2454 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2456 int i;
2457 int cr = 0;
2458 int sgnb = bcd_get_sgn(b);
2459 int invalid = (sgnb == 0);
2460 ppc_avr_t ret = { .u64 = { 0, 0 } };
2462 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2464 for (i = 1; i < 8; i++) {
2465 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2467 if (unlikely(invalid)) {
2468 break;
2471 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2473 cr = bcd_cmp_zero(b);
2475 if (ox_flag) {
2476 cr |= CRF_SO;
2479 if (unlikely(invalid)) {
2480 cr = CRF_SO;
2483 *r = ret;
2485 return cr;
2488 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2490 int i;
2491 int cr = 0;
2492 int invalid = 0;
2493 int zone_digit = 0;
2494 int zone_lead = ps ? 0xF : 0x3;
2495 int digit = 0;
2496 ppc_avr_t ret = { .u64 = { 0, 0 } };
2497 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2499 if (unlikely((sgnb < 0xA) && ps)) {
2500 invalid = 1;
2503 for (i = 0; i < 16; i++) {
2504 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2505 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2506 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2507 invalid = 1;
2508 break;
2511 bcd_put_digit(&ret, digit, i + 1);
2514 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2515 (!ps && (sgnb & 0x4))) {
2516 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2517 } else {
2518 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2521 cr = bcd_cmp_zero(&ret);
2523 if (unlikely(invalid)) {
2524 cr = CRF_SO;
2527 *r = ret;
2529 return cr;
2532 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2534 int i;
2535 int cr = 0;
2536 uint8_t digit = 0;
2537 int sgnb = bcd_get_sgn(b);
2538 int zone_lead = (ps) ? 0xF0 : 0x30;
2539 int invalid = (sgnb == 0);
2540 ppc_avr_t ret = { .u64 = { 0, 0 } };
2542 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2544 for (i = 0; i < 16; i++) {
2545 digit = bcd_get_digit(b, i + 1, &invalid);
2547 if (unlikely(invalid)) {
2548 break;
2551 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2554 if (ps) {
2555 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2556 } else {
2557 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2560 cr = bcd_cmp_zero(b);
2562 if (ox_flag) {
2563 cr |= CRF_SO;
2566 if (unlikely(invalid)) {
2567 cr = CRF_SO;
2570 *r = ret;
2572 return cr;
/**
 * Three-way comparison of two unsigned 128-bit integers, each supplied as
 * a (low, high) pair of 64-bit halves.
 *
 * Returns:
 * 1 if ahi|alo > bhi|blo,
 * 0 if ahi|alo == bhi|blo,
 * -1 if ahi|alo < bhi|blo
 */
static inline int ucmp128(uint64_t alo, uint64_t ahi,
                          uint64_t blo, uint64_t bhi)
{
    /* The high halves decide unless they are equal. */
    if (ahi != bhi) {
        return (ahi > bhi) ? 1 : -1;
    }
    if (alo != blo) {
        return (alo > blo) ? 1 : -1;
    }
    return 0;
}
2591 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2593 int i;
2594 int cr;
2595 uint64_t lo_value;
2596 uint64_t hi_value;
2597 uint64_t rem;
2598 ppc_avr_t ret = { .u64 = { 0, 0 } };
2600 if (b->VsrSD(0) < 0) {
2601 lo_value = -b->VsrSD(1);
2602 hi_value = ~b->VsrD(0) + !lo_value;
2603 bcd_put_digit(&ret, 0xD, 0);
2605 cr = CRF_LT;
2606 } else {
2607 lo_value = b->VsrD(1);
2608 hi_value = b->VsrD(0);
2609 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2611 if (hi_value == 0 && lo_value == 0) {
2612 cr = CRF_EQ;
2613 } else {
2614 cr = CRF_GT;
2619 * Check src limits: abs(src) <= 10^31 - 1
2621 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2623 if (ucmp128(lo_value, hi_value,
2624 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2625 cr |= CRF_SO;
2628 * According to the ISA, if src wouldn't fit in the destination
2629 * register, the result is undefined.
2630 * In that case, we leave r unchanged.
2632 } else {
2633 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2635 for (i = 1; i < 16; rem /= 10, i++) {
2636 bcd_put_digit(&ret, rem % 10, i);
2639 for (; i < 32; lo_value /= 10, i++) {
2640 bcd_put_digit(&ret, lo_value % 10, i);
2643 *r = ret;
2646 return cr;
2649 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2651 uint8_t i;
2652 int cr;
2653 uint64_t carry;
2654 uint64_t unused;
2655 uint64_t lo_value;
2656 uint64_t hi_value = 0;
2657 int sgnb = bcd_get_sgn(b);
2658 int invalid = (sgnb == 0);
2660 lo_value = bcd_get_digit(b, 31, &invalid);
2661 for (i = 30; i > 0; i--) {
2662 mulu64(&lo_value, &carry, lo_value, 10ULL);
2663 mulu64(&hi_value, &unused, hi_value, 10ULL);
2664 lo_value += bcd_get_digit(b, i, &invalid);
2665 hi_value += carry;
2667 if (unlikely(invalid)) {
2668 break;
2672 if (sgnb == -1) {
2673 r->VsrSD(1) = -lo_value;
2674 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2675 } else {
2676 r->VsrSD(1) = lo_value;
2677 r->VsrSD(0) = hi_value;
2680 cr = bcd_cmp_zero(b);
2682 if (unlikely(invalid)) {
2683 cr = CRF_SO;
2686 return cr;
2689 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2691 int i;
2692 int invalid = 0;
2694 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2695 return CRF_SO;
2698 *r = *a;
2699 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2701 for (i = 1; i < 32; i++) {
2702 bcd_get_digit(a, i, &invalid);
2703 bcd_get_digit(b, i, &invalid);
2704 if (unlikely(invalid)) {
2705 return CRF_SO;
2709 return bcd_cmp_zero(r);
2712 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2714 int sgnb = bcd_get_sgn(b);
2716 *r = *b;
2717 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2719 if (bcd_is_valid(b) == false) {
2720 return CRF_SO;
2723 return bcd_cmp_zero(r);
2726 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2728 int cr;
2729 int i = a->VsrSB(7);
2730 bool ox_flag = false;
2731 int sgnb = bcd_get_sgn(b);
2732 ppc_avr_t ret = *b;
2733 ret.VsrD(1) &= ~0xf;
2735 if (bcd_is_valid(b) == false) {
2736 return CRF_SO;
2739 if (unlikely(i > 31)) {
2740 i = 31;
2741 } else if (unlikely(i < -31)) {
2742 i = -31;
2745 if (i > 0) {
2746 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2747 } else {
2748 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2750 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2752 *r = ret;
2754 cr = bcd_cmp_zero(r);
2755 if (ox_flag) {
2756 cr |= CRF_SO;
2759 return cr;
2762 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2764 int cr;
2765 int i;
2766 int invalid = 0;
2767 bool ox_flag = false;
2768 ppc_avr_t ret = *b;
2770 for (i = 0; i < 32; i++) {
2771 bcd_get_digit(b, i, &invalid);
2773 if (unlikely(invalid)) {
2774 return CRF_SO;
2778 i = a->VsrSB(7);
2779 if (i >= 32) {
2780 ox_flag = true;
2781 ret.VsrD(1) = ret.VsrD(0) = 0;
2782 } else if (i <= -32) {
2783 ret.VsrD(1) = ret.VsrD(0) = 0;
2784 } else if (i > 0) {
2785 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2786 } else {
2787 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2789 *r = ret;
2791 cr = bcd_cmp_zero(r);
2792 if (ox_flag) {
2793 cr |= CRF_SO;
2796 return cr;
2799 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2801 int cr;
2802 int unused = 0;
2803 int invalid = 0;
2804 bool ox_flag = false;
2805 int sgnb = bcd_get_sgn(b);
2806 ppc_avr_t ret = *b;
2807 ret.VsrD(1) &= ~0xf;
2809 int i = a->VsrSB(7);
2810 ppc_avr_t bcd_one;
2812 bcd_one.VsrD(0) = 0;
2813 bcd_one.VsrD(1) = 0x10;
2815 if (bcd_is_valid(b) == false) {
2816 return CRF_SO;
2819 if (unlikely(i > 31)) {
2820 i = 31;
2821 } else if (unlikely(i < -31)) {
2822 i = -31;
2825 if (i > 0) {
2826 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2827 } else {
2828 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2830 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2831 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2834 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2836 cr = bcd_cmp_zero(&ret);
2837 if (ox_flag) {
2838 cr |= CRF_SO;
2840 *r = ret;
2842 return cr;
2845 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2847 uint64_t mask;
2848 uint32_t ox_flag = 0;
2849 int i = a->VsrSH(3) + 1;
2850 ppc_avr_t ret = *b;
2852 if (bcd_is_valid(b) == false) {
2853 return CRF_SO;
2856 if (i > 16 && i < 32) {
2857 mask = (uint64_t)-1 >> (128 - i * 4);
2858 if (ret.VsrD(0) & ~mask) {
2859 ox_flag = CRF_SO;
2862 ret.VsrD(0) &= mask;
2863 } else if (i >= 0 && i <= 16) {
2864 mask = (uint64_t)-1 >> (64 - i * 4);
2865 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2866 ox_flag = CRF_SO;
2869 ret.VsrD(1) &= mask;
2870 ret.VsrD(0) = 0;
2872 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2873 *r = ret;
2875 return bcd_cmp_zero(&ret) | ox_flag;
2878 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2880 int i;
2881 uint64_t mask;
2882 uint32_t ox_flag = 0;
2883 int invalid = 0;
2884 ppc_avr_t ret = *b;
2886 for (i = 0; i < 32; i++) {
2887 bcd_get_digit(b, i, &invalid);
2889 if (unlikely(invalid)) {
2890 return CRF_SO;
2894 i = a->VsrSH(3);
2895 if (i > 16 && i < 33) {
2896 mask = (uint64_t)-1 >> (128 - i * 4);
2897 if (ret.VsrD(0) & ~mask) {
2898 ox_flag = CRF_SO;
2901 ret.VsrD(0) &= mask;
2902 } else if (i > 0 && i <= 16) {
2903 mask = (uint64_t)-1 >> (64 - i * 4);
2904 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2905 ox_flag = CRF_SO;
2908 ret.VsrD(1) &= mask;
2909 ret.VsrD(0) = 0;
2910 } else if (i == 0) {
2911 if (ret.VsrD(0) || ret.VsrD(1)) {
2912 ox_flag = CRF_SO;
2914 ret.VsrD(0) = ret.VsrD(1) = 0;
2917 *r = ret;
2918 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2919 return ox_flag | CRF_EQ;
2922 return ox_flag | CRF_GT;
2925 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2927 int i;
2928 VECTOR_FOR_INORDER_I(i, u8) {
2929 r->u8[i] = AES_sbox[a->u8[i]];
2933 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2935 ppc_avr_t result;
2936 int i;
2938 VECTOR_FOR_INORDER_I(i, u32) {
2939 result.VsrW(i) = b->VsrW(i) ^
2940 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2941 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2942 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2943 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2945 *r = result;
2948 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2950 ppc_avr_t result;
2951 int i;
2953 VECTOR_FOR_INORDER_I(i, u8) {
2954 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2956 *r = result;
2959 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2961 /* This differs from what is written in ISA V2.07. The RTL is */
2962 /* incorrect and will be fixed in V2.07B. */
2963 int i;
2964 ppc_avr_t tmp;
2966 VECTOR_FOR_INORDER_I(i, u8) {
2967 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2970 VECTOR_FOR_INORDER_I(i, u32) {
2971 r->VsrW(i) =
2972 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2973 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2974 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2975 AES_imc[tmp.VsrB(4 * i + 3)][3];
2979 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2981 ppc_avr_t result;
2982 int i;
2984 VECTOR_FOR_INORDER_I(i, u8) {
2985 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2987 *r = result;
2990 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2992 int st = (st_six & 0x10) != 0;
2993 int six = st_six & 0xF;
2994 int i;
2996 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2997 if (st == 0) {
2998 if ((six & (0x8 >> i)) == 0) {
2999 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
3000 ror32(a->VsrW(i), 18) ^
3001 (a->VsrW(i) >> 3);
3002 } else { /* six.bit[i] == 1 */
3003 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
3004 ror32(a->VsrW(i), 19) ^
3005 (a->VsrW(i) >> 10);
3007 } else { /* st == 1 */
3008 if ((six & (0x8 >> i)) == 0) {
3009 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
3010 ror32(a->VsrW(i), 13) ^
3011 ror32(a->VsrW(i), 22);
3012 } else { /* six.bit[i] == 1 */
3013 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
3014 ror32(a->VsrW(i), 11) ^
3015 ror32(a->VsrW(i), 25);
3021 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3023 int st = (st_six & 0x10) != 0;
3024 int six = st_six & 0xF;
3025 int i;
3027 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
3028 if (st == 0) {
3029 if ((six & (0x8 >> (2 * i))) == 0) {
3030 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
3031 ror64(a->VsrD(i), 8) ^
3032 (a->VsrD(i) >> 7);
3033 } else { /* six.bit[2*i] == 1 */
3034 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
3035 ror64(a->VsrD(i), 61) ^
3036 (a->VsrD(i) >> 6);
3038 } else { /* st == 1 */
3039 if ((six & (0x8 >> (2 * i))) == 0) {
3040 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3041 ror64(a->VsrD(i), 34) ^
3042 ror64(a->VsrD(i), 39);
3043 } else { /* six.bit[2*i] == 1 */
3044 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3045 ror64(a->VsrD(i), 18) ^
3046 ror64(a->VsrD(i), 41);
3052 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3054 ppc_avr_t result;
3055 int i;
3057 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3058 int indexA = c->VsrB(i) >> 4;
3059 int indexB = c->VsrB(i) & 0xF;
3061 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
3063 *r = result;
3066 #undef VECTOR_FOR_INORDER_I
3068 /*****************************************************************************/
3069 /* SPE extension helpers */
/* Use a table to make this quicker: hbrev[n] is the nibble n bit-reversed. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of a byte by swapping and reversing its nibbles. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/*
 * Reverse the bit order of a 32-bit word: the bytes swap places and each
 * one is bit-reversed.
 *
 * Every reversed byte is widened to uint32_t before being shifted.
 * Without the cast it is promoted to a signed int, and moving a byte with
 * its top bit set into bits 24..31 shifts into the sign bit, which is
 * undefined behaviour.
 */
static inline uint32_t word_reverse(uint32_t val)
{
    return (uint32_t)byte_reverse(val >> 24) |
           ((uint32_t)byte_reverse(val >> 16) << 8) |
           ((uint32_t)byte_reverse(val >> 8) << 16) |
           ((uint32_t)byte_reverse(val) << 24);
}
3087 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
3088 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3090 uint32_t a, b, d, mask;
3092 mask = UINT32_MAX >> (32 - MASKBITS);
3093 a = arg1 & mask;
3094 b = arg2 & mask;
3095 d = word_reverse(1 + word_reverse(a | ~b));
3096 return (arg1 & ~mask) | (d & b);
3099 uint32_t helper_cntlsw32(uint32_t val)
3101 if (val & 0x80000000) {
3102 return clz32(~val);
3103 } else {
3104 return clz32(val);
3108 uint32_t helper_cntlzw32(uint32_t val)
3110 return clz32(val);
3113 /* 440 specific */
3114 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3115 target_ulong low, uint32_t update_Rc)
3117 target_ulong mask;
3118 int i;
3120 i = 1;
3121 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3122 if ((high & mask) == 0) {
3123 if (update_Rc) {
3124 env->crf[0] = 0x4;
3126 goto done;
3128 i++;
3130 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3131 if ((low & mask) == 0) {
3132 if (update_Rc) {
3133 env->crf[0] = 0x8;
3135 goto done;
3137 i++;
3139 i = 8;
3140 if (update_Rc) {
3141 env->crf[0] = 0x2;
3143 done:
3144 env->xer = (env->xer & ~0x7F) | i;
3145 if (update_Rc) {
3146 env->crf[0] |= xer_so;
3148 return i;