qemu.git: target/ppc/int_helper.c
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
31 #include "tcg/tcg-gvec-desc.h"
33 #include "helper_regs.h"
34 /*****************************************************************************/
35 /* Fixed point operations helpers */
37 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
39 if (unlikely(ov)) {
40 env->so = env->ov = 1;
41 } else {
42 env->ov = 0;
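/*
 * Divide Word Extended (divweu/divwe): the 64-bit dividend is RA shifted
 * left by 32 bits and the divisor is taken from the low 32 bits of RB.
 * When the divisor is zero or the quotient does not fit in 32 bits, the
 * result is undefined (0 is returned here) and, if OE is set, OV/SO are
 * updated through helper_update_ov_legacy().
 */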
46 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
47 uint32_t oe)
49 uint64_t rt = 0;
50 int overflow = 0;
52 uint64_t dividend = (uint64_t)ra << 32;
53 uint64_t divisor = (uint32_t)rb;
55 if (unlikely(divisor == 0)) {
56 overflow = 1;
57 } else {
58 rt = dividend / divisor;
59 overflow = rt > UINT32_MAX;
62 if (unlikely(overflow)) {
63 rt = 0; /* Undefined */
66 if (oe) {
67 helper_update_ov_legacy(env, overflow);
70 return (target_ulong)rt;
73 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
74 uint32_t oe)
76 int64_t rt = 0;
77 int overflow = 0;
79 int64_t dividend = (int64_t)ra << 32;
80 int64_t divisor = (int64_t)((int32_t)rb);
82 if (unlikely((divisor == 0) ||
83 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
84 overflow = 1;
85 } else {
86 rt = dividend / divisor;
87 overflow = rt != (int32_t)rt;
90 if (unlikely(overflow)) {
91 rt = 0; /* Undefined */
94 if (oe) {
95 helper_update_ov_legacy(env, overflow);
98 return (target_ulong)rt;
101 #if defined(TARGET_PPC64)
103 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
105 uint64_t rt = 0;
106 int overflow = 0;
108 if (unlikely(rb == 0 || ra >= rb)) {
109 overflow = 1;
110 rt = 0; /* Undefined */
111 } else {
112 divu128(&rt, &ra, rb);
115 if (oe) {
116 helper_update_ov_legacy(env, overflow);
119 return rt;
122 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
124 uint64_t rt = 0;
125 int64_t ra = (int64_t)rau;
126 int64_t rb = (int64_t)rbu;
127 int overflow = 0;
129 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
130 overflow = 1;
131 rt = 0; /* Undefined */
132 } else {
133 divs128(&rt, &ra, rb);
136 if (oe) {
137 helper_update_ov_legacy(env, overflow);
140 return rt;
143 #endif
146 #if defined(TARGET_PPC64)
147 /* if x = 0xab, returns 0xabababababababab */
148 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
151 * Subtract 1 from each byte, AND the result with the inverse of the
152 * original value, and check whether the MSB of each byte is set.
153 * e.g. for a zero byte: ((0x00 - 0x01) & ~(0x00)) & 0x80
154 *      = (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
156 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
158 /* When you XOR the pattern and there is a match, that byte will be zero */
159 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
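/*
 * Worked example: hasvalue(0x1122334455667788, 0x55) XORs the value with
 * pattern(0x55) = 0x5555555555555555, turning the matching byte 0x55 into
 * 0x00; haszero() then flags that zero byte, so helper_cmpeqb() below
 * reports a match (CRF_GT).
 */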
161 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
163 return hasvalue(rb, ra) ? CRF_GT : 0;
166 #undef pattern
167 #undef haszero
168 #undef hasvalue
171 * Return a random number.
173 uint64_t helper_darn32(void)
175 Error *err = NULL;
176 uint32_t ret;
178 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
179 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
180 error_get_pretty(err));
181 error_free(err);
182 return -1;
185 return ret;
188 uint64_t helper_darn64(void)
190 Error *err = NULL;
191 uint64_t ret;
193 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
194 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
195 error_get_pretty(err));
196 error_free(err);
197 return -1;
200 return ret;
203 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
205 int i;
206 uint64_t ra = 0;
208 for (i = 0; i < 8; i++) {
209 int index = (rs >> (i * 8)) & 0xFF;
210 if (index < 64) {
211 if (rb & PPC_BIT(index)) {
212 ra |= 1 << i;
216 return ra;
219 #endif
221 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
223 target_ulong mask = 0xff;
224 target_ulong ra = 0;
225 int i;
227 for (i = 0; i < sizeof(target_ulong); i++) {
228 if ((rs & mask) == (rb & mask)) {
229 ra |= mask;
231 mask <<= 8;
233 return ra;
236 /* shift right arithmetic helper */
237 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
238 target_ulong shift)
240 int32_t ret;
242 if (likely(!(shift & 0x20))) {
243 if (likely((uint32_t)shift != 0)) {
244 shift &= 0x1f;
245 ret = (int32_t)value >> shift;
246 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
247 env->ca32 = env->ca = 0;
248 } else {
249 env->ca32 = env->ca = 1;
251 } else {
252 ret = (int32_t)value;
253 env->ca32 = env->ca = 0;
255 } else {
256 ret = (int32_t)value >> 31;
257 env->ca32 = env->ca = (ret != 0);
259 return (target_long)ret;
262 #if defined(TARGET_PPC64)
263 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
264 target_ulong shift)
266 int64_t ret;
268 if (likely(!(shift & 0x40))) {
269 if (likely((uint64_t)shift != 0)) {
270 shift &= 0x3f;
271 ret = (int64_t)value >> shift;
272 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
273 env->ca32 = env->ca = 0;
274 } else {
275 env->ca32 = env->ca = 1;
277 } else {
278 ret = (int64_t)value;
279 env->ca32 = env->ca = 0;
281 } else {
282 ret = (int64_t)value >> 63;
283 env->ca32 = env->ca = (ret != 0);
285 return ret;
287 #endif
289 #if defined(TARGET_PPC64)
290 target_ulong helper_popcntb(target_ulong val)
292 /* Note that we don't fold past bytes */
293 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
294 0x5555555555555555ULL);
295 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
296 0x3333333333333333ULL);
297 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
298 0x0f0f0f0f0f0f0f0fULL);
299 return val;
302 target_ulong helper_popcntw(target_ulong val)
304 /* Note that we don't fold past words. */
305 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
306 0x5555555555555555ULL);
307 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
308 0x3333333333333333ULL);
309 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
310 0x0f0f0f0f0f0f0f0fULL);
311 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
312 0x00ff00ff00ff00ffULL);
313 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
314 0x0000ffff0000ffffULL);
315 return val;
317 #else
318 target_ulong helper_popcntb(target_ulong val)
320 /* Note that we don't fold past bytes */
321 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
322 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
323 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
324 return val;
326 #endif
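/*
 * CFUGED (Centrifuge Doubleword): the bits of 'src' selected by the set
 * bits of 'mask' are gathered, in order, into the low end of the result,
 * while the bits selected by the clear mask bits are gathered into the
 * high end.  e.g. helper_CFUGED(src, 0xFFFFFFFF00000000ULL) == ror64(src, 32).
 */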
328 uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
331 * Instead of processing the mask bit-by-bit from the most significant to
332 * the least significant bit, as described in PowerISA, we'll handle it in
333 * blocks of 'n' zeros/ones from LSB to MSB. To avoid having to choose
334 * between ctz and cto, we negate the mask at the end of each loop iteration.
336 target_ulong m, left = 0, right = 0;
337 unsigned int n, i = 64;
338 bool bit = false; /* tracks if we are processing zeros or ones */
340 if (mask == 0 || mask == -1) {
341 return src;
344 /* Processes the mask in blocks, from LSB to MSB */
345 while (i) {
346 /* Find how many bits we should take */
347 n = ctz64(mask);
348 if (n > i) {
349 n = i;
353 * Extract the 'n' trailing bits of src and put them in the leading 'n'
354 * bits of 'right' or 'left', pushing down the previously extracted
355 * values.
357 m = (1ll << n) - 1;
358 if (bit) {
359 right = ror64(right | (src & m), n);
360 } else {
361 left = ror64(left | (src & m), n);
365 * Discards the processed bits from 'src' and 'mask'. Note that we are
366 * removing 'n' trailing zeros from 'mask', but the logical shift will
367 * add 'n' leading zeros back, so the population count of 'mask' is kept
368 * the same.
370 src >>= n;
371 mask >>= n;
372 i -= n;
373 bit = !bit;
374 mask = ~mask;
378 * At the end, 'right' has been rotated right by ctpop(mask) bits in total.
379 * To put it back in place, shift it right by the remaining 64 - ctpop(mask) bits.
381 if (bit) {
382 n = ctpop64(mask);
383 } else {
384 n = 64 - ctpop64(mask);
387 return left | (right >> n);
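/*
 * PDEPD (parallel bit deposit) below scatters the low-order bits of 'src'
 * to the positions of the set bits in 'mask'; PEXTD (parallel bit extract)
 * is the inverse, gathering the 'src' bits selected by 'mask' into the
 * low-order bits of the result.  All other result bits are zero.
 */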
390 uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
392 int i, o;
393 uint64_t result = 0;
395 if (mask == -1) {
396 return src;
399 for (i = 0; mask != 0; i++) {
400 o = ctz64(mask);
401 mask &= mask - 1;
402 result |= ((src >> i) & 1) << o;
405 return result;
408 uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
410 int i, o;
411 uint64_t result = 0;
413 if (mask == -1) {
414 return src;
417 for (o = 0; mask != 0; o++) {
418 i = ctz64(mask);
419 mask &= mask - 1;
420 result |= ((src >> i) & 1) << o;
423 return result;
426 /*****************************************************************************/
427 /* Altivec extension helpers */
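/*
 * VECTOR_FOR_INORDER_I iterates over the elements of a ppc_avr_t in guest
 * (big-endian) element order regardless of host endianness: the host array
 * is walked forwards on big-endian hosts and backwards on little-endian
 * hosts.
 */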
428 #if HOST_BIG_ENDIAN
429 #define VECTOR_FOR_INORDER_I(index, element) \
430 for (index = 0; index < ARRAY_SIZE(r->element); index++)
431 #else
432 #define VECTOR_FOR_INORDER_I(index, element) \
433 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
434 #endif
436 /* Saturating arithmetic helpers. */
437 #define SATCVT(from, to, from_type, to_type, min, max) \
438 static inline to_type cvt##from##to(from_type x, int *sat) \
440 to_type r; \
442 if (x < (from_type)min) { \
443 r = min; \
444 *sat = 1; \
445 } else if (x > (from_type)max) { \
446 r = max; \
447 *sat = 1; \
448 } else { \
449 r = x; \
451 return r; \
453 #define SATCVTU(from, to, from_type, to_type, min, max) \
454 static inline to_type cvt##from##to(from_type x, int *sat) \
456 to_type r; \
458 if (x > (from_type)max) { \
459 r = max; \
460 *sat = 1; \
461 } else { \
462 r = x; \
464 return r; \
466 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
467 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
468 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
470 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
471 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
472 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
473 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
474 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
475 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
476 #undef SATCVT
477 #undef SATCVTU
479 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
481 ppc_store_vscr(env, vscr);
484 uint32_t helper_mfvscr(CPUPPCState *env)
486 return ppc_get_vscr(env);
489 static inline void set_vscr_sat(CPUPPCState *env)
491 /* The choice of non-zero value is arbitrary. */
492 env->vscr_sat.u32[0] = 1;
495 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
497 int i;
499 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
500 r->u32[i] = ~a->u32[i] < b->u32[i];
504 /* vprtybw */
505 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
507 int i;
508 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
509 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
510 res ^= res >> 8;
511 r->u32[i] = res & 1;
515 /* vprtybd */
516 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
518 int i;
519 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
520 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
521 res ^= res >> 16;
522 res ^= res >> 8;
523 r->u64[i] = res & 1;
527 /* vprtybq */
528 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
530 uint64_t res = b->u64[0] ^ b->u64[1];
531 res ^= res >> 32;
532 res ^= res >> 16;
533 res ^= res >> 8;
534 r->VsrD(1) = res & 1;
535 r->VsrD(0) = 0;
538 #define VARITHFP(suffix, func) \
539 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
540 ppc_avr_t *b) \
542 int i; \
544 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
545 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
548 VARITHFP(addfp, float32_add)
549 VARITHFP(subfp, float32_sub)
550 VARITHFP(minfp, float32_min)
551 VARITHFP(maxfp, float32_max)
552 #undef VARITHFP
554 #define VARITHFPFMA(suffix, type) \
555 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
556 ppc_avr_t *b, ppc_avr_t *c) \
558 int i; \
559 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
560 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
561 type, &env->vec_status); \
564 VARITHFPFMA(maddfp, 0);
565 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
566 #undef VARITHFPFMA
568 #define VARITHSAT_CASE(type, op, cvt, element) \
570 type result = (type)a->element[i] op (type)b->element[i]; \
571 r->element[i] = cvt(result, &sat); \
574 #define VARITHSAT_DO(name, op, optype, cvt, element) \
575 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
576 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
578 int sat = 0; \
579 int i; \
581 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
582 VARITHSAT_CASE(optype, op, cvt, element); \
584 if (sat) { \
585 vscr_sat->u32[0] = 1; \
588 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
589 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
590 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
591 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
592 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
593 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
594 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
595 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
596 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
597 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
598 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
599 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
600 #undef VARITHSAT_CASE
601 #undef VARITHSAT_DO
602 #undef VARITHSAT_SIGNED
603 #undef VARITHSAT_UNSIGNED
605 #define VAVG_DO(name, element, etype) \
606 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
608 int i; \
610 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
611 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
612 r->element[i] = x >> 1; \
616 #define VAVG(type, signed_element, signed_type, unsigned_element, \
617 unsigned_type) \
618 VAVG_DO(avgs##type, signed_element, signed_type) \
619 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
620 VAVG(b, s8, int16_t, u8, uint16_t)
621 VAVG(h, s16, int32_t, u16, uint32_t)
622 VAVG(w, s32, int64_t, u32, uint64_t)
623 #undef VAVG_DO
624 #undef VAVG
626 #define VABSDU_DO(name, element) \
627 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
629 int i; \
631 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
632 r->element[i] = (a->element[i] > b->element[i]) ? \
633 (a->element[i] - b->element[i]) : \
634 (b->element[i] - a->element[i]); \
639 * VABSDU - Vector absolute difference unsigned
640 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
641 * element - element type to access from vector
643 #define VABSDU(type, element) \
644 VABSDU_DO(absdu##type, element)
645 VABSDU(b, u8)
646 VABSDU(h, u16)
647 VABSDU(w, u32)
648 #undef VABSDU_DO
649 #undef VABSDU
651 #define VCF(suffix, cvt, element) \
652 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
653 ppc_avr_t *b, uint32_t uim) \
655 int i; \
657 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
658 float32 t = cvt(b->element[i], &env->vec_status); \
659 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
662 VCF(ux, uint32_to_float32, u32)
663 VCF(sx, int32_to_float32, s32)
664 #undef VCF
666 #define VCMPNEZ(NAME, ELEM) \
667 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
669 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \
670 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \
671 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \
674 VCMPNEZ(VCMPNEZB, u8)
675 VCMPNEZ(VCMPNEZH, u16)
676 VCMPNEZ(VCMPNEZW, u32)
677 #undef VCMPNEZ
679 #define VCMPFP_DO(suffix, compare, order, record) \
680 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
681 ppc_avr_t *a, ppc_avr_t *b) \
683 uint32_t ones = (uint32_t)-1; \
684 uint32_t all = ones; \
685 uint32_t none = 0; \
686 int i; \
688 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
689 uint32_t result; \
690 FloatRelation rel = \
691 float32_compare_quiet(a->f32[i], b->f32[i], \
692 &env->vec_status); \
693 if (rel == float_relation_unordered) { \
694 result = 0; \
695 } else if (rel compare order) { \
696 result = ones; \
697 } else { \
698 result = 0; \
700 r->u32[i] = result; \
701 all &= result; \
702 none |= result; \
704 if (record) { \
705 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
708 #define VCMPFP(suffix, compare, order) \
709 VCMPFP_DO(suffix, compare, order, 0) \
710 VCMPFP_DO(suffix##_dot, compare, order, 1)
711 VCMPFP(eqfp, ==, float_relation_equal)
712 VCMPFP(gefp, !=, float_relation_less)
713 VCMPFP(gtfp, ==, float_relation_greater)
714 #undef VCMPFP_DO
715 #undef VCMPFP
717 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
718 ppc_avr_t *a, ppc_avr_t *b, int record)
720 int i;
721 int all_in = 0;
723 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
724 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
725 &env->vec_status);
726 if (le_rel == float_relation_unordered) {
727 r->u32[i] = 0xc0000000;
728 all_in = 1;
729 } else {
730 float32 bneg = float32_chs(b->f32[i]);
731 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
732 &env->vec_status);
733 int le = le_rel != float_relation_greater;
734 int ge = ge_rel != float_relation_less;
736 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
737 all_in |= (!le | !ge);
740 if (record) {
741 env->crf[6] = (all_in == 0) << 1;
745 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
747 vcmpbfp_internal(env, r, a, b, 0);
750 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
751 ppc_avr_t *b)
753 vcmpbfp_internal(env, r, a, b, 1);
756 #define VCT(suffix, satcvt, element) \
757 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
758 ppc_avr_t *b, uint32_t uim) \
760 int i; \
761 int sat = 0; \
762 float_status s = env->vec_status; \
764 set_float_rounding_mode(float_round_to_zero, &s); \
765 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
766 if (float32_is_any_nan(b->f32[i])) { \
767 r->element[i] = 0; \
768 } else { \
769 float64 t = float32_to_float64(b->f32[i], &s); \
770 int64_t j; \
772 t = float64_scalbn(t, uim, &s); \
773 j = float64_to_int64(t, &s); \
774 r->element[i] = satcvt(j, &sat); \
777 if (sat) { \
778 set_vscr_sat(env); \
781 VCT(uxs, cvtsduw, u32)
782 VCT(sxs, cvtsdsw, s32)
783 #undef VCT
785 target_ulong helper_vclzlsbb(ppc_avr_t *r)
787 target_ulong count = 0;
788 int i;
789 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
790 if (r->VsrB(i) & 0x01) {
791 break;
793 count++;
795 return count;
798 target_ulong helper_vctzlsbb(ppc_avr_t *r)
800 target_ulong count = 0;
801 int i;
802 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
803 if (r->VsrB(i) & 0x01) {
804 break;
806 count++;
808 return count;
811 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
812 ppc_avr_t *b, ppc_avr_t *c)
814 int sat = 0;
815 int i;
817 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
818 int32_t prod = a->s16[i] * b->s16[i];
819 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
821 r->s16[i] = cvtswsh(t, &sat);
824 if (sat) {
825 set_vscr_sat(env);
829 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
830 ppc_avr_t *b, ppc_avr_t *c)
832 int sat = 0;
833 int i;
835 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
836 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
837 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
838 r->s16[i] = cvtswsh(t, &sat);
841 if (sat) {
842 set_vscr_sat(env);
846 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
848 int i;
850 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
851 int32_t prod = a->s16[i] * b->s16[i];
852 r->s16[i] = (int16_t) (prod + c->s16[i]);
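/*
 * VMRG_DO implements the vector merge instructions: elements taken from the
 * high halves (ofs = 0, vmrgh*) or the low halves (ofs = half, vmrgl*) of
 * 'a' and 'b' are interleaved, with 'a' supplying the even-numbered result
 * positions and 'b' the odd ones (in guest element order).
 */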
856 #define VMRG_DO(name, element, access, ofs) \
857 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
859 ppc_avr_t result; \
860 int i, half = ARRAY_SIZE(r->element) / 2; \
862 for (i = 0; i < half; i++) { \
863 result.access(i * 2 + 0) = a->access(i + ofs); \
864 result.access(i * 2 + 1) = b->access(i + ofs); \
866 *r = result; \
869 #define VMRG(suffix, element, access) \
870 VMRG_DO(mrgl##suffix, element, access, half) \
871 VMRG_DO(mrgh##suffix, element, access, 0)
872 VMRG(b, u8, VsrB)
873 VMRG(h, u16, VsrH)
874 VMRG(w, u32, VsrW)
875 #undef VMRG_DO
876 #undef VMRG
878 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
879 ppc_avr_t *b, ppc_avr_t *c)
881 int32_t prod[16];
882 int i;
884 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
885 prod[i] = (int32_t)a->s8[i] * b->u8[i];
888 VECTOR_FOR_INORDER_I(i, s32) {
889 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
890 prod[4 * i + 2] + prod[4 * i + 3];
894 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
895 ppc_avr_t *b, ppc_avr_t *c)
897 int32_t prod[8];
898 int i;
900 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
901 prod[i] = a->s16[i] * b->s16[i];
904 VECTOR_FOR_INORDER_I(i, s32) {
905 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
909 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
910 ppc_avr_t *b, ppc_avr_t *c)
912 int32_t prod[8];
913 int i;
914 int sat = 0;
916 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
917 prod[i] = (int32_t)a->s16[i] * b->s16[i];
920 VECTOR_FOR_INORDER_I(i, s32) {
921 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
923 r->u32[i] = cvtsdsw(t, &sat);
926 if (sat) {
927 set_vscr_sat(env);
931 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
932 ppc_avr_t *b, ppc_avr_t *c)
934 uint16_t prod[16];
935 int i;
937 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
938 prod[i] = a->u8[i] * b->u8[i];
941 VECTOR_FOR_INORDER_I(i, u32) {
942 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
943 prod[4 * i + 2] + prod[4 * i + 3];
947 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
948 ppc_avr_t *b, ppc_avr_t *c)
950 uint32_t prod[8];
951 int i;
953 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
954 prod[i] = a->u16[i] * b->u16[i];
957 VECTOR_FOR_INORDER_I(i, u32) {
958 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
962 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
963 ppc_avr_t *b, ppc_avr_t *c)
965 uint32_t prod[8];
966 int i;
967 int sat = 0;
969 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
970 prod[i] = a->u16[i] * b->u16[i];
973 VECTOR_FOR_INORDER_I(i, s32) {
974 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
976 r->u32[i] = cvtuduw(t, &sat);
979 if (sat) {
980 set_vscr_sat(env);
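/*
 * VMUL_DO_EVN/VMUL_DO_ODD implement the vector multiply even/odd helpers:
 * the even-numbered (or odd-numbered) source elements, in guest element
 * order, are multiplied pairwise into destination elements of twice the
 * width.
 */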
984 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
985 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
987 int i; \
989 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
990 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
991 (cast)b->mul_access(i); \
995 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
996 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
998 int i; \
1000 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1001 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1002 (cast)b->mul_access(i + 1); \
1006 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1007 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
1008 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1009 VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1010 VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1011 VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1012 VMUL(UB, u8, VsrB, VsrH, uint16_t)
1013 VMUL(UH, u16, VsrH, VsrW, uint32_t)
1014 VMUL(UW, u32, VsrW, VsrD, uint64_t)
1015 #undef VMUL_DO_EVN
1016 #undef VMUL_DO_ODD
1017 #undef VMUL
1019 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
1020 target_ulong uim)
1022 int i, idx;
1023 ppc_vsr_t tmp = { .u64 = {0, 0} };
1025 for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
1026 if ((pcv->VsrB(i) >> 5) == uim) {
1027 idx = pcv->VsrB(i) & 0x1f;
1028 if (idx < ARRAY_SIZE(t->u8)) {
1029 tmp.VsrB(i) = s0->VsrB(idx);
1030 } else {
1031 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
1036 *t = tmp;
1039 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1041 ppc_avr_t result;
1042 int i;
1044 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1045 int s = c->VsrB(i) & 0x1f;
1046 int index = s & 0xf;
1048 if (s & 0x10) {
1049 result.VsrB(i) = b->VsrB(index);
1050 } else {
1051 result.VsrB(i) = a->VsrB(index);
1054 *r = result;
1057 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1059 ppc_avr_t result;
1060 int i;
1062 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1063 int s = c->VsrB(i) & 0x1f;
1064 int index = 15 - (s & 0xf);
1066 if (s & 0x10) {
1067 result.VsrB(i) = a->VsrB(index);
1068 } else {
1069 result.VsrB(i) = b->VsrB(index);
1072 *r = result;
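/*
 * The XXGENPCV* helpers build permute control vectors from the mask in 'b':
 * a mask element is selected when its most significant bit is set.  The
 * _comp (compressed) variants pack the byte indices of the selected elements
 * together, while the _exp (expanded) variants place consecutive byte
 * indices at the selected element positions and leave the other positions
 * at their initial values 0x10..0x1F.  The _be/_le variants differ only in
 * byte ordering.
 */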
1075 #define XXGENPCV_BE_EXP(NAME, SZ) \
1076 void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1078 ppc_vsr_t tmp; \
1080 /* Initialize tmp with the result of an all-zeros mask */ \
1081 tmp.VsrD(0) = 0x1011121314151617; \
1082 tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \
1084 /* Iterate over the most significant byte of each element */ \
1085 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1086 if (b->VsrB(i) & 0x80) { \
1087 /* Update each byte of the element */ \
1088 for (int k = 0; k < SZ; k++) { \
1089 tmp.VsrB(i + k) = j + k; \
1091 j += SZ; \
1095 *t = tmp; \
1098 #define XXGENPCV_BE_COMP(NAME, SZ) \
1099 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1101 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1103 /* Iterate over the most significant byte of each element */ \
1104 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1105 if (b->VsrB(i) & 0x80) { \
1106 /* Update each byte of the element */ \
1107 for (int k = 0; k < SZ; k++) { \
1108 tmp.VsrB(j + k) = i + k; \
1110 j += SZ; \
1114 *t = tmp; \
1117 #define XXGENPCV_LE_EXP(NAME, SZ) \
1118 void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1120 ppc_vsr_t tmp; \
1122 /* Initialize tmp with the result of an all-zeros mask */ \
1123 tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \
1124 tmp.VsrD(1) = 0x1716151413121110; \
1126 /* Iterate over the most significant byte of each element */ \
1127 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1128 /* Reverse indexing of "i" */ \
1129 const int idx = ARRAY_SIZE(b->u8) - i - SZ; \
1130 if (b->VsrB(idx) & 0x80) { \
1131 /* Update each byte of the element */ \
1132 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
1133 tmp.VsrB(idx + rk) = j + k; \
1135 j += SZ; \
1139 *t = tmp; \
1142 #define XXGENPCV_LE_COMP(NAME, SZ) \
1143 void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1145 ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
1147 /* Iterate over the most significant byte of each element */ \
1148 for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \
1149 if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \
1150 /* Update each byte of the element */ \
1151 for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \
1152 /* Reverse indexing of "j" */ \
1153 const int idx = ARRAY_SIZE(b->u8) - j - SZ; \
1154 tmp.VsrB(idx + rk) = i + k; \
1156 j += SZ; \
1160 *t = tmp; \
1163 #define XXGENPCV(NAME, SZ) \
1164 XXGENPCV_BE_EXP(NAME, SZ) \
1165 XXGENPCV_BE_COMP(NAME, SZ) \
1166 XXGENPCV_LE_EXP(NAME, SZ) \
1167 XXGENPCV_LE_COMP(NAME, SZ) \
1169 XXGENPCV(XXGENPCVBM, 1)
1170 XXGENPCV(XXGENPCVHM, 2)
1171 XXGENPCV(XXGENPCVWM, 4)
1172 XXGENPCV(XXGENPCVDM, 8)
1174 #undef XXGENPCV_BE_EXP
1175 #undef XXGENPCV_BE_COMP
1176 #undef XXGENPCV_LE_EXP
1177 #undef XXGENPCV_LE_COMP
1178 #undef XXGENPCV
1180 #if HOST_BIG_ENDIAN
1181 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1182 #define VBPERMD_INDEX(i) (i)
1183 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1184 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1185 #else
1186 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1187 #define VBPERMD_INDEX(i) (1 - i)
1188 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1189 #define EXTRACT_BIT(avr, i, index) \
1190 (extract64((avr)->u64[1 - i], 63 - index, 1))
1191 #endif
1193 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1195 int i, j;
1196 ppc_avr_t result = { .u64 = { 0, 0 } };
1197 VECTOR_FOR_INORDER_I(i, u64) {
1198 for (j = 0; j < 8; j++) {
1199 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1200 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1201 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1205 *r = result;
1208 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1210 int i;
1211 uint64_t perm = 0;
1213 VECTOR_FOR_INORDER_I(i, u8) {
1214 int index = VBPERMQ_INDEX(b, i);
1216 if (index < 128) {
1217 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1218 if (a->u64[VBPERMQ_DW(index)] & mask) {
1219 perm |= (0x8000 >> i);
1224 r->VsrD(0) = perm;
1225 r->VsrD(1) = 0;
1228 #undef VBPERMQ_INDEX
1229 #undef VBPERMQ_DW
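/*
 * PMSUM implements vpmsumb/vpmsumh/vpmsumw: corresponding elements of 'a'
 * and 'b' are multiplied carry-lessly (partial products are XORed instead
 * of added), and adjacent products are XORed together into a destination
 * element of twice the width.
 */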
1231 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1232 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1234 int i, j; \
1235 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1237 VECTOR_FOR_INORDER_I(i, srcfld) { \
1238 prod[i] = 0; \
1239 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1240 if (a->srcfld[i] & (1ull << j)) { \
1241 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1246 VECTOR_FOR_INORDER_I(i, trgfld) { \
1247 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1251 PMSUM(vpmsumb, u8, u16, uint16_t)
1252 PMSUM(vpmsumh, u16, u32, uint32_t)
1253 PMSUM(vpmsumw, u32, u64, uint64_t)
1255 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1258 #ifdef CONFIG_INT128
1259 int i, j;
1260 __uint128_t prod[2];
1262 VECTOR_FOR_INORDER_I(i, u64) {
1263 prod[i] = 0;
1264 for (j = 0; j < 64; j++) {
1265 if (a->u64[i] & (1ull << j)) {
1266 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1271 r->u128 = prod[0] ^ prod[1];
1273 #else
1274 int i, j;
1275 ppc_avr_t prod[2];
1277 VECTOR_FOR_INORDER_I(i, u64) {
1278 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1279 for (j = 0; j < 64; j++) {
1280 if (a->u64[i] & (1ull << j)) {
1281 ppc_avr_t bshift;
1282 if (j == 0) {
1283 bshift.VsrD(0) = 0;
1284 bshift.VsrD(1) = b->u64[i];
1285 } else {
1286 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1287 bshift.VsrD(1) = b->u64[i] << j;
1289 prod[i].VsrD(1) ^= bshift.VsrD(1);
1290 prod[i].VsrD(0) ^= bshift.VsrD(0);
1295 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1296 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1297 #endif
1301 #if HOST_BIG_ENDIAN
1302 #define PKBIG 1
1303 #else
1304 #define PKBIG 0
1305 #endif
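/*
 * vpkpx packs eight 32-bit pixels (8-bit a:r:g:b channels) into 16-bit
 * 1:5:5:5 pixels, keeping the least significant bit of 'a' and the five
 * most significant bits of each of 'r', 'g' and 'b'.
 */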
1306 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1308 int i, j;
1309 ppc_avr_t result;
1310 #if HOST_BIG_ENDIAN
1311 const ppc_avr_t *x[2] = { a, b };
1312 #else
1313 const ppc_avr_t *x[2] = { b, a };
1314 #endif
1316 VECTOR_FOR_INORDER_I(i, u64) {
1317 VECTOR_FOR_INORDER_I(j, u32) {
1318 uint32_t e = x[i]->u32[j];
1320 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1321 ((e >> 6) & 0x3e0) |
1322 ((e >> 3) & 0x1f));
1325 *r = result;
1328 #define VPK(suffix, from, to, cvt, dosat) \
1329 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1330 ppc_avr_t *a, ppc_avr_t *b) \
1332 int i; \
1333 int sat = 0; \
1334 ppc_avr_t result; \
1335 ppc_avr_t *a0 = PKBIG ? a : b; \
1336 ppc_avr_t *a1 = PKBIG ? b : a; \
1338 VECTOR_FOR_INORDER_I(i, from) { \
1339 result.to[i] = cvt(a0->from[i], &sat); \
1340 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1342 *r = result; \
1343 if (dosat && sat) { \
1344 set_vscr_sat(env); \
1347 #define I(x, y) (x)
1348 VPK(shss, s16, s8, cvtshsb, 1)
1349 VPK(shus, s16, u8, cvtshub, 1)
1350 VPK(swss, s32, s16, cvtswsh, 1)
1351 VPK(swus, s32, u16, cvtswuh, 1)
1352 VPK(sdss, s64, s32, cvtsdsw, 1)
1353 VPK(sdus, s64, u32, cvtsduw, 1)
1354 VPK(uhus, u16, u8, cvtuhub, 1)
1355 VPK(uwus, u32, u16, cvtuwuh, 1)
1356 VPK(udus, u64, u32, cvtuduw, 1)
1357 VPK(uhum, u16, u8, I, 0)
1358 VPK(uwum, u32, u16, I, 0)
1359 VPK(udum, u64, u32, I, 0)
1360 #undef I
1361 #undef VPK
1362 #undef PKBIG
1364 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1366 int i;
1368 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1369 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1373 #define VRFI(suffix, rounding) \
1374 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1375 ppc_avr_t *b) \
1377 int i; \
1378 float_status s = env->vec_status; \
1380 set_float_rounding_mode(rounding, &s); \
1381 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1382 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1385 VRFI(n, float_round_nearest_even)
1386 VRFI(m, float_round_down)
1387 VRFI(p, float_round_up)
1388 VRFI(z, float_round_to_zero)
1389 #undef VRFI
1391 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1393 int i;
1395 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1396 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1398 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1402 #define VRLMI(name, size, element, insert) \
1403 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
1405 int i; \
1406 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1407 uint##size##_t src1 = a->element[i]; \
1408 uint##size##_t src2 = b->element[i]; \
1409 uint##size##_t src3 = r->element[i]; \
1410 uint##size##_t begin, end, shift, mask, rot_val; \
1412 shift = extract##size(src2, 0, 6); \
1413 end = extract##size(src2, 8, 6); \
1414 begin = extract##size(src2, 16, 6); \
1415 rot_val = rol##size(src1, shift); \
1416 mask = mask_u##size(begin, end); \
1417 if (insert) { \
1418 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1419 } else { \
1420 r->element[i] = (rot_val & mask); \
1425 VRLMI(VRLDMI, 64, u64, 1);
1426 VRLMI(VRLWMI, 32, u32, 1);
1427 VRLMI(VRLDNM, 64, u64, 0);
1428 VRLMI(VRLWNM, 32, u32, 0);
1430 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1432 int i;
1434 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1435 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1439 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1441 int i;
1443 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1444 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
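/*
 * VEXTU_X_DO implements vextublx/vextuhlx/vextuwlx and vextubrx/vextuhrx/
 * vextuwrx: return the 8/16/32 bits of the vector that start (ra & 0xf)
 * bytes from the most significant (lx) or least significant (rx) end,
 * zero-extended to 64 bits.
 */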
1448 #define VEXTU_X_DO(name, size, left) \
1449 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1451 int index = (a & 0xf) * 8; \
1452 if (left) { \
1453 index = 128 - index - size; \
1455 return int128_getlo(int128_rshift(b->s128, index)) & \
1456 MAKE_64BIT_MASK(0, size); \
1458 VEXTU_X_DO(vextublx, 8, 1)
1459 VEXTU_X_DO(vextuhlx, 16, 1)
1460 VEXTU_X_DO(vextuwlx, 32, 1)
1461 VEXTU_X_DO(vextubrx, 8, 0)
1462 VEXTU_X_DO(vextuhrx, 16, 0)
1463 VEXTU_X_DO(vextuwrx, 32, 0)
1464 #undef VEXTU_X_DO
1466 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1468 int i;
1469 unsigned int shift, bytes, size;
1471 size = ARRAY_SIZE(r->u8);
1472 for (i = 0; i < size; i++) {
1473 shift = b->VsrB(i) & 0x7; /* extract shift value */
1474 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1475 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1476 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1480 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1482 int i;
1483 unsigned int shift, bytes;
1486 * Use reverse order, as the destination and source registers can be the
1487 * same. Since the register is modified in place (saving a temporary),
1488 * reverse order guarantees that the computed result is not fed back.
1490 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1491 shift = b->VsrB(i) & 0x7; /* extract shift value */
1492 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1493 /* extract adjacent bytes */
1494 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1498 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1500 int sh = shift & 0xf;
1501 int i;
1502 ppc_avr_t result;
1504 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1505 int index = sh + i;
1506 if (index > 0xf) {
1507 result.VsrB(i) = b->VsrB(index - 0x10);
1508 } else {
1509 result.VsrB(i) = a->VsrB(index);
1512 *r = result;
1515 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1517 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1519 #if HOST_BIG_ENDIAN
1520 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1521 memset(&r->u8[16 - sh], 0, sh);
1522 #else
1523 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1524 memset(&r->u8[0], 0, sh);
1525 #endif
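/*
 * ELEM_ADDR returns the host address of the SIZE-byte element whose first
 * byte, in guest (big-endian) order, is at index IDX; on little-endian
 * hosts the bytes of a ppc_avr_t are stored in reverse order, hence the
 * adjustment.
 */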
1528 #if HOST_BIG_ENDIAN
1529 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1530 #else
1531 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1532 #endif
1534 #define VINSX(SUFFIX, TYPE) \
1535 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1536 uint64_t val, target_ulong index) \
1538 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1539 target_long idx = index; \
1541 if (idx < 0 || idx > maxidx) { \
1542 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1543 qemu_log_mask(LOG_GUEST_ERROR, \
1544 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1545 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1546 } else { \
1547 TYPE src = val; \
1548 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1551 VINSX(B, uint8_t)
1552 VINSX(H, uint16_t)
1553 VINSX(W, uint32_t)
1554 VINSX(D, uint64_t)
1555 #undef ELEM_ADDR
1556 #undef VINSX
1557 #if HOST_BIG_ENDIAN
1558 #define VEXTDVLX(NAME, SIZE) \
1559 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1560 target_ulong index) \
1562 const target_long idx = index; \
1563 ppc_avr_t tmp[2] = { *a, *b }; \
1564 memset(t, 0, sizeof(*t)); \
1565 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1566 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1567 } else { \
1568 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1569 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1570 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1573 #else
1574 #define VEXTDVLX(NAME, SIZE) \
1575 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1576 target_ulong index) \
1578 const target_long idx = index; \
1579 ppc_avr_t tmp[2] = { *b, *a }; \
1580 memset(t, 0, sizeof(*t)); \
1581 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1582 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1583 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1584 } else { \
1585 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1586 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1587 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1590 #endif
1591 VEXTDVLX(VEXTDUBVLX, 1)
1592 VEXTDVLX(VEXTDUHVLX, 2)
1593 VEXTDVLX(VEXTDUWVLX, 4)
1594 VEXTDVLX(VEXTDDVLX, 8)
1595 #undef VEXTDVLX
1596 #if HOST_BIG_ENDIAN
1597 #define VEXTRACT(suffix, element) \
1598 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1600 uint32_t es = sizeof(r->element[0]); \
1601 memmove(&r->u8[8 - es], &b->u8[index], es); \
1602 memset(&r->u8[8], 0, 8); \
1603 memset(&r->u8[0], 0, 8 - es); \
1605 #else
1606 #define VEXTRACT(suffix, element) \
1607 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1609 uint32_t es = sizeof(r->element[0]); \
1610 uint32_t s = (16 - index) - es; \
1611 memmove(&r->u8[8], &b->u8[s], es); \
1612 memset(&r->u8[0], 0, 8); \
1613 memset(&r->u8[8 + es], 0, 8 - es); \
1615 #endif
1616 VEXTRACT(ub, u8)
1617 VEXTRACT(uh, u16)
1618 VEXTRACT(uw, u32)
1619 VEXTRACT(d, u64)
1620 #undef VEXTRACT
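/*
 * VSTRI implements the Vector String Isolate helpers (VSTRIBL/R, VSTRIHL/R):
 * the leading nonzero elements of 'b' are copied, scanning from the left or
 * from the right, the remaining elements are cleared, and the helper returns
 * 0b0010 when a terminating zero element was found (0 otherwise).
 */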
1622 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
1623 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
1625 int i, idx, crf = 0; \
1627 for (i = 0; i < NUM_ELEMS; i++) { \
1628 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1629 if (b->Vsr##ELEM(idx)) { \
1630 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
1631 } else { \
1632 crf = 0b0010; \
1633 break; \
1637 for (; i < NUM_ELEMS; i++) { \
1638 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1639 t->Vsr##ELEM(idx) = 0; \
1642 return crf; \
1644 VSTRI(VSTRIBL, B, 16, true)
1645 VSTRI(VSTRIBR, B, 16, false)
1646 VSTRI(VSTRIHL, H, 8, true)
1647 VSTRI(VSTRIHR, H, 8, false)
1648 #undef VSTRI
1650 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1651 ppc_vsr_t *xb, uint32_t index)
1653 ppc_vsr_t t = { };
1654 size_t es = sizeof(uint32_t);
1655 uint32_t ext_index;
1656 int i;
1658 ext_index = index;
1659 for (i = 0; i < es; i++, ext_index++) {
1660 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1663 *xt = t;
1666 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1667 ppc_vsr_t *xb, uint32_t index)
1669 ppc_vsr_t t = *xt;
1670 size_t es = sizeof(uint32_t);
1671 int ins_index, i = 0;
1673 ins_index = index;
1674 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1675 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1678 *xt = t;
1681 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
1682 uint32_t desc)
1685 * Instead of processing imm bit-by-bit, we'll skip the computation of
1686 * conjunctions whose corresponding bit is unset.
1688 int bit, imm = simd_data(desc);
1689 Int128 conj, disj = int128_zero();
1691 /* Iterate over set bits from the least to the most significant bit */
1692 while (imm) {
1694 * Get the next bit to be processed with ctzl (count trailing zeros).
1695 * Invert the result to match the indexing used by PowerISA.
1697 bit = 7 - ctzl(imm);
1698 if (bit & 0x4) {
1699 conj = a->s128;
1700 } else {
1701 conj = int128_not(a->s128);
1703 if (bit & 0x2) {
1704 conj = int128_and(conj, b->s128);
1705 } else {
1706 conj = int128_and(conj, int128_not(b->s128));
1708 if (bit & 0x1) {
1709 conj = int128_and(conj, c->s128);
1710 } else {
1711 conj = int128_and(conj, int128_not(c->s128));
1713 disj = int128_or(disj, conj);
1715 /* Unset the least significant bit that is set */
1716 imm &= imm - 1;
1719 t->s128 = disj;
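/*
 * Example for helper_XXEVAL above: imm = 0x01 keeps only the A & B & C
 * conjunction, so the result is the bitwise AND of the three operands;
 * imm = 0x7F (every conjunction except ~A & ~B & ~C) yields their bitwise OR.
 */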
1722 #define XXBLEND(name, sz) \
1723 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1724 ppc_avr_t *c, uint32_t desc) \
1726 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1727 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1728 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1731 XXBLEND(B, 8)
1732 XXBLEND(H, 16)
1733 XXBLEND(W, 32)
1734 XXBLEND(D, 64)
1735 #undef XXBLEND
1737 #define VNEG(name, element) \
1738 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1740 int i; \
1741 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1742 r->element[i] = -b->element[i]; \
1745 VNEG(vnegw, s32)
1746 VNEG(vnegd, s64)
1747 #undef VNEG
1749 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1751 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1753 #if HOST_BIG_ENDIAN
1754 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1755 memset(&r->u8[0], 0, sh);
1756 #else
1757 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1758 memset(&r->u8[16 - sh], 0, sh);
1759 #endif
1762 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1764 int i;
1766 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1767 r->u32[i] = a->u32[i] >= b->u32[i];
1771 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1773 int64_t t;
1774 int i, upper;
1775 ppc_avr_t result;
1776 int sat = 0;
1778 upper = ARRAY_SIZE(r->s32) - 1;
1779 t = (int64_t)b->VsrSW(upper);
1780 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1781 t += a->VsrSW(i);
1782 result.VsrSW(i) = 0;
1784 result.VsrSW(upper) = cvtsdsw(t, &sat);
1785 *r = result;
1787 if (sat) {
1788 set_vscr_sat(env);
1792 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1794 int i, j, upper;
1795 ppc_avr_t result;
1796 int sat = 0;
1798 upper = 1;
1799 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1800 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1802 result.VsrD(i) = 0;
1803 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1804 t += a->VsrSW(2 * i + j);
1806 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1809 *r = result;
1810 if (sat) {
1811 set_vscr_sat(env);
1815 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1817 int i, j;
1818 int sat = 0;
1820 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1821 int64_t t = (int64_t)b->s32[i];
1823 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1824 t += a->s8[4 * i + j];
1826 r->s32[i] = cvtsdsw(t, &sat);
1829 if (sat) {
1830 set_vscr_sat(env);
1834 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1836 int sat = 0;
1837 int i;
1839 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1840 int64_t t = (int64_t)b->s32[i];
1842 t += a->s16[2 * i] + a->s16[2 * i + 1];
1843 r->s32[i] = cvtsdsw(t, &sat);
1846 if (sat) {
1847 set_vscr_sat(env);
1851 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1853 int i, j;
1854 int sat = 0;
1856 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1857 uint64_t t = (uint64_t)b->u32[i];
1859 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1860 t += a->u8[4 * i + j];
1862 r->u32[i] = cvtuduw(t, &sat);
1865 if (sat) {
1866 set_vscr_sat(env);
1870 #if HOST_BIG_ENDIAN
1871 #define UPKHI 1
1872 #define UPKLO 0
1873 #else
1874 #define UPKHI 0
1875 #define UPKLO 1
1876 #endif
1877 #define VUPKPX(suffix, hi) \
1878 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1880 int i; \
1881 ppc_avr_t result; \
1883 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1884 uint16_t e = b->u16[hi ? i : i + 4]; \
1885 uint8_t a = (e >> 15) ? 0xff : 0; \
1886 uint8_t r = (e >> 10) & 0x1f; \
1887 uint8_t g = (e >> 5) & 0x1f; \
1888 uint8_t b = e & 0x1f; \
1890 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1892 *r = result; \
1894 VUPKPX(lpx, UPKLO)
1895 VUPKPX(hpx, UPKHI)
1896 #undef VUPKPX
1898 #define VUPK(suffix, unpacked, packee, hi) \
1899 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1901 int i; \
1902 ppc_avr_t result; \
1904 if (hi) { \
1905 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1906 result.unpacked[i] = b->packee[i]; \
1908 } else { \
1909 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1910 i++) { \
1911 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1914 *r = result; \
1916 VUPK(hsb, s16, s8, UPKHI)
1917 VUPK(hsh, s32, s16, UPKHI)
1918 VUPK(hsw, s64, s32, UPKHI)
1919 VUPK(lsb, s16, s8, UPKLO)
1920 VUPK(lsh, s32, s16, UPKLO)
1921 VUPK(lsw, s64, s32, UPKLO)
1922 #undef VUPK
1923 #undef UPKHI
1924 #undef UPKLO
1926 #define VGENERIC_DO(name, element) \
1927 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1929 int i; \
1931 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1932 r->element[i] = name(b->element[i]); \
1936 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1937 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1939 VGENERIC_DO(clzb, u8)
1940 VGENERIC_DO(clzh, u16)
1942 #undef clzb
1943 #undef clzh
1945 #define ctzb(v) ((v) ? ctz32(v) : 8)
1946 #define ctzh(v) ((v) ? ctz32(v) : 16)
1947 #define ctzw(v) ctz32((v))
1948 #define ctzd(v) ctz64((v))
1950 VGENERIC_DO(ctzb, u8)
1951 VGENERIC_DO(ctzh, u16)
1952 VGENERIC_DO(ctzw, u32)
1953 VGENERIC_DO(ctzd, u64)
1955 #undef ctzb
1956 #undef ctzh
1957 #undef ctzw
1958 #undef ctzd
1960 #define popcntb(v) ctpop8(v)
1961 #define popcnth(v) ctpop16(v)
1962 #define popcntw(v) ctpop32(v)
1963 #define popcntd(v) ctpop64(v)
1965 VGENERIC_DO(popcntb, u8)
1966 VGENERIC_DO(popcnth, u16)
1967 VGENERIC_DO(popcntw, u32)
1968 VGENERIC_DO(popcntd, u64)
1970 #undef popcntb
1971 #undef popcnth
1972 #undef popcntw
1973 #undef popcntd
1975 #undef VGENERIC_DO
1977 #if HOST_BIG_ENDIAN
1978 #define QW_ONE { .u64 = { 0, 1 } }
1979 #else
1980 #define QW_ONE { .u64 = { 1, 0 } }
1981 #endif
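/*
 * Without a native 128-bit integer type, the quadword helpers below fall
 * back to the avr_qw_* routines, which operate on the two 64-bit halves of
 * a ppc_avr_t.
 */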
1983 #ifndef CONFIG_INT128
1985 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1987 t->u64[0] = ~a.u64[0];
1988 t->u64[1] = ~a.u64[1];
1991 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1993 if (a.VsrD(0) < b.VsrD(0)) {
1994 return -1;
1995 } else if (a.VsrD(0) > b.VsrD(0)) {
1996 return 1;
1997 } else if (a.VsrD(1) < b.VsrD(1)) {
1998 return -1;
1999 } else if (a.VsrD(1) > b.VsrD(1)) {
2000 return 1;
2001 } else {
2002 return 0;
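/*
 * In avr_qw_add/avr_qw_addc below, "~a.VsrD(1) < b.VsrD(1)" is true exactly
 * when adding the low doublewords wraps past 2^64, i.e. it computes the
 * carry out of the low 64-bit addition.
 */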
2006 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2008 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2009 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2010 (~a.VsrD(1) < b.VsrD(1));
2013 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2015 ppc_avr_t not_a;
2016 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2017 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2018 (~a.VsrD(1) < b.VsrD(1));
2019 avr_qw_not(&not_a, a);
2020 return avr_qw_cmpu(not_a, b) < 0;
2023 #endif
2025 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2027 #ifdef CONFIG_INT128
2028 r->u128 = a->u128 + b->u128;
2029 #else
2030 avr_qw_add(r, *a, *b);
2031 #endif
2034 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2036 #ifdef CONFIG_INT128
2037 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2038 #else
2040 if (c->VsrD(1) & 1) {
2041 ppc_avr_t tmp;
2043 tmp.VsrD(0) = 0;
2044 tmp.VsrD(1) = c->VsrD(1) & 1;
2045 avr_qw_add(&tmp, *a, tmp);
2046 avr_qw_add(r, tmp, *b);
2047 } else {
2048 avr_qw_add(r, *a, *b);
2050 #endif
2053 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2055 #ifdef CONFIG_INT128
2056 r->u128 = (~a->u128 < b->u128);
2057 #else
2058 ppc_avr_t not_a;
2060 avr_qw_not(&not_a, *a);
2062 r->VsrD(0) = 0;
2063 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
2064 #endif
2067 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2069 #ifdef CONFIG_INT128
2070 int carry_out = (~a->u128 < b->u128);
2071 if (!carry_out && (c->u128 & 1)) {
2072 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2073 ((a->u128 != 0) || (b->u128 != 0));
2075 r->u128 = carry_out;
2076 #else
2078 int carry_in = c->VsrD(1) & 1;
2079 int carry_out = 0;
2080 ppc_avr_t tmp;
2082 carry_out = avr_qw_addc(&tmp, *a, *b);
2084 if (!carry_out && carry_in) {
2085 ppc_avr_t one = QW_ONE;
2086 carry_out = avr_qw_addc(&tmp, tmp, one);
2088 r->VsrD(0) = 0;
2089 r->VsrD(1) = carry_out;
2090 #endif
2093 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2095 #ifdef CONFIG_INT128
2096 r->u128 = a->u128 - b->u128;
2097 #else
2098 ppc_avr_t tmp;
2099 ppc_avr_t one = QW_ONE;
2101 avr_qw_not(&tmp, *b);
2102 avr_qw_add(&tmp, *a, tmp);
2103 avr_qw_add(r, tmp, one);
2104 #endif
2107 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2109 #ifdef CONFIG_INT128
2110 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2111 #else
2112 ppc_avr_t tmp, sum;
2114 avr_qw_not(&tmp, *b);
2115 avr_qw_add(&sum, *a, tmp);
2117 tmp.VsrD(0) = 0;
2118 tmp.VsrD(1) = c->VsrD(1) & 1;
2119 avr_qw_add(r, sum, tmp);
2120 #endif
2123 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2125 #ifdef CONFIG_INT128
2126 r->u128 = (~a->u128 < ~b->u128) ||
2127 (a->u128 + ~b->u128 == (__uint128_t)-1);
2128 #else
2129 int carry = (avr_qw_cmpu(*a, *b) > 0);
2130 if (!carry) {
2131 ppc_avr_t tmp;
2132 avr_qw_not(&tmp, *b);
2133 avr_qw_add(&tmp, *a, tmp);
2134 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2136 r->VsrD(0) = 0;
2137 r->VsrD(1) = carry;
2138 #endif
2141 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2143 #ifdef CONFIG_INT128
2144 r->u128 =
2145 (~a->u128 < ~b->u128) ||
2146 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2147 #else
2148 int carry_in = c->VsrD(1) & 1;
2149 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2150 if (!carry_out && carry_in) {
2151 ppc_avr_t tmp;
2152 avr_qw_not(&tmp, *b);
2153 avr_qw_add(&tmp, *a, tmp);
2154 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2157 r->VsrD(0) = 0;
2158 r->VsrD(1) = carry_out;
2159 #endif
2162 #define BCD_PLUS_PREF_1 0xC
2163 #define BCD_PLUS_PREF_2 0xF
2164 #define BCD_PLUS_ALT_1 0xA
2165 #define BCD_NEG_PREF 0xD
2166 #define BCD_NEG_ALT 0xB
2167 #define BCD_PLUS_ALT_2 0xE
2168 #define NATIONAL_PLUS 0x2B
2169 #define NATIONAL_NEG 0x2D
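/*
 * Packed BCD layout used below: the 128-bit register holds 31 decimal digits
 * plus a sign code in the least significant nibble (digit index 0).  Digit n
 * lives in byte 15 - n/2 (big-endian byte order), odd-numbered digits in the
 * high nibble and even-numbered digits in the low nibble.
 */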
2171 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2173 static int bcd_get_sgn(ppc_avr_t *bcd)
2175 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2176 case BCD_PLUS_PREF_1:
2177 case BCD_PLUS_PREF_2:
2178 case BCD_PLUS_ALT_1:
2179 case BCD_PLUS_ALT_2:
2181 return 1;
2184 case BCD_NEG_PREF:
2185 case BCD_NEG_ALT:
2187 return -1;
2190 default:
2192 return 0;
2197 static int bcd_preferred_sgn(int sgn, int ps)
2199 if (sgn >= 0) {
2200 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2201 } else {
2202 return BCD_NEG_PREF;
2206 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2208 uint8_t result;
2209 if (n & 1) {
2210 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2211 } else {
2212 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2215 if (unlikely(result > 9)) {
2216 *invalid = true;
2218 return result;
2221 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2223 if (n & 1) {
2224 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2225 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2226 } else {
2227 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2228 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2232 static bool bcd_is_valid(ppc_avr_t *bcd)
2234 int i;
2235 int invalid = 0;
2237 if (bcd_get_sgn(bcd) == 0) {
2238 return false;
2241 for (i = 1; i < 32; i++) {
2242 bcd_get_digit(bcd, i, &invalid);
2243 if (unlikely(invalid)) {
2244 return false;
2247 return true;
2250 static int bcd_cmp_zero(ppc_avr_t *bcd)
2252 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2253 return CRF_EQ;
2254 } else {
2255 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2259 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2261 return reg->VsrH(7 - n);
2264 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2266 reg->VsrH(7 - n) = val;
2269 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2271 int i;
2272 int invalid = 0;
2273 for (i = 31; i > 0; i--) {
2274 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2275 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2276 if (unlikely(invalid)) {
2277 return 0; /* doesn't matter */
2278 } else if (dig_a > dig_b) {
2279 return 1;
2280 } else if (dig_a < dig_b) {
2281 return -1;
2285 return 0;
2288 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2289 int *overflow)
2291 int carry = 0;
2292 int i;
2293 int is_zero = 1;
2295 for (i = 1; i <= 31; i++) {
2296 uint8_t digit = bcd_get_digit(a, i, invalid) +
2297 bcd_get_digit(b, i, invalid) + carry;
2298 is_zero &= (digit == 0);
2299 if (digit > 9) {
2300 carry = 1;
2301 digit -= 10;
2302 } else {
2303 carry = 0;
2306 bcd_put_digit(t, digit, i);
2309 *overflow = carry;
2310 return is_zero;
2313 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2314 int *overflow)
2316 int carry = 0;
2317 int i;
2319 for (i = 1; i <= 31; i++) {
2320 uint8_t digit = bcd_get_digit(a, i, invalid) -
2321 bcd_get_digit(b, i, invalid) + carry;
2322 if (digit & 0x80) {
2323 carry = -1;
2324 digit += 10;
2325 } else {
2326 carry = 0;
2329 bcd_put_digit(t, digit, i);
2332 *overflow = carry;
2335 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2338 int sgna = bcd_get_sgn(a);
2339 int sgnb = bcd_get_sgn(b);
2340 int invalid = (sgna == 0) || (sgnb == 0);
2341 int overflow = 0;
2342 int zero = 0;
2343 uint32_t cr = 0;
2344 ppc_avr_t result = { .u64 = { 0, 0 } };
2346 if (!invalid) {
2347 if (sgna == sgnb) {
2348 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2349 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2350 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2351 } else {
2352 int magnitude = bcd_cmp_mag(a, b);
2353 if (magnitude > 0) {
2354 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2355 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2356 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2357 } else if (magnitude < 0) {
2358 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2359 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2360 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2361 } else {
2362 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2363 cr = CRF_EQ;
2368 if (unlikely(invalid)) {
2369 result.VsrD(0) = result.VsrD(1) = -1;
2370 cr = CRF_SO;
2371 } else if (overflow) {
2372 cr |= CRF_SO;
2373 } else if (zero) {
2374 cr |= CRF_EQ;
2377 *r = result;
2379 return cr;
2382 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2384 ppc_avr_t bcopy = *b;
2385 int sgnb = bcd_get_sgn(b);
2386 if (sgnb < 0) {
2387 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2388 } else if (sgnb > 0) {
2389 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2391 /* else invalid ... defer to bcdadd code for proper handling */
2393 return helper_bcdadd(r, a, &bcopy, ps);
2396 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2398 int i;
2399 int cr = 0;
2400 uint16_t national = 0;
2401 uint16_t sgnb = get_national_digit(b, 0);
2402 ppc_avr_t ret = { .u64 = { 0, 0 } };
2403 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2405 for (i = 1; i < 8; i++) {
2406 national = get_national_digit(b, i);
2407 if (unlikely(national < 0x30 || national > 0x39)) {
2408 invalid = 1;
2409 break;
2412 bcd_put_digit(&ret, national & 0xf, i);
2415 if (sgnb == NATIONAL_PLUS) {
2416 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2417 } else {
2418 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2421 cr = bcd_cmp_zero(&ret);
2423 if (unlikely(invalid)) {
2424 cr = CRF_SO;
2427 *r = ret;
2429 return cr;
2432 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2434 int i;
2435 int cr = 0;
2436 int sgnb = bcd_get_sgn(b);
2437 int invalid = (sgnb == 0);
2438 ppc_avr_t ret = { .u64 = { 0, 0 } };
2440 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2442 for (i = 1; i < 8; i++) {
2443 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2445 if (unlikely(invalid)) {
2446 break;
2449 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2451 cr = bcd_cmp_zero(b);
2453 if (ox_flag) {
2454 cr |= CRF_SO;
2457 if (unlikely(invalid)) {
2458 cr = CRF_SO;
2461 *r = ret;
2463 return cr;
2466 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2468 int i;
2469 int cr = 0;
2470 int invalid = 0;
2471 int zone_digit = 0;
2472 int zone_lead = ps ? 0xF : 0x3;
2473 int digit = 0;
2474 ppc_avr_t ret = { .u64 = { 0, 0 } };
2475 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2477 if (unlikely((sgnb < 0xA) && ps)) {
2478 invalid = 1;
2481 for (i = 0; i < 16; i++) {
2482 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2483 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2484 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2485 invalid = 1;
2486 break;
2489 bcd_put_digit(&ret, digit, i + 1);
2492 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2493 (!ps && (sgnb & 0x4))) {
2494 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2495 } else {
2496 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2499 cr = bcd_cmp_zero(&ret);
2501 if (unlikely(invalid)) {
2502 cr = CRF_SO;
2505 *r = ret;
2507 return cr;
2510 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2512 int i;
2513 int cr = 0;
2514 uint8_t digit = 0;
2515 int sgnb = bcd_get_sgn(b);
2516 int zone_lead = (ps) ? 0xF0 : 0x30;
2517 int invalid = (sgnb == 0);
2518 ppc_avr_t ret = { .u64 = { 0, 0 } };
2520 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2522 for (i = 0; i < 16; i++) {
2523 digit = bcd_get_digit(b, i + 1, &invalid);
2525 if (unlikely(invalid)) {
2526 break;
2529 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2532 if (ps) {
2533 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2534 } else {
2535 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2538 cr = bcd_cmp_zero(b);
2540 if (ox_flag) {
2541 cr |= CRF_SO;
2544 if (unlikely(invalid)) {
2545 cr = CRF_SO;
2548 *r = ret;
2550 return cr;
2553 /*
2554  * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2555  *
2556  * Returns:
2557  *   > 0 if ahi|alo > bhi|blo,
2558  *     0 if ahi|alo == bhi|blo,
2559  *   < 0 if ahi|alo < bhi|blo
2560  */
2561 static inline int ucmp128(uint64_t alo, uint64_t ahi,
2562 uint64_t blo, uint64_t bhi)
2564 return (ahi == bhi) ?
2565 (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2566 (ahi > bhi ? 1 : -1);
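/*
 * Editor's illustrative sketch (not in the upstream file): the same compare
 * expressed with the unsigned __int128 extension available in GCC/Clang,
 * useful as a mental cross-check of ucmp128; ucmp128_alt_sketch is a
 * hypothetical name.
 */
static inline int ucmp128_alt_sketch(uint64_t alo, uint64_t ahi,
                                     uint64_t blo, uint64_t bhi)
{
    unsigned __int128 a = ((unsigned __int128)ahi << 64) | alo;
    unsigned __int128 b = ((unsigned __int128)bhi << 64) | blo;

    return (a > b) - (a < b);    /* 1, 0 or -1, matching ucmp128's contract */
}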
2569 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2571 int i;
2572 int cr;
2573 uint64_t lo_value;
2574 uint64_t hi_value;
2575 uint64_t rem;
2576 ppc_avr_t ret = { .u64 = { 0, 0 } };
2578 if (b->VsrSD(0) < 0) {
2579 lo_value = -b->VsrSD(1);
2580 hi_value = ~b->VsrD(0) + !lo_value;
2581 bcd_put_digit(&ret, 0xD, 0);
2583 cr = CRF_LT;
2584 } else {
2585 lo_value = b->VsrD(1);
2586 hi_value = b->VsrD(0);
2587 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2589 if (hi_value == 0 && lo_value == 0) {
2590 cr = CRF_EQ;
2591 } else {
2592 cr = CRF_GT;
2596 /*
2597  * Check src limits: abs(src) <= 10^31 - 1
2598  *
2599  * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2600  */
2601 if (ucmp128(lo_value, hi_value,
2602 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2603 cr |= CRF_SO;
2605 /*
2606  * According to the ISA, if src wouldn't fit in the destination
2607  * register, the result is undefined.
2608  * In that case, we leave r unchanged.
2609  */
2610 } else {
2611 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2613 for (i = 1; i < 16; rem /= 10, i++) {
2614 bcd_put_digit(&ret, rem % 10, i);
2617 for (; i < 32; lo_value /= 10, i++) {
2618 bcd_put_digit(&ret, lo_value % 10, i);
2621 *r = ret;
2624 return cr;
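/*
 * Editor's illustrative sketch (not in the upstream file): the limit constant
 * used above can be cross-checked by building 10^31 - 1 with the
 * unsigned __int128 GCC/Clang extension; bcd_limit_check_sketch is a
 * hypothetical name.
 */
static inline bool bcd_limit_check_sketch(void)
{
    unsigned __int128 limit = 1;

    for (int i = 0; i < 31; i++) {
        limit *= 10;                                   /* 10^31 */
    }
    limit -= 1;                                        /* 10^31 - 1 */

    return (uint64_t)(limit >> 64) == 0x0000007e37be2022ULL &&
           (uint64_t)limit == 0xc0914b267fffffffULL;
}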
2627 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2629 uint8_t i;
2630 int cr;
2631 uint64_t carry;
2632 uint64_t unused;
2633 uint64_t lo_value;
2634 uint64_t hi_value = 0;
2635 int sgnb = bcd_get_sgn(b);
2636 int invalid = (sgnb == 0);
2638 lo_value = bcd_get_digit(b, 31, &invalid);
2639 for (i = 30; i > 0; i--) {
2640 mulu64(&lo_value, &carry, lo_value, 10ULL);
2641 mulu64(&hi_value, &unused, hi_value, 10ULL);
2642 lo_value += bcd_get_digit(b, i, &invalid);
2643 hi_value += carry;
2645 if (unlikely(invalid)) {
2646 break;
2650 if (sgnb == -1) {
2651 r->VsrSD(1) = -lo_value;
2652 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2653 } else {
2654 r->VsrSD(1) = lo_value;
2655 r->VsrSD(0) = hi_value;
2658 cr = bcd_cmp_zero(b);
2660 if (unlikely(invalid)) {
2661 cr = CRF_SO;
2664 return cr;
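/*
 * Editor's illustrative sketch (not in the upstream file): the loop above is
 * Horner evaluation of the decimal digits, carried out on a 64-bit pair with
 * mulu64.  The same idea with the unsigned __int128 GCC/Clang extension,
 * digits given most-significant first; bcd_digits_to_bin_sketch is a
 * hypothetical name.
 */
static inline unsigned __int128 bcd_digits_to_bin_sketch(const uint8_t *digits,
                                                         int ndigits)
{
    unsigned __int128 val = 0;

    for (int i = 0; i < ndigits; i++) {
        val = val * 10 + digits[i];    /* val = val * 10 + next digit */
    }
    return val;
}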
2667 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2669 int i;
2670 int invalid = 0;
2672 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2673 return CRF_SO;
2676 *r = *a;
2677 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2679 for (i = 1; i < 32; i++) {
2680 bcd_get_digit(a, i, &invalid);
2681 bcd_get_digit(b, i, &invalid);
2682 if (unlikely(invalid)) {
2683 return CRF_SO;
2687 return bcd_cmp_zero(r);
2690 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2692 int sgnb = bcd_get_sgn(b);
2694 *r = *b;
2695 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2697 if (bcd_is_valid(b) == false) {
2698 return CRF_SO;
2701 return bcd_cmp_zero(r);
2704 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2706 int cr;
2707 int i = a->VsrSB(7);
2708 bool ox_flag = false;
2709 int sgnb = bcd_get_sgn(b);
2710 ppc_avr_t ret = *b;
2711 ret.VsrD(1) &= ~0xf;
2713 if (bcd_is_valid(b) == false) {
2714 return CRF_SO;
2717 if (unlikely(i > 31)) {
2718 i = 31;
2719 } else if (unlikely(i < -31)) {
2720 i = -31;
2723 if (i > 0) {
2724 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2725 } else {
2726 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2728 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2730 *r = ret;
2732 cr = bcd_cmp_zero(r);
2733 if (ox_flag) {
2734 cr |= CRF_SO;
2737 return cr;
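/*
 * Editor's illustrative sketch (not in the upstream file): shifting packed
 * BCD by one decimal digit is a 4-bit shift of the underlying 128-bit value,
 * which is why the helpers above scale the shift count by four before calling
 * ulshift/urshift.  A minimal left shift over a lo/hi pair, ignoring the
 * overflow flag that ulshift also reports; u128_shl_sketch is a hypothetical
 * name.
 */
static inline void u128_shl_sketch(uint64_t *lo, uint64_t *hi, int shift)
{
    if (shift == 0) {
        return;
    }
    if (shift >= 128) {
        *lo = *hi = 0;
    } else if (shift >= 64) {
        *hi = *lo << (shift - 64);
        *lo = 0;
    } else {
        *hi = (*hi << shift) | (*lo >> (64 - shift));
        *lo <<= shift;
    }
}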
2740 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2742 int cr;
2743 int i;
2744 int invalid = 0;
2745 bool ox_flag = false;
2746 ppc_avr_t ret = *b;
2748 for (i = 0; i < 32; i++) {
2749 bcd_get_digit(b, i, &invalid);
2751 if (unlikely(invalid)) {
2752 return CRF_SO;
2756 i = a->VsrSB(7);
2757 if (i >= 32) {
2758 ox_flag = true;
2759 ret.VsrD(1) = ret.VsrD(0) = 0;
2760 } else if (i <= -32) {
2761 ret.VsrD(1) = ret.VsrD(0) = 0;
2762 } else if (i > 0) {
2763 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2764 } else {
2765 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2767 *r = ret;
2769 cr = bcd_cmp_zero(r);
2770 if (ox_flag) {
2771 cr |= CRF_SO;
2774 return cr;
2777 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2779 int cr;
2780 int unused = 0;
2781 int invalid = 0;
2782 bool ox_flag = false;
2783 int sgnb = bcd_get_sgn(b);
2784 ppc_avr_t ret = *b;
2785 ret.VsrD(1) &= ~0xf;
2787 int i = a->VsrSB(7);
2788 ppc_avr_t bcd_one;
2790 bcd_one.VsrD(0) = 0;
2791 bcd_one.VsrD(1) = 0x10;
2793 if (bcd_is_valid(b) == false) {
2794 return CRF_SO;
2797 if (unlikely(i > 31)) {
2798 i = 31;
2799 } else if (unlikely(i < -31)) {
2800 i = -31;
2803 if (i > 0) {
2804 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2805 } else {
2806 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2808 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2809 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2812 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2814 cr = bcd_cmp_zero(&ret);
2815 if (ox_flag) {
2816 cr |= CRF_SO;
2818 *r = ret;
2820 return cr;
2823 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2825 uint64_t mask;
2826 uint32_t ox_flag = 0;
2827 int i = a->VsrSH(3) + 1;
2828 ppc_avr_t ret = *b;
2830 if (bcd_is_valid(b) == false) {
2831 return CRF_SO;
2834 if (i > 16 && i < 32) {
2835 mask = (uint64_t)-1 >> (128 - i * 4);
2836 if (ret.VsrD(0) & ~mask) {
2837 ox_flag = CRF_SO;
2840 ret.VsrD(0) &= mask;
2841 } else if (i >= 0 && i <= 16) {
2842 mask = (uint64_t)-1 >> (64 - i * 4);
2843 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2844 ox_flag = CRF_SO;
2847 ret.VsrD(1) &= mask;
2848 ret.VsrD(0) = 0;
2850 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2851 *r = ret;
2853 return bcd_cmp_zero(&ret) | ox_flag;
2856 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2858 int i;
2859 uint64_t mask;
2860 uint32_t ox_flag = 0;
2861 int invalid = 0;
2862 ppc_avr_t ret = *b;
2864 for (i = 0; i < 32; i++) {
2865 bcd_get_digit(b, i, &invalid);
2867 if (unlikely(invalid)) {
2868 return CRF_SO;
2872 i = a->VsrSH(3);
2873 if (i > 16 && i < 33) {
2874 mask = (uint64_t)-1 >> (128 - i * 4);
2875 if (ret.VsrD(0) & ~mask) {
2876 ox_flag = CRF_SO;
2879 ret.VsrD(0) &= mask;
2880 } else if (i > 0 && i <= 16) {
2881 mask = (uint64_t)-1 >> (64 - i * 4);
2882 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2883 ox_flag = CRF_SO;
2886 ret.VsrD(1) &= mask;
2887 ret.VsrD(0) = 0;
2888 } else if (i == 0) {
2889 if (ret.VsrD(0) || ret.VsrD(1)) {
2890 ox_flag = CRF_SO;
2892 ret.VsrD(0) = ret.VsrD(1) = 0;
2895 *r = ret;
2896 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2897 return ox_flag | CRF_EQ;
2900 return ox_flag | CRF_GT;
2903 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2905 int i;
2906 VECTOR_FOR_INORDER_I(i, u8) {
2907 r->u8[i] = AES_sbox[a->u8[i]];
2911 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2913 ppc_avr_t result;
2914 int i;
2916 VECTOR_FOR_INORDER_I(i, u32) {
2917 result.VsrW(i) = b->VsrW(i) ^
2918 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2919 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2920 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2921 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2923 *r = result;
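/*
 * Editor's illustrative sketch (not in the upstream file): the AES T-tables
 * used above fold SubBytes, ShiftRows and MixColumns into four byte-indexed
 * lookups per output word.  One forward-round output column, where s0..s3 are
 * the state bytes selected by ShiftRows for that column and rk is the round
 * key word; aes_round_column_sketch is a hypothetical name.
 */
static inline uint32_t aes_round_column_sketch(uint8_t s0, uint8_t s1,
                                               uint8_t s2, uint8_t s3,
                                               uint32_t rk)
{
    return AES_Te0[s0] ^ AES_Te1[s1] ^ AES_Te2[s2] ^ AES_Te3[s3] ^ rk;
}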
2926 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2928 ppc_avr_t result;
2929 int i;
2931 VECTOR_FOR_INORDER_I(i, u8) {
2932 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2934 *r = result;
2937 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2939 /* This differs from what is written in ISA V2.07. The RTL is */
2940 /* incorrect and will be fixed in V2.07B. */
2941 int i;
2942 ppc_avr_t tmp;
2944 VECTOR_FOR_INORDER_I(i, u8) {
2945 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2948 VECTOR_FOR_INORDER_I(i, u32) {
2949 r->VsrW(i) =
2950 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2951 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2952 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2953 AES_imc[tmp.VsrB(4 * i + 3)][3];
2957 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2959 ppc_avr_t result;
2960 int i;
2962 VECTOR_FOR_INORDER_I(i, u8) {
2963 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2965 *r = result;
2968 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2970 int st = (st_six & 0x10) != 0;
2971 int six = st_six & 0xF;
2972 int i;
2974 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2975 if (st == 0) {
2976 if ((six & (0x8 >> i)) == 0) {
2977 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2978 ror32(a->VsrW(i), 18) ^
2979 (a->VsrW(i) >> 3);
2980 } else { /* six.bit[i] == 1 */
2981 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2982 ror32(a->VsrW(i), 19) ^
2983 (a->VsrW(i) >> 10);
2985 } else { /* st == 1 */
2986 if ((six & (0x8 >> i)) == 0) {
2987 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2988 ror32(a->VsrW(i), 13) ^
2989 ror32(a->VsrW(i), 22);
2990 } else { /* six.bit[i] == 1 */
2991 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2992 ror32(a->VsrW(i), 11) ^
2993 ror32(a->VsrW(i), 25);
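/*
 * Editor's illustrative sketch (not in the upstream file): for st == 0 the
 * two cases above are the SHA-256 message-schedule functions
 * sigma0(x) = ror(x,7) ^ ror(x,18) ^ (x >> 3) and
 * sigma1(x) = ror(x,17) ^ ror(x,19) ^ (x >> 10); for st == 1 they are the
 * compression functions Sigma0 and Sigma1.  Minimal standalone forms with
 * hypothetical names:
 */
static inline uint32_t sha256_sigma0_sketch(uint32_t x)
{
    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
}

static inline uint32_t sha256_Sigma0_sketch(uint32_t x)
{
    return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
}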
2999 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3001 int st = (st_six & 0x10) != 0;
3002 int six = st_six & 0xF;
3003 int i;
3005 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
3006 if (st == 0) {
3007 if ((six & (0x8 >> (2 * i))) == 0) {
3008 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
3009 ror64(a->VsrD(i), 8) ^
3010 (a->VsrD(i) >> 7);
3011 } else { /* six.bit[2*i] == 1 */
3012 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
3013 ror64(a->VsrD(i), 61) ^
3014 (a->VsrD(i) >> 6);
3016 } else { /* st == 1 */
3017 if ((six & (0x8 >> (2 * i))) == 0) {
3018 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3019 ror64(a->VsrD(i), 34) ^
3020 ror64(a->VsrD(i), 39);
3021 } else { /* six.bit[2*i] == 1 */
3022 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3023 ror64(a->VsrD(i), 18) ^
3024 ror64(a->VsrD(i), 41);
3030 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3032 ppc_avr_t result;
3033 int i;
3035 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3036 int indexA = c->VsrB(i) >> 4;
3037 int indexB = c->VsrB(i) & 0xF;
3039 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
3041 *r = result;
3044 #undef VECTOR_FOR_INORDER_I
3046 /*****************************************************************************/
3047 /* SPE extension helpers */
3048 /* Use a table to make this quicker */
3049 static const uint8_t hbrev[16] = {
3050 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3051 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3054 static inline uint8_t byte_reverse(uint8_t val)
3056 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3059 static inline uint32_t word_reverse(uint32_t val)
3061 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3062 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
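/*
 * Editor's illustrative sketch (not in the upstream file): the nibble table
 * above implements bit reversal within a byte; a direct loop gives the same
 * result and makes a handy cross-check (0x01 -> 0x80, 0x12 -> 0x48).
 * byte_reverse_loop_sketch is a hypothetical name.
 */
static inline uint8_t byte_reverse_loop_sketch(uint8_t val)
{
    uint8_t r = 0;

    for (int i = 0; i < 8; i++) {
        r = (r << 1) | ((val >> i) & 1);    /* bit i moves to bit 7 - i */
    }
    return r;
}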
3065 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
3066 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3068 uint32_t a, b, d, mask;
3070 mask = UINT32_MAX >> (32 - MASKBITS);
3071 a = arg1 & mask;
3072 b = arg2 & mask;
3073 d = word_reverse(1 + word_reverse(a | ~b));
3074 return (arg1 & ~mask) | (d & b);
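/*
 * Editor's illustrative sketch (not in the upstream file): brinc performs a
 * bit-reversed increment, the index pattern used by FFT butterflies.  Adding
 * one to the reversed index and reversing back is the same as letting the
 * carry ripple from the most-significant masked bit downwards; for three bits
 * the sequence is 0, 4, 2, 6, 1, 5, 3, 7, 0, ...
 * bit_reversed_increment_sketch is a hypothetical name.
 */
static inline uint32_t bit_reversed_increment_sketch(uint32_t idx, int bits)
{
    uint32_t bit = 1u << (bits - 1);    /* start the carry at the MSB */

    while (bit && (idx & bit)) {        /* clear 1s while the carry ripples */
        idx &= ~bit;
        bit >>= 1;
    }
    return idx | bit;                   /* set the first 0 bit encountered */
}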
3077 uint32_t helper_cntlsw32(uint32_t val)
3079 if (val & 0x80000000) {
3080 return clz32(~val);
3081 } else {
3082 return clz32(val);
3086 uint32_t helper_cntlzw32(uint32_t val)
3088 return clz32(val);
3091 /* 440 specific */
3092 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3093 target_ulong low, uint32_t update_Rc)
3095 target_ulong mask;
3096 int i;
3098 i = 1;
3099 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3100 if ((high & mask) == 0) {
3101 if (update_Rc) {
3102 env->crf[0] = 0x4;
3104 goto done;
3106 i++;
3108 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3109 if ((low & mask) == 0) {
3110 if (update_Rc) {
3111 env->crf[0] = 0x8;
3113 goto done;
3115 i++;
3117 i = 8;
3118 if (update_Rc) {
3119 env->crf[0] = 0x2;
3121 done:
3122 env->xer = (env->xer & ~0x7F) | i;
3123 if (update_Rc) {
3124 env->crf[0] |= xer_so;
3126 return i;
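/*
 * Editor's illustrative sketch (not in the upstream file): the helper above
 * scans the eight bytes of high||low most-significant byte first and returns
 * the 1-based position of the first zero byte, or 8 when there is none.  A
 * plain-C equivalent on a byte array; dlmzb_count_sketch is a hypothetical
 * name.
 */
static inline int dlmzb_count_sketch(const uint8_t bytes[8])
{
    for (int i = 0; i < 8; i++) {
        if (bytes[i] == 0) {
            return i + 1;    /* 1-based position of the first zero byte */
        }
    }
    return 8;                /* no zero byte in the string */
}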