target/ppc: Move vsel and vperm/vpermr to decodetree
qemu.git: target/ppc/int_helper.c
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
32 #include "helper_regs.h"
33 /*****************************************************************************/
34 /* Fixed point operations helpers */
36 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
38 if (unlikely(ov)) {
39 env->so = env->ov = 1;
40 } else {
41 env->ov = 0;
45 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
46 uint32_t oe)
48 uint64_t rt = 0;
49 int overflow = 0;
51 uint64_t dividend = (uint64_t)ra << 32;
52 uint64_t divisor = (uint32_t)rb;
54 if (unlikely(divisor == 0)) {
55 overflow = 1;
56 } else {
57 rt = dividend / divisor;
58 overflow = rt > UINT32_MAX;
61 if (unlikely(overflow)) {
62 rt = 0; /* Undefined */
65 if (oe) {
66 helper_update_ov_legacy(env, overflow);
69 return (target_ulong)rt;
72 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
73 uint32_t oe)
75 int64_t rt = 0;
76 int overflow = 0;
78 int64_t dividend = (int64_t)ra << 32;
79 int64_t divisor = (int64_t)((int32_t)rb);
81 if (unlikely((divisor == 0) ||
82 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
83 overflow = 1;
84 } else {
85 rt = dividend / divisor;
86 overflow = rt != (int32_t)rt;
89 if (unlikely(overflow)) {
90 rt = 0; /* Undefined */
93 if (oe) {
94 helper_update_ov_legacy(env, overflow);
97 return (target_ulong)rt;
100 #if defined(TARGET_PPC64)
102 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
104 uint64_t rt = 0;
105 int overflow = 0;
107 if (unlikely(rb == 0 || ra >= rb)) {
108 overflow = 1;
109 rt = 0; /* Undefined */
110 } else {
111 divu128(&rt, &ra, rb);
114 if (oe) {
115 helper_update_ov_legacy(env, overflow);
118 return rt;
121 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
123 uint64_t rt = 0;
124 int64_t ra = (int64_t)rau;
125 int64_t rb = (int64_t)rbu;
126 int overflow = 0;
128 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
129 overflow = 1;
130 rt = 0; /* Undefined */
131 } else {
132 divs128(&rt, &ra, rb);
135 if (oe) {
136 helper_update_ov_legacy(env, overflow);
139 return rt;
142 #endif
145 #if defined(TARGET_PPC64)
146 /* if x = 0xab, returns 0xabababababababab */
147 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
150 * subtract 1 from each byte, AND with the inverse, then check whether the
151 * MSB is set in each byte.
152 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
153 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
155 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
157 /* When you XOR the pattern and there is a match, that byte will be zero */
158 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
160 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
162 return hasvalue(rb, ra) ? CRF_GT : 0;
165 #undef pattern
166 #undef haszero
167 #undef hasvalue
170 * Return a random number.
172 uint64_t helper_darn32(void)
174 Error *err = NULL;
175 uint32_t ret;
177 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
178 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
179 error_get_pretty(err));
180 error_free(err);
181 return -1;
184 return ret;
187 uint64_t helper_darn64(void)
189 Error *err = NULL;
190 uint64_t ret;
192 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
193 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
194 error_get_pretty(err));
195 error_free(err);
196 return -1;
199 return ret;
202 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
204 int i;
205 uint64_t ra = 0;
207 for (i = 0; i < 8; i++) {
208 int index = (rs >> (i * 8)) & 0xFF;
209 if (index < 64) {
210 if (rb & PPC_BIT(index)) {
211 ra |= 1 << i;
215 return ra;
218 #endif
220 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
222 target_ulong mask = 0xff;
223 target_ulong ra = 0;
224 int i;
226 for (i = 0; i < sizeof(target_ulong); i++) {
227 if ((rs & mask) == (rb & mask)) {
228 ra |= mask;
230 mask <<= 8;
232 return ra;
235 /* shift right arithmetic helper */
236 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
237 target_ulong shift)
239 int32_t ret;
241 if (likely(!(shift & 0x20))) {
242 if (likely((uint32_t)shift != 0)) {
243 shift &= 0x1f;
244 ret = (int32_t)value >> shift;
245 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
246 env->ca32 = env->ca = 0;
247 } else {
248 env->ca32 = env->ca = 1;
250 } else {
251 ret = (int32_t)value;
252 env->ca32 = env->ca = 0;
254 } else {
255 ret = (int32_t)value >> 31;
256 env->ca32 = env->ca = (ret != 0);
258 return (target_long)ret;
261 #if defined(TARGET_PPC64)
262 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
263 target_ulong shift)
265 int64_t ret;
267 if (likely(!(shift & 0x40))) {
268 if (likely((uint64_t)shift != 0)) {
269 shift &= 0x3f;
270 ret = (int64_t)value >> shift;
271 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
272 env->ca32 = env->ca = 0;
273 } else {
274 env->ca32 = env->ca = 1;
276 } else {
277 ret = (int64_t)value;
278 env->ca32 = env->ca = 0;
280 } else {
281 ret = (int64_t)value >> 63;
282 env->ca32 = env->ca = (ret != 0);
284 return ret;
286 #endif
288 #if defined(TARGET_PPC64)
289 target_ulong helper_popcntb(target_ulong val)
291 /* Note that we don't fold past bytes */
292 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
293 0x5555555555555555ULL);
294 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
295 0x3333333333333333ULL);
296 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
297 0x0f0f0f0f0f0f0f0fULL);
298 return val;
301 target_ulong helper_popcntw(target_ulong val)
303 /* Note that we don't fold past words. */
304 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
305 0x5555555555555555ULL);
306 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
307 0x3333333333333333ULL);
308 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
309 0x0f0f0f0f0f0f0f0fULL);
310 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
311 0x00ff00ff00ff00ffULL);
312 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
313 0x0000ffff0000ffffULL);
314 return val;
316 #else
317 target_ulong helper_popcntb(target_ulong val)
319 /* Note that we don't fold past bytes */
320 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
321 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
322 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
323 return val;
325 #endif
327 uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
330 * Instead of processing the mask bit-by-bit from the most significant to
331 * the least significant bit, as described in PowerISA, we'll handle it in
331 blocks of 'n' zeros/ones from LSB to MSB. To avoid having to choose
332 between ctz and cto, we negate the mask at the end of each iteration.
335 target_ulong m, left = 0, right = 0;
336 unsigned int n, i = 64;
337 bool bit = false; /* tracks if we are processing zeros or ones */
339 if (mask == 0 || mask == -1) {
340 return src;
343 /* Processes the mask in blocks, from LSB to MSB */
344 while (i) {
345 /* Find how many bits we should take */
346 n = ctz64(mask);
347 if (n > i) {
348 n = i;
352 * Extract the 'n' trailing bits of src and put them in the leading 'n'
353 * bits of 'right' or 'left', pushing down the previously extracted
354 * values.
356 m = (1ll << n) - 1;
357 if (bit) {
358 right = ror64(right | (src & m), n);
359 } else {
360 left = ror64(left | (src & m), n);
364 * Discards the processed bits from 'src' and 'mask'. Note that we are
365 * removing 'n' trailing zeros from 'mask', but the logical shift will
366 * add 'n' leading zeros back, so the population count of 'mask' is kept
367 * the same.
369 src >>= n;
370 mask >>= n;
371 i -= n;
372 bit = !bit;
373 mask = ~mask;
377 * At the end, 'right' has been rotated right by ctpop(mask) bits. To put
378 * it back in place, we shift it right by a further 64 - ctpop(mask) bits.
380 if (bit) {
381 n = ctpop64(mask);
382 } else {
383 n = 64 - ctpop64(mask);
386 return left | (right >> n);
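/*
 * PDEPD (parallel bits deposit): scatter the low-order bits of 'src' into
 * the bit positions where 'mask' is 1, lowest set position first.
 * e.g. src = ....1011, mask = 11001010 -> result = 10001010.
 */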
389 uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
391 int i, o;
392 uint64_t result = 0;
394 if (mask == -1) {
395 return src;
398 for (i = 0; mask != 0; i++) {
399 o = ctz64(mask);
400 mask &= mask - 1;
401 result |= ((src >> i) & 1) << o;
404 return result;
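/*
 * PEXTD (parallel bits extract): gather the bits of 'src' located at the
 * positions where 'mask' is 1 into the low-order bits of the result,
 * lowest set position first (the inverse operation of PDEPD).
 */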
407 uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
409 int i, o;
410 uint64_t result = 0;
412 if (mask == -1) {
413 return src;
416 for (o = 0; mask != 0; o++) {
417 i = ctz64(mask);
418 mask &= mask - 1;
419 result |= ((src >> i) & 1) << o;
422 return result;
425 /*****************************************************************************/
426 /* Altivec extension helpers */
427 #if defined(HOST_WORDS_BIGENDIAN)
428 #define VECTOR_FOR_INORDER_I(index, element) \
429 for (index = 0; index < ARRAY_SIZE(r->element); index++)
430 #else
431 #define VECTOR_FOR_INORDER_I(index, element) \
432 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
433 #endif
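/*
 * VECTOR_FOR_INORDER_I iterates over the elements of a vector register in
 * PowerPC (big-endian) element order. On little-endian hosts the array is
 * stored reversed, so the loop simply runs backwards over the indices.
 */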
435 /* Saturating arithmetic helpers. */
436 #define SATCVT(from, to, from_type, to_type, min, max) \
437 static inline to_type cvt##from##to(from_type x, int *sat) \
439 to_type r; \
441 if (x < (from_type)min) { \
442 r = min; \
443 *sat = 1; \
444 } else if (x > (from_type)max) { \
445 r = max; \
446 *sat = 1; \
447 } else { \
448 r = x; \
450 return r; \
452 #define SATCVTU(from, to, from_type, to_type, min, max) \
453 static inline to_type cvt##from##to(from_type x, int *sat) \
455 to_type r; \
457 if (x > (from_type)max) { \
458 r = max; \
459 *sat = 1; \
460 } else { \
461 r = x; \
463 return r; \
465 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
466 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
467 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
469 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
470 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
471 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
472 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
473 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
474 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
475 #undef SATCVT
476 #undef SATCVTU
478 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
480 ppc_store_vscr(env, vscr);
483 uint32_t helper_mfvscr(CPUPPCState *env)
485 return ppc_get_vscr(env);
488 static inline void set_vscr_sat(CPUPPCState *env)
490 /* The choice of non-zero value is arbitrary. */
491 env->vscr_sat.u32[0] = 1;
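/*
 * vaddcuw: compute the carry out of each 32-bit unsigned addition.
 * ~a < b is the usual carry-out test: it is true exactly when a + b
 * overflows 32 bits.
 */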
494 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
496 int i;
498 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
499 r->u32[i] = ~a->u32[i] < b->u32[i];
503 /* vprtybw */
504 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
506 int i;
507 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
508 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
509 res ^= res >> 8;
510 r->u32[i] = res & 1;
514 /* vprtybd */
515 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
517 int i;
518 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
519 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
520 res ^= res >> 16;
521 res ^= res >> 8;
522 r->u64[i] = res & 1;
526 /* vprtybq */
527 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
529 uint64_t res = b->u64[0] ^ b->u64[1];
530 res ^= res >> 32;
531 res ^= res >> 16;
532 res ^= res >> 8;
533 r->VsrD(1) = res & 1;
534 r->VsrD(0) = 0;
537 #define VARITHFP(suffix, func) \
538 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
539 ppc_avr_t *b) \
541 int i; \
543 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
544 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
547 VARITHFP(addfp, float32_add)
548 VARITHFP(subfp, float32_sub)
549 VARITHFP(minfp, float32_min)
550 VARITHFP(maxfp, float32_max)
551 #undef VARITHFP
553 #define VARITHFPFMA(suffix, type) \
554 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
555 ppc_avr_t *b, ppc_avr_t *c) \
557 int i; \
558 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
559 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
560 type, &env->vec_status); \
563 VARITHFPFMA(maddfp, 0);
564 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
565 #undef VARITHFPFMA
567 #define VARITHSAT_CASE(type, op, cvt, element) \
569 type result = (type)a->element[i] op (type)b->element[i]; \
570 r->element[i] = cvt(result, &sat); \
573 #define VARITHSAT_DO(name, op, optype, cvt, element) \
574 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
575 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
577 int sat = 0; \
578 int i; \
580 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
581 VARITHSAT_CASE(optype, op, cvt, element); \
583 if (sat) { \
584 vscr_sat->u32[0] = 1; \
587 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
588 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
589 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
590 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
591 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
592 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
593 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
594 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
595 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
596 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
597 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
598 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
599 #undef VARITHSAT_CASE
600 #undef VARITHSAT_DO
601 #undef VARITHSAT_SIGNED
602 #undef VARITHSAT_UNSIGNED
604 #define VAVG_DO(name, element, etype) \
605 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
607 int i; \
609 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
610 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
611 r->element[i] = x >> 1; \
615 #define VAVG(type, signed_element, signed_type, unsigned_element, \
616 unsigned_type) \
617 VAVG_DO(avgs##type, signed_element, signed_type) \
618 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
619 VAVG(b, s8, int16_t, u8, uint16_t)
620 VAVG(h, s16, int32_t, u16, uint32_t)
621 VAVG(w, s32, int64_t, u32, uint64_t)
622 #undef VAVG_DO
623 #undef VAVG
625 #define VABSDU_DO(name, element) \
626 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
628 int i; \
630 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
631 r->element[i] = (a->element[i] > b->element[i]) ? \
632 (a->element[i] - b->element[i]) : \
633 (b->element[i] - a->element[i]); \
638 * VABSDU - Vector absolute difference unsigned
639 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
640 * element - element type to access from vector
642 #define VABSDU(type, element) \
643 VABSDU_DO(absdu##type, element)
644 VABSDU(b, u8)
645 VABSDU(h, u16)
646 VABSDU(w, u32)
647 #undef VABSDU_DO
648 #undef VABSDU
650 #define VCF(suffix, cvt, element) \
651 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
652 ppc_avr_t *b, uint32_t uim) \
654 int i; \
656 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
657 float32 t = cvt(b->element[i], &env->vec_status); \
658 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
661 VCF(ux, uint32_to_float32, u32)
662 VCF(sx, int32_to_float32, s32)
663 #undef VCF
665 #define VCMPNEZ(NAME, ELEM) \
666 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
668 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \
669 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \
670 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \
673 VCMPNEZ(VCMPNEZB, u8)
674 VCMPNEZ(VCMPNEZH, u16)
675 VCMPNEZ(VCMPNEZW, u32)
676 #undef VCMPNEZ
678 #define VCMPFP_DO(suffix, compare, order, record) \
679 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
680 ppc_avr_t *a, ppc_avr_t *b) \
682 uint32_t ones = (uint32_t)-1; \
683 uint32_t all = ones; \
684 uint32_t none = 0; \
685 int i; \
687 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
688 uint32_t result; \
689 FloatRelation rel = \
690 float32_compare_quiet(a->f32[i], b->f32[i], \
691 &env->vec_status); \
692 if (rel == float_relation_unordered) { \
693 result = 0; \
694 } else if (rel compare order) { \
695 result = ones; \
696 } else { \
697 result = 0; \
699 r->u32[i] = result; \
700 all &= result; \
701 none |= result; \
703 if (record) { \
704 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
707 #define VCMPFP(suffix, compare, order) \
708 VCMPFP_DO(suffix, compare, order, 0) \
709 VCMPFP_DO(suffix##_dot, compare, order, 1)
710 VCMPFP(eqfp, ==, float_relation_equal)
711 VCMPFP(gefp, !=, float_relation_less)
712 VCMPFP(gtfp, ==, float_relation_greater)
713 #undef VCMPFP_DO
714 #undef VCMPFP
716 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
717 ppc_avr_t *a, ppc_avr_t *b, int record)
719 int i;
720 int all_in = 0;
722 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
723 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
724 &env->vec_status);
725 if (le_rel == float_relation_unordered) {
726 r->u32[i] = 0xc0000000;
727 all_in = 1;
728 } else {
729 float32 bneg = float32_chs(b->f32[i]);
730 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
731 &env->vec_status);
732 int le = le_rel != float_relation_greater;
733 int ge = ge_rel != float_relation_less;
735 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
736 all_in |= (!le | !ge);
739 if (record) {
740 env->crf[6] = (all_in == 0) << 1;
744 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
746 vcmpbfp_internal(env, r, a, b, 0);
749 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
750 ppc_avr_t *b)
752 vcmpbfp_internal(env, r, a, b, 1);
755 #define VCT(suffix, satcvt, element) \
756 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
757 ppc_avr_t *b, uint32_t uim) \
759 int i; \
760 int sat = 0; \
761 float_status s = env->vec_status; \
763 set_float_rounding_mode(float_round_to_zero, &s); \
764 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
765 if (float32_is_any_nan(b->f32[i])) { \
766 r->element[i] = 0; \
767 } else { \
768 float64 t = float32_to_float64(b->f32[i], &s); \
769 int64_t j; \
771 t = float64_scalbn(t, uim, &s); \
772 j = float64_to_int64(t, &s); \
773 r->element[i] = satcvt(j, &sat); \
776 if (sat) { \
777 set_vscr_sat(env); \
780 VCT(uxs, cvtsduw, u32)
781 VCT(sxs, cvtsdsw, s32)
782 #undef VCT
784 target_ulong helper_vclzlsbb(ppc_avr_t *r)
786 target_ulong count = 0;
787 int i;
788 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
789 if (r->VsrB(i) & 0x01) {
790 break;
792 count++;
794 return count;
797 target_ulong helper_vctzlsbb(ppc_avr_t *r)
799 target_ulong count = 0;
800 int i;
801 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
802 if (r->VsrB(i) & 0x01) {
803 break;
805 count++;
807 return count;
810 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
811 ppc_avr_t *b, ppc_avr_t *c)
813 int sat = 0;
814 int i;
816 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
817 int32_t prod = a->s16[i] * b->s16[i];
818 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
820 r->s16[i] = cvtswsh(t, &sat);
823 if (sat) {
824 set_vscr_sat(env);
828 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
829 ppc_avr_t *b, ppc_avr_t *c)
831 int sat = 0;
832 int i;
834 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
835 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
836 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
837 r->s16[i] = cvtswsh(t, &sat);
840 if (sat) {
841 set_vscr_sat(env);
845 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
847 int i;
849 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
850 int32_t prod = a->s16[i] * b->s16[i];
851 r->s16[i] = (int16_t) (prod + c->s16[i]);
855 #define VMRG_DO(name, element, access, ofs) \
856 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
858 ppc_avr_t result; \
859 int i, half = ARRAY_SIZE(r->element) / 2; \
861 for (i = 0; i < half; i++) { \
862 result.access(i * 2 + 0) = a->access(i + ofs); \
863 result.access(i * 2 + 1) = b->access(i + ofs); \
865 *r = result; \
868 #define VMRG(suffix, element, access) \
869 VMRG_DO(mrgl##suffix, element, access, half) \
870 VMRG_DO(mrgh##suffix, element, access, 0)
871 VMRG(b, u8, VsrB)
872 VMRG(h, u16, VsrH)
873 VMRG(w, u32, VsrW)
874 #undef VMRG_DO
875 #undef VMRG
877 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
878 ppc_avr_t *b, ppc_avr_t *c)
880 int32_t prod[16];
881 int i;
883 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
884 prod[i] = (int32_t)a->s8[i] * b->u8[i];
887 VECTOR_FOR_INORDER_I(i, s32) {
888 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
889 prod[4 * i + 2] + prod[4 * i + 3];
893 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
894 ppc_avr_t *b, ppc_avr_t *c)
896 int32_t prod[8];
897 int i;
899 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
900 prod[i] = a->s16[i] * b->s16[i];
903 VECTOR_FOR_INORDER_I(i, s32) {
904 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
908 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
909 ppc_avr_t *b, ppc_avr_t *c)
911 int32_t prod[8];
912 int i;
913 int sat = 0;
915 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
916 prod[i] = (int32_t)a->s16[i] * b->s16[i];
919 VECTOR_FOR_INORDER_I(i, s32) {
920 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
922 r->u32[i] = cvtsdsw(t, &sat);
925 if (sat) {
926 set_vscr_sat(env);
930 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
931 ppc_avr_t *b, ppc_avr_t *c)
933 uint16_t prod[16];
934 int i;
936 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
937 prod[i] = a->u8[i] * b->u8[i];
940 VECTOR_FOR_INORDER_I(i, u32) {
941 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
942 prod[4 * i + 2] + prod[4 * i + 3];
946 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
947 ppc_avr_t *b, ppc_avr_t *c)
949 uint32_t prod[8];
950 int i;
952 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
953 prod[i] = a->u16[i] * b->u16[i];
956 VECTOR_FOR_INORDER_I(i, u32) {
957 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
961 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
962 ppc_avr_t *b, ppc_avr_t *c)
964 uint32_t prod[8];
965 int i;
966 int sat = 0;
968 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
969 prod[i] = a->u16[i] * b->u16[i];
972 VECTOR_FOR_INORDER_I(i, s32) {
973 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
975 r->u32[i] = cvtuduw(t, &sat);
978 if (sat) {
979 set_vscr_sat(env);
983 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
984 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
986 int i; \
988 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
989 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
990 (cast)b->mul_access(i); \
994 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
995 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
997 int i; \
999 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1000 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1001 (cast)b->mul_access(i + 1); \
1005 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1006 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
1007 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1008 VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1009 VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1010 VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1011 VMUL(UB, u8, VsrB, VsrH, uint16_t)
1012 VMUL(UH, u16, VsrH, VsrW, uint32_t)
1013 VMUL(UW, u32, VsrW, VsrD, uint64_t)
1014 #undef VMUL_DO_EVN
1015 #undef VMUL_DO_ODD
1016 #undef VMUL
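/*
 * VPERM: each byte of 'c' selects one byte from the 32-byte concatenation
 * of 'a' and 'b'; selector bit 0x10 chooses between the two sources and the
 * low four bits give the byte index. VPERMR uses the same selectors but
 * indexes the concatenation from the opposite end (15 - index).
 */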
1018 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1020 ppc_avr_t result;
1021 int i;
1023 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1024 int s = c->VsrB(i) & 0x1f;
1025 int index = s & 0xf;
1027 if (s & 0x10) {
1028 result.VsrB(i) = b->VsrB(index);
1029 } else {
1030 result.VsrB(i) = a->VsrB(index);
1033 *r = result;
1036 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1038 ppc_avr_t result;
1039 int i;
1041 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1042 int s = c->VsrB(i) & 0x1f;
1043 int index = 15 - (s & 0xf);
1045 if (s & 0x10) {
1046 result.VsrB(i) = a->VsrB(index);
1047 } else {
1048 result.VsrB(i) = b->VsrB(index);
1051 *r = result;
1054 #if defined(HOST_WORDS_BIGENDIAN)
1055 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1056 #define VBPERMD_INDEX(i) (i)
1057 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1058 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1059 #else
1060 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1061 #define VBPERMD_INDEX(i) (1 - i)
1062 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1063 #define EXTRACT_BIT(avr, i, index) \
1064 (extract64((avr)->u64[1 - i], 63 - index, 1))
1065 #endif
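/*
 * vbpermd/vbpermq: each selector byte of 'b' names a bit position in 'a'
 * (0-63 per doubleword for vbpermd, 0-127 for vbpermq); the selected bits
 * are gathered into the low-order byte of each doubleword (vbpermd) or
 * into the low 16 bits of the high doubleword of the result (vbpermq).
 */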
1067 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1069 int i, j;
1070 ppc_avr_t result = { .u64 = { 0, 0 } };
1071 VECTOR_FOR_INORDER_I(i, u64) {
1072 for (j = 0; j < 8; j++) {
1073 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1074 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1075 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1079 *r = result;
1082 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1084 int i;
1085 uint64_t perm = 0;
1087 VECTOR_FOR_INORDER_I(i, u8) {
1088 int index = VBPERMQ_INDEX(b, i);
1090 if (index < 128) {
1091 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1092 if (a->u64[VBPERMQ_DW(index)] & mask) {
1093 perm |= (0x8000 >> i);
1098 r->VsrD(0) = perm;
1099 r->VsrD(1) = 0;
1102 #undef VBPERMQ_INDEX
1103 #undef VBPERMQ_DW
1105 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1106 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1108 int i, j; \
1109 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1111 VECTOR_FOR_INORDER_I(i, srcfld) { \
1112 prod[i] = 0; \
1113 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1114 if (a->srcfld[i] & (1ull << j)) { \
1115 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1120 VECTOR_FOR_INORDER_I(i, trgfld) { \
1121 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1125 PMSUM(vpmsumb, u8, u16, uint16_t)
1126 PMSUM(vpmsumh, u16, u32, uint32_t)
1127 PMSUM(vpmsumw, u32, u64, uint64_t)
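/*
 * vpmsumd: carry-less (polynomial over GF(2)) multiplication of each pair
 * of 64-bit doublewords, with the two 128-bit products XORed together.
 * Without __int128 support the 128-bit product is accumulated as a pair of
 * 64-bit halves, shifting 'b' into position one set bit of 'a' at a time.
 */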
1129 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1132 #ifdef CONFIG_INT128
1133 int i, j;
1134 __uint128_t prod[2];
1136 VECTOR_FOR_INORDER_I(i, u64) {
1137 prod[i] = 0;
1138 for (j = 0; j < 64; j++) {
1139 if (a->u64[i] & (1ull << j)) {
1140 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1145 r->u128 = prod[0] ^ prod[1];
1147 #else
1148 int i, j;
1149 ppc_avr_t prod[2];
1151 VECTOR_FOR_INORDER_I(i, u64) {
1152 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1153 for (j = 0; j < 64; j++) {
1154 if (a->u64[i] & (1ull << j)) {
1155 ppc_avr_t bshift;
1156 if (j == 0) {
1157 bshift.VsrD(0) = 0;
1158 bshift.VsrD(1) = b->u64[i];
1159 } else {
1160 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1161 bshift.VsrD(1) = b->u64[i] << j;
1163 prod[i].VsrD(1) ^= bshift.VsrD(1);
1164 prod[i].VsrD(0) ^= bshift.VsrD(0);
1169 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1170 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1171 #endif
1175 #if defined(HOST_WORDS_BIGENDIAN)
1176 #define PKBIG 1
1177 #else
1178 #define PKBIG 0
1179 #endif
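/*
 * vpkpx: pack each 32-bit source pixel down to 16 bits by keeping bit 24
 * and the top five bits of each of the three low-order bytes (1:5:5:5).
 */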
1180 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1182 int i, j;
1183 ppc_avr_t result;
1184 #if defined(HOST_WORDS_BIGENDIAN)
1185 const ppc_avr_t *x[2] = { a, b };
1186 #else
1187 const ppc_avr_t *x[2] = { b, a };
1188 #endif
1190 VECTOR_FOR_INORDER_I(i, u64) {
1191 VECTOR_FOR_INORDER_I(j, u32) {
1192 uint32_t e = x[i]->u32[j];
1194 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1195 ((e >> 6) & 0x3e0) |
1196 ((e >> 3) & 0x1f));
1199 *r = result;
1202 #define VPK(suffix, from, to, cvt, dosat) \
1203 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1204 ppc_avr_t *a, ppc_avr_t *b) \
1206 int i; \
1207 int sat = 0; \
1208 ppc_avr_t result; \
1209 ppc_avr_t *a0 = PKBIG ? a : b; \
1210 ppc_avr_t *a1 = PKBIG ? b : a; \
1212 VECTOR_FOR_INORDER_I(i, from) { \
1213 result.to[i] = cvt(a0->from[i], &sat); \
1214 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1216 *r = result; \
1217 if (dosat && sat) { \
1218 set_vscr_sat(env); \
1221 #define I(x, y) (x)
1222 VPK(shss, s16, s8, cvtshsb, 1)
1223 VPK(shus, s16, u8, cvtshub, 1)
1224 VPK(swss, s32, s16, cvtswsh, 1)
1225 VPK(swus, s32, u16, cvtswuh, 1)
1226 VPK(sdss, s64, s32, cvtsdsw, 1)
1227 VPK(sdus, s64, u32, cvtsduw, 1)
1228 VPK(uhus, u16, u8, cvtuhub, 1)
1229 VPK(uwus, u32, u16, cvtuwuh, 1)
1230 VPK(udus, u64, u32, cvtuduw, 1)
1231 VPK(uhum, u16, u8, I, 0)
1232 VPK(uwum, u32, u16, I, 0)
1233 VPK(udum, u64, u32, I, 0)
1234 #undef I
1235 #undef VPK
1236 #undef PKBIG
1238 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1240 int i;
1242 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1243 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1247 #define VRFI(suffix, rounding) \
1248 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1249 ppc_avr_t *b) \
1251 int i; \
1252 float_status s = env->vec_status; \
1254 set_float_rounding_mode(rounding, &s); \
1255 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1256 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1259 VRFI(n, float_round_nearest_even)
1260 VRFI(m, float_round_down)
1261 VRFI(p, float_round_up)
1262 VRFI(z, float_round_to_zero)
1263 #undef VRFI
1265 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1267 int i;
1269 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1270 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1272 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1276 #define VRLMI(name, size, element, insert) \
1277 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
1279 int i; \
1280 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1281 uint##size##_t src1 = a->element[i]; \
1282 uint##size##_t src2 = b->element[i]; \
1283 uint##size##_t src3 = r->element[i]; \
1284 uint##size##_t begin, end, shift, mask, rot_val; \
1286 shift = extract##size(src2, 0, 6); \
1287 end = extract##size(src2, 8, 6); \
1288 begin = extract##size(src2, 16, 6); \
1289 rot_val = rol##size(src1, shift); \
1290 mask = mask_u##size(begin, end); \
1291 if (insert) { \
1292 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1293 } else { \
1294 r->element[i] = (rot_val & mask); \
1299 VRLMI(VRLDMI, 64, u64, 1);
1300 VRLMI(VRLWMI, 32, u32, 1);
1301 VRLMI(VRLDNM, 64, u64, 0);
1302 VRLMI(VRLWNM, 32, u32, 0);
1304 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1306 int i;
1308 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1309 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1313 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1315 int i;
1317 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1318 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1322 #define VEXTU_X_DO(name, size, left) \
1323 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1325 int index = (a & 0xf) * 8; \
1326 if (left) { \
1327 index = 128 - index - size; \
1329 return int128_getlo(int128_rshift(b->s128, index)) & \
1330 MAKE_64BIT_MASK(0, size); \
1332 VEXTU_X_DO(vextublx, 8, 1)
1333 VEXTU_X_DO(vextuhlx, 16, 1)
1334 VEXTU_X_DO(vextuwlx, 32, 1)
1335 VEXTU_X_DO(vextubrx, 8, 0)
1336 VEXTU_X_DO(vextuhrx, 16, 0)
1337 VEXTU_X_DO(vextuwrx, 32, 0)
1338 #undef VEXTU_X_DO
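/*
 * vslv/vsrv: shift each byte of 'a' left/right by the shift count held in
 * the low three bits of the corresponding byte of 'b', pulling the bits
 * shifted in from the neighbouring byte of 'a'.
 */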
1340 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1342 int i;
1343 unsigned int shift, bytes, size;
1345 size = ARRAY_SIZE(r->u8);
1346 for (i = 0; i < size; i++) {
1347 shift = b->VsrB(i) & 0x7; /* extract shift value */
1348 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1349 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1350 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1354 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1356 int i;
1357 unsigned int shift, bytes;
1360 * Use reverse order, as the destination and source registers can be the
1361 * same. The register is modified in place, saving a temporary; processing
1362 * in reverse order guarantees that the computed result is not fed back.
1364 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1365 shift = b->VsrB(i) & 0x7; /* extract shift value */
1366 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1367 /* extract adjacent bytes */
1368 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1372 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1374 int sh = shift & 0xf;
1375 int i;
1376 ppc_avr_t result;
1378 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1379 int index = sh + i;
1380 if (index > 0xf) {
1381 result.VsrB(i) = b->VsrB(index - 0x10);
1382 } else {
1383 result.VsrB(i) = a->VsrB(index);
1386 *r = result;
1389 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1391 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1393 #if defined(HOST_WORDS_BIGENDIAN)
1394 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1395 memset(&r->u8[16 - sh], 0, sh);
1396 #else
1397 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1398 memset(&r->u8[0], 0, sh);
1399 #endif
1402 #if defined(HOST_WORDS_BIGENDIAN)
1403 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1404 #else
1405 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1406 #endif
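/*
 * ELEM_ADDR yields the host address of the element of size SIZE whose first
 * byte sits at PowerPC byte index IDX, accounting for the reversed byte
 * order used to store vectors on little-endian hosts.
 */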
1408 #define VINSX(SUFFIX, TYPE) \
1409 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1410 uint64_t val, target_ulong index) \
1412 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1413 target_long idx = index; \
1415 if (idx < 0 || idx > maxidx) { \
1416 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1417 qemu_log_mask(LOG_GUEST_ERROR, \
1418 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1419 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1420 } else { \
1421 TYPE src = val; \
1422 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1425 VINSX(B, uint8_t)
1426 VINSX(H, uint16_t)
1427 VINSX(W, uint32_t)
1428 VINSX(D, uint64_t)
1429 #undef ELEM_ADDR
1430 #undef VINSX
1431 #if defined(HOST_WORDS_BIGENDIAN)
1432 #define VEXTDVLX(NAME, SIZE) \
1433 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1434 target_ulong index) \
1436 const target_long idx = index; \
1437 ppc_avr_t tmp[2] = { *a, *b }; \
1438 memset(t, 0, sizeof(*t)); \
1439 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1440 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1441 } else { \
1442 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1443 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1444 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1447 #else
1448 #define VEXTDVLX(NAME, SIZE) \
1449 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1450 target_ulong index) \
1452 const target_long idx = index; \
1453 ppc_avr_t tmp[2] = { *b, *a }; \
1454 memset(t, 0, sizeof(*t)); \
1455 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1456 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1457 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1458 } else { \
1459 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1460 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1461 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1464 #endif
1465 VEXTDVLX(VEXTDUBVLX, 1)
1466 VEXTDVLX(VEXTDUHVLX, 2)
1467 VEXTDVLX(VEXTDUWVLX, 4)
1468 VEXTDVLX(VEXTDDVLX, 8)
1469 #undef VEXTDVLX
1470 #if defined(HOST_WORDS_BIGENDIAN)
1471 #define VEXTRACT(suffix, element) \
1472 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1474 uint32_t es = sizeof(r->element[0]); \
1475 memmove(&r->u8[8 - es], &b->u8[index], es); \
1476 memset(&r->u8[8], 0, 8); \
1477 memset(&r->u8[0], 0, 8 - es); \
1479 #else
1480 #define VEXTRACT(suffix, element) \
1481 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1483 uint32_t es = sizeof(r->element[0]); \
1484 uint32_t s = (16 - index) - es; \
1485 memmove(&r->u8[8], &b->u8[s], es); \
1486 memset(&r->u8[0], 0, 8); \
1487 memset(&r->u8[8 + es], 0, 8 - es); \
1489 #endif
1490 VEXTRACT(ub, u8)
1491 VEXTRACT(uh, u16)
1492 VEXTRACT(uw, u32)
1493 VEXTRACT(d, u64)
1494 #undef VEXTRACT
1496 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
1497 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
1499 int i, idx, crf = 0; \
1501 for (i = 0; i < NUM_ELEMS; i++) { \
1502 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1503 if (b->Vsr##ELEM(idx)) { \
1504 t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \
1505 } else { \
1506 crf = 0b0010; \
1507 break; \
1511 for (; i < NUM_ELEMS; i++) { \
1512 idx = LEFT ? i : NUM_ELEMS - i - 1; \
1513 t->Vsr##ELEM(idx) = 0; \
1516 return crf; \
1518 VSTRI(VSTRIBL, B, 16, true)
1519 VSTRI(VSTRIBR, B, 16, false)
1520 VSTRI(VSTRIHL, H, 8, true)
1521 VSTRI(VSTRIHR, H, 8, false)
1522 #undef VSTRI
1524 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1525 ppc_vsr_t *xb, uint32_t index)
1527 ppc_vsr_t t = { };
1528 size_t es = sizeof(uint32_t);
1529 uint32_t ext_index;
1530 int i;
1532 ext_index = index;
1533 for (i = 0; i < es; i++, ext_index++) {
1534 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1537 *xt = t;
1540 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1541 ppc_vsr_t *xb, uint32_t index)
1543 ppc_vsr_t t = *xt;
1544 size_t es = sizeof(uint32_t);
1545 int ins_index, i = 0;
1547 ins_index = index;
1548 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1549 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1552 *xt = t;
1555 #define XXBLEND(name, sz) \
1556 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1557 ppc_avr_t *c, uint32_t desc) \
1559 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1560 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1561 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1564 XXBLEND(B, 8)
1565 XXBLEND(H, 16)
1566 XXBLEND(W, 32)
1567 XXBLEND(D, 64)
1568 #undef XXBLEND
1570 #define VNEG(name, element) \
1571 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1573 int i; \
1574 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1575 r->element[i] = -b->element[i]; \
1578 VNEG(vnegw, s32)
1579 VNEG(vnegd, s64)
1580 #undef VNEG
1582 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1584 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1586 #if defined(HOST_WORDS_BIGENDIAN)
1587 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1588 memset(&r->u8[0], 0, sh);
1589 #else
1590 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1591 memset(&r->u8[16 - sh], 0, sh);
1592 #endif
1595 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1597 int i;
1599 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1600 r->u32[i] = a->u32[i] >= b->u32[i];
1604 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1606 int64_t t;
1607 int i, upper;
1608 ppc_avr_t result;
1609 int sat = 0;
1611 upper = ARRAY_SIZE(r->s32) - 1;
1612 t = (int64_t)b->VsrSW(upper);
1613 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1614 t += a->VsrSW(i);
1615 result.VsrSW(i) = 0;
1617 result.VsrSW(upper) = cvtsdsw(t, &sat);
1618 *r = result;
1620 if (sat) {
1621 set_vscr_sat(env);
1625 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1627 int i, j, upper;
1628 ppc_avr_t result;
1629 int sat = 0;
1631 upper = 1;
1632 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1633 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1635 result.VsrD(i) = 0;
1636 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1637 t += a->VsrSW(2 * i + j);
1639 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1642 *r = result;
1643 if (sat) {
1644 set_vscr_sat(env);
1648 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1650 int i, j;
1651 int sat = 0;
1653 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1654 int64_t t = (int64_t)b->s32[i];
1656 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1657 t += a->s8[4 * i + j];
1659 r->s32[i] = cvtsdsw(t, &sat);
1662 if (sat) {
1663 set_vscr_sat(env);
1667 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1669 int sat = 0;
1670 int i;
1672 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1673 int64_t t = (int64_t)b->s32[i];
1675 t += a->s16[2 * i] + a->s16[2 * i + 1];
1676 r->s32[i] = cvtsdsw(t, &sat);
1679 if (sat) {
1680 set_vscr_sat(env);
1684 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1686 int i, j;
1687 int sat = 0;
1689 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1690 uint64_t t = (uint64_t)b->u32[i];
1692 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1693 t += a->u8[4 * i + j];
1695 r->u32[i] = cvtuduw(t, &sat);
1698 if (sat) {
1699 set_vscr_sat(env);
1703 #if defined(HOST_WORDS_BIGENDIAN)
1704 #define UPKHI 1
1705 #define UPKLO 0
1706 #else
1707 #define UPKHI 0
1708 #define UPKLO 1
1709 #endif
1710 #define VUPKPX(suffix, hi) \
1711 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1713 int i; \
1714 ppc_avr_t result; \
1716 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1717 uint16_t e = b->u16[hi ? i : i + 4]; \
1718 uint8_t a = (e >> 15) ? 0xff : 0; \
1719 uint8_t r = (e >> 10) & 0x1f; \
1720 uint8_t g = (e >> 5) & 0x1f; \
1721 uint8_t b = e & 0x1f; \
1723 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1725 *r = result; \
1727 VUPKPX(lpx, UPKLO)
1728 VUPKPX(hpx, UPKHI)
1729 #undef VUPKPX
1731 #define VUPK(suffix, unpacked, packee, hi) \
1732 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1734 int i; \
1735 ppc_avr_t result; \
1737 if (hi) { \
1738 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1739 result.unpacked[i] = b->packee[i]; \
1741 } else { \
1742 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1743 i++) { \
1744 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1747 *r = result; \
1749 VUPK(hsb, s16, s8, UPKHI)
1750 VUPK(hsh, s32, s16, UPKHI)
1751 VUPK(hsw, s64, s32, UPKHI)
1752 VUPK(lsb, s16, s8, UPKLO)
1753 VUPK(lsh, s32, s16, UPKLO)
1754 VUPK(lsw, s64, s32, UPKLO)
1755 #undef VUPK
1756 #undef UPKHI
1757 #undef UPKLO
1759 #define VGENERIC_DO(name, element) \
1760 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1762 int i; \
1764 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1765 r->element[i] = name(b->element[i]); \
1769 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1770 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1772 VGENERIC_DO(clzb, u8)
1773 VGENERIC_DO(clzh, u16)
1775 #undef clzb
1776 #undef clzh
1778 #define ctzb(v) ((v) ? ctz32(v) : 8)
1779 #define ctzh(v) ((v) ? ctz32(v) : 16)
1780 #define ctzw(v) ctz32((v))
1781 #define ctzd(v) ctz64((v))
1783 VGENERIC_DO(ctzb, u8)
1784 VGENERIC_DO(ctzh, u16)
1785 VGENERIC_DO(ctzw, u32)
1786 VGENERIC_DO(ctzd, u64)
1788 #undef ctzb
1789 #undef ctzh
1790 #undef ctzw
1791 #undef ctzd
1793 #define popcntb(v) ctpop8(v)
1794 #define popcnth(v) ctpop16(v)
1795 #define popcntw(v) ctpop32(v)
1796 #define popcntd(v) ctpop64(v)
1798 VGENERIC_DO(popcntb, u8)
1799 VGENERIC_DO(popcnth, u16)
1800 VGENERIC_DO(popcntw, u32)
1801 VGENERIC_DO(popcntd, u64)
1803 #undef popcntb
1804 #undef popcnth
1805 #undef popcntw
1806 #undef popcntd
1808 #undef VGENERIC_DO
1810 #if defined(HOST_WORDS_BIGENDIAN)
1811 #define QW_ONE { .u64 = { 0, 1 } }
1812 #else
1813 #define QW_ONE { .u64 = { 1, 0 } }
1814 #endif
1816 #ifndef CONFIG_INT128
1818 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1820 t->u64[0] = ~a.u64[0];
1821 t->u64[1] = ~a.u64[1];
1824 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1826 if (a.VsrD(0) < b.VsrD(0)) {
1827 return -1;
1828 } else if (a.VsrD(0) > b.VsrD(0)) {
1829 return 1;
1830 } else if (a.VsrD(1) < b.VsrD(1)) {
1831 return -1;
1832 } else if (a.VsrD(1) > b.VsrD(1)) {
1833 return 1;
1834 } else {
1835 return 0;
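/*
 * 128-bit addition done as two 64-bit halves; ~a.lo < b.lo is true exactly
 * when a.lo + b.lo carries out of the low doubleword.
 */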
1839 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1841 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1842 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1843 (~a.VsrD(1) < b.VsrD(1));
1846 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1848 ppc_avr_t not_a;
1849 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1850 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1851 (~a.VsrD(1) < b.VsrD(1));
1852 avr_qw_not(&not_a, a);
1853 return avr_qw_cmpu(not_a, b) < 0;
1856 #endif
1858 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1860 #ifdef CONFIG_INT128
1861 r->u128 = a->u128 + b->u128;
1862 #else
1863 avr_qw_add(r, *a, *b);
1864 #endif
1867 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1869 #ifdef CONFIG_INT128
1870 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1871 #else
1873 if (c->VsrD(1) & 1) {
1874 ppc_avr_t tmp;
1876 tmp.VsrD(0) = 0;
1877 tmp.VsrD(1) = c->VsrD(1) & 1;
1878 avr_qw_add(&tmp, *a, tmp);
1879 avr_qw_add(r, tmp, *b);
1880 } else {
1881 avr_qw_add(r, *a, *b);
1883 #endif
1886 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1888 #ifdef CONFIG_INT128
1889 r->u128 = (~a->u128 < b->u128);
1890 #else
1891 ppc_avr_t not_a;
1893 avr_qw_not(&not_a, *a);
1895 r->VsrD(0) = 0;
1896 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
1897 #endif
1900 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1902 #ifdef CONFIG_INT128
1903 int carry_out = (~a->u128 < b->u128);
1904 if (!carry_out && (c->u128 & 1)) {
1905 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
1906 ((a->u128 != 0) || (b->u128 != 0));
1908 r->u128 = carry_out;
1909 #else
1911 int carry_in = c->VsrD(1) & 1;
1912 int carry_out = 0;
1913 ppc_avr_t tmp;
1915 carry_out = avr_qw_addc(&tmp, *a, *b);
1917 if (!carry_out && carry_in) {
1918 ppc_avr_t one = QW_ONE;
1919 carry_out = avr_qw_addc(&tmp, tmp, one);
1921 r->VsrD(0) = 0;
1922 r->VsrD(1) = carry_out;
1923 #endif
1926 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1928 #ifdef CONFIG_INT128
1929 r->u128 = a->u128 - b->u128;
1930 #else
1931 ppc_avr_t tmp;
1932 ppc_avr_t one = QW_ONE;
1934 avr_qw_not(&tmp, *b);
1935 avr_qw_add(&tmp, *a, tmp);
1936 avr_qw_add(r, tmp, one);
1937 #endif
1940 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1942 #ifdef CONFIG_INT128
1943 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
1944 #else
1945 ppc_avr_t tmp, sum;
1947 avr_qw_not(&tmp, *b);
1948 avr_qw_add(&sum, *a, tmp);
1950 tmp.VsrD(0) = 0;
1951 tmp.VsrD(1) = c->VsrD(1) & 1;
1952 avr_qw_add(r, sum, tmp);
1953 #endif
1956 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1958 #ifdef CONFIG_INT128
1959 r->u128 = (~a->u128 < ~b->u128) ||
1960 (a->u128 + ~b->u128 == (__uint128_t)-1);
1961 #else
1962 int carry = (avr_qw_cmpu(*a, *b) > 0);
1963 if (!carry) {
1964 ppc_avr_t tmp;
1965 avr_qw_not(&tmp, *b);
1966 avr_qw_add(&tmp, *a, tmp);
1967 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
1969 r->VsrD(0) = 0;
1970 r->VsrD(1) = carry;
1971 #endif
1974 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1976 #ifdef CONFIG_INT128
1977 r->u128 =
1978 (~a->u128 < ~b->u128) ||
1979 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
1980 #else
1981 int carry_in = c->VsrD(1) & 1;
1982 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
1983 if (!carry_out && carry_in) {
1984 ppc_avr_t tmp;
1985 avr_qw_not(&tmp, *b);
1986 avr_qw_add(&tmp, *a, tmp);
1987 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
1990 r->VsrD(0) = 0;
1991 r->VsrD(1) = carry_out;
1992 #endif
1995 #define BCD_PLUS_PREF_1 0xC
1996 #define BCD_PLUS_PREF_2 0xF
1997 #define BCD_PLUS_ALT_1 0xA
1998 #define BCD_NEG_PREF 0xD
1999 #define BCD_NEG_ALT 0xB
2000 #define BCD_PLUS_ALT_2 0xE
2001 #define NATIONAL_PLUS 0x2B
2002 #define NATIONAL_NEG 0x2D
2004 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
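/*
 * Packed BCD layout: 31 decimal digits stored one per nibble, with the sign
 * code in the least-significant nibble ("digit" 0). BCD_DIG_BYTE maps a
 * digit index to its byte within the 16-byte register (VsrB numbering);
 * even digits occupy the low nibble of a byte, odd digits the high nibble.
 */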
2006 static int bcd_get_sgn(ppc_avr_t *bcd)
2008 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2009 case BCD_PLUS_PREF_1:
2010 case BCD_PLUS_PREF_2:
2011 case BCD_PLUS_ALT_1:
2012 case BCD_PLUS_ALT_2:
2014 return 1;
2017 case BCD_NEG_PREF:
2018 case BCD_NEG_ALT:
2020 return -1;
2023 default:
2025 return 0;
2030 static int bcd_preferred_sgn(int sgn, int ps)
2032 if (sgn >= 0) {
2033 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2034 } else {
2035 return BCD_NEG_PREF;
2039 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2041 uint8_t result;
2042 if (n & 1) {
2043 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2044 } else {
2045 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2048 if (unlikely(result > 9)) {
2049 *invalid = true;
2051 return result;
2054 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2056 if (n & 1) {
2057 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2058 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2059 } else {
2060 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2061 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2065 static bool bcd_is_valid(ppc_avr_t *bcd)
2067 int i;
2068 int invalid = 0;
2070 if (bcd_get_sgn(bcd) == 0) {
2071 return false;
2074 for (i = 1; i < 32; i++) {
2075 bcd_get_digit(bcd, i, &invalid);
2076 if (unlikely(invalid)) {
2077 return false;
2080 return true;
2083 static int bcd_cmp_zero(ppc_avr_t *bcd)
2085 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2086 return CRF_EQ;
2087 } else {
2088 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2092 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2094 return reg->VsrH(7 - n);
2097 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2099 reg->VsrH(7 - n) = val;
2102 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2104 int i;
2105 int invalid = 0;
2106 for (i = 31; i > 0; i--) {
2107 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2108 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2109 if (unlikely(invalid)) {
2110 return 0; /* doesn't matter */
2111 } else if (dig_a > dig_b) {
2112 return 1;
2113 } else if (dig_a < dig_b) {
2114 return -1;
2118 return 0;
2121 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2122 int *overflow)
2124 int carry = 0;
2125 int i;
2126 int is_zero = 1;
2128 for (i = 1; i <= 31; i++) {
2129 uint8_t digit = bcd_get_digit(a, i, invalid) +
2130 bcd_get_digit(b, i, invalid) + carry;
2131 is_zero &= (digit == 0);
2132 if (digit > 9) {
2133 carry = 1;
2134 digit -= 10;
2135 } else {
2136 carry = 0;
2139 bcd_put_digit(t, digit, i);
2142 *overflow = carry;
2143 return is_zero;
2146 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2147 int *overflow)
2149 int carry = 0;
2150 int i;
2152 for (i = 1; i <= 31; i++) {
2153 uint8_t digit = bcd_get_digit(a, i, invalid) -
2154 bcd_get_digit(b, i, invalid) + carry;
2155 if (digit & 0x80) {
2156 carry = -1;
2157 digit += 10;
2158 } else {
2159 carry = 0;
2162 bcd_put_digit(t, digit, i);
2165 *overflow = carry;
2168 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2171 int sgna = bcd_get_sgn(a);
2172 int sgnb = bcd_get_sgn(b);
2173 int invalid = (sgna == 0) || (sgnb == 0);
2174 int overflow = 0;
2175 int zero = 0;
2176 uint32_t cr = 0;
2177 ppc_avr_t result = { .u64 = { 0, 0 } };
2179 if (!invalid) {
2180 if (sgna == sgnb) {
2181 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2182 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2183 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2184 } else {
2185 int magnitude = bcd_cmp_mag(a, b);
2186 if (magnitude > 0) {
2187 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2188 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2189 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2190 } else if (magnitude < 0) {
2191 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2192 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2193 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2194 } else {
2195 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2196 cr = CRF_EQ;
2201 if (unlikely(invalid)) {
2202 result.VsrD(0) = result.VsrD(1) = -1;
2203 cr = CRF_SO;
2204 } else if (overflow) {
2205 cr |= CRF_SO;
2206 } else if (zero) {
2207 cr |= CRF_EQ;
2210 *r = result;
2212 return cr;
2215 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2217 ppc_avr_t bcopy = *b;
2218 int sgnb = bcd_get_sgn(b);
2219 if (sgnb < 0) {
2220 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2221 } else if (sgnb > 0) {
2222 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2224 /* else invalid ... defer to bcdadd code for proper handling */
2226 return helper_bcdadd(r, a, &bcopy, ps);
2229 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2231 int i;
2232 int cr = 0;
2233 uint16_t national = 0;
2234 uint16_t sgnb = get_national_digit(b, 0);
2235 ppc_avr_t ret = { .u64 = { 0, 0 } };
2236 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2238 for (i = 1; i < 8; i++) {
2239 national = get_national_digit(b, i);
2240 if (unlikely(national < 0x30 || national > 0x39)) {
2241 invalid = 1;
2242 break;
2245 bcd_put_digit(&ret, national & 0xf, i);
2248 if (sgnb == NATIONAL_PLUS) {
2249 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2250 } else {
2251 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2254 cr = bcd_cmp_zero(&ret);
2256 if (unlikely(invalid)) {
2257 cr = CRF_SO;
2260 *r = ret;
2262 return cr;
2265 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2267 int i;
2268 int cr = 0;
2269 int sgnb = bcd_get_sgn(b);
2270 int invalid = (sgnb == 0);
2271 ppc_avr_t ret = { .u64 = { 0, 0 } };
2273 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2275 for (i = 1; i < 8; i++) {
2276 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2278 if (unlikely(invalid)) {
2279 break;
2282 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2284 cr = bcd_cmp_zero(b);
2286 if (ox_flag) {
2287 cr |= CRF_SO;
2290 if (unlikely(invalid)) {
2291 cr = CRF_SO;
2294 *r = ret;
2296 return cr;
2299 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2301 int i;
2302 int cr = 0;
2303 int invalid = 0;
2304 int zone_digit = 0;
2305 int zone_lead = ps ? 0xF : 0x3;
2306 int digit = 0;
2307 ppc_avr_t ret = { .u64 = { 0, 0 } };
2308 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2310 if (unlikely((sgnb < 0xA) && ps)) {
2311 invalid = 1;
2314 for (i = 0; i < 16; i++) {
2315 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2316 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2317 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2318 invalid = 1;
2319 break;
2322 bcd_put_digit(&ret, digit, i + 1);
2325 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2326 (!ps && (sgnb & 0x4))) {
2327 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2328 } else {
2329 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2332 cr = bcd_cmp_zero(&ret);
2334 if (unlikely(invalid)) {
2335 cr = CRF_SO;
2338 *r = ret;
2340 return cr;
2343 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2345 int i;
2346 int cr = 0;
2347 uint8_t digit = 0;
2348 int sgnb = bcd_get_sgn(b);
2349 int zone_lead = (ps) ? 0xF0 : 0x30;
2350 int invalid = (sgnb == 0);
2351 ppc_avr_t ret = { .u64 = { 0, 0 } };
2353 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2355 for (i = 0; i < 16; i++) {
2356 digit = bcd_get_digit(b, i + 1, &invalid);
2358 if (unlikely(invalid)) {
2359 break;
2362 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2365 if (ps) {
2366 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2367 } else {
2368 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2371 cr = bcd_cmp_zero(b);
2373 if (ox_flag) {
2374 cr |= CRF_SO;
2377 if (unlikely(invalid)) {
2378 cr = CRF_SO;
2381 *r = ret;
2383 return cr;
2387 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2389 * Returns:
2390 * > 0 if ahi|alo > bhi|blo,
2391 * 0 if ahi|alo == bhi|blo,
2392 * < 0 if ahi|alo < bhi|blo
2394 static inline int ucmp128(uint64_t alo, uint64_t ahi,
2395 uint64_t blo, uint64_t bhi)
2397 return (ahi == bhi) ?
2398 (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2399 (ahi > bhi ? 1 : -1);
uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr;
    uint64_t lo_value;
    uint64_t hi_value;
    uint64_t rem;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);

        cr = CRF_LT;
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);

        if (hi_value == 0 && lo_value == 0) {
            cr = CRF_EQ;
        } else {
            cr = CRF_GT;
        }
    }

    /*
     * Check src limits: abs(src) <= 10^31 - 1
     *
     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
     */
    if (ucmp128(lo_value, hi_value,
                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
        cr |= CRF_SO;

        /*
         * According to the ISA, if src wouldn't fit in the destination
         * register, the result is undefined.
         * In that case, we leave r unchanged.
         */
    } else {
        rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);

        for (i = 1; i < 16; rem /= 10, i++) {
            bcd_put_digit(&ret, rem % 10, i);
        }

        for (; i < 32; lo_value /= 10, i++) {
            bcd_put_digit(&ret, lo_value % 10, i);
        }

        *r = ret;
    }

    return cr;
}
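
/*
 * The digit loops above peel decimal digits off the 128-bit magnitude after
 * divu128() splits it at 10^15.  The same idea on a plain 64-bit value,
 * producing 16 packed BCD nibbles (no sign nibble, unlike the helper, which
 * keeps the sign in digit position 0).  Hypothetical standalone sketch:
 */
static uint64_t __attribute__((unused)) u64_to_packed_bcd_sketch(uint64_t val)
{
    uint64_t bcd = 0;
    int i;

    /* The least significant decimal digit ends up in bits 3:0. */
    for (i = 0; i < 16; i++, val /= 10) {
        bcd |= (val % 10) << (4 * i);
    }
    return bcd;
}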
uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}
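
/*
 * The mulu64() loop above is a 128-bit Horner evaluation, acc = acc * 10 +
 * digit, walking from the most significant digit down.  The 64-bit version
 * of the same idea, with the digit validity tracking that bcd_get_digit()
 * performs done inline (hypothetical standalone sketch):
 */
static uint64_t __attribute__((unused))
packed_bcd_to_u64_sketch(uint64_t bcd, bool *invalid)
{
    uint64_t val = 0;
    int i;

    /* 16 packed digits, most significant in bits 63:60, no sign nibble. */
    for (i = 15; i >= 0; i--) {
        uint64_t digit = (bcd >> (4 * i)) & 0xf;

        if (digit > 9) {
            *invalid = true;
            return 0;
        }
        val = val * 10 + digit;
    }
    return val;
}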
uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}
uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i = a->VsrSB(7);
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}
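
/*
 * Because each digit occupies one nibble, the decimal shift implemented by
 * helper_bcds is a plain logical shift by 4 * n bits on the magnitude (the
 * sign nibble is masked off first and re-inserted afterwards).  A 64-bit
 * sketch of that equivalence, e.g. bcd_shift_sketch(0x123, 2) == 0x12300,
 * i.e. 123 shifted to 12300 (hypothetical helper, illustration only):
 */
static uint64_t __attribute__((unused)) bcd_shift_sketch(uint64_t digits, int n)
{
    /* 16 packed digits, no sign nibble; n > 0 multiplies by 10^n. */
    if (n >= 16 || n <= -16) {
        return 0;
    }
    return (n >= 0) ? digits << (4 * n) : digits >> (4 * -n);
}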
uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSB(7);
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    int i = a->VsrSB(7);
    ppc_avr_t bcd_one;

    bcd_one.VsrD(0) = 0;
    bcd_one.VsrD(1) = 0x10;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}
uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
    int i = a->VsrSH(3) + 1;
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}
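
/*
 * Keeping the low i digits amounts to masking the low 4 * i bits; the helper
 * builds that mask as (uint64_t)-1 >> (64 - i * 4) and raises CRF_SO when any
 * discarded digit was non-zero.  The single-doubleword version of that step
 * (hypothetical sketch, valid for i in 1..16):
 */
static uint64_t __attribute__((unused))
bcd_trunc_sketch(uint64_t digits, int i, bool *ox)
{
    uint64_t mask = (uint64_t)-1 >> (64 - i * 4);

    *ox = (digits & ~mask) != 0;    /* was anything truncated away? */
    return digits & mask;
}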
uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    i = a->VsrSH(3);
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}
void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07. The RTL is */
    /* incorrect and will be fixed in V2.07B. */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}
void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}
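
/*
 * The four rotate/xor combinations above are the SHA-256 sigma functions:
 * st == 0 selects the "small" sigma0/sigma1 used in the message schedule and
 * st == 1 the "big" Sigma0/Sigma1 used in the compression rounds.  For
 * reference, the scalar form of the st == 0 pair (standalone sketches, not
 * used by the emulation):
 */
static uint32_t __attribute__((unused)) sha256_sigma0_sketch(uint32_t x)
{
    /* Matches the st == 0, six-bit clear case above. */
    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
}

static uint32_t __attribute__((unused)) sha256_sigma1_sketch(uint32_t x)
{
    /* Matches the st == 0, six-bit set case above. */
    return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
}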
void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}
void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I
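
/*
 * Each result byte of vpermxor uses the high nibble of the control byte to
 * index into a and the low nibble to index into b, XORing the two selected
 * bytes; a control byte of 0x25 combines byte 2 of a with byte 5 of b.  One
 * lane of that operation as a scalar sketch (hypothetical helper, not used
 * by the emulation):
 */
static uint8_t __attribute__((unused))
permxor_lane_sketch(const uint8_t a[16], const uint8_t b[16], uint8_t ctl)
{
    return a[ctl >> 4] ^ b[ctl & 0xF];
}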
/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
           (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}
#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}
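
/*
 * helper_brinc produces the next index of a bit-reversed ("butterfly") FFT
 * addressing sequence: the bits of arg1 selected by arg2 are bit-reversed,
 * incremented, and reversed back, while the remaining bits pass through
 * unchanged.  A self-checking sketch with a 3-bit mask, which walks the
 * classic radix-2 order 0, 4, 2, 6, 1, 5, 3, 7 (illustration only):
 */
static void __attribute__((unused)) brinc_sketch(void)
{
    static const target_ulong expected[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
    target_ulong idx = 0;
    int i;

    for (i = 0; i < 8; i++) {
        assert(idx == expected[i]);
        idx = helper_brinc(idx, 0x7);
    }
}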
uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}
/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}
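
/*
 * helper_dlmzb scans the eight bytes formed by high:low from the most
 * significant byte and returns the 1-based position of the first zero byte,
 * or 8 if there is none, mirroring the count into the low bits of XER.  A
 * self-checking sketch of the returned length (hypothetical, illustration
 * only; note the helper also updates env->xer as a side effect):
 */
static void __attribute__((unused)) dlmzb_sketch(CPUPPCState *env)
{
    /* "AB\0..." in the high word: the zero byte is the third byte. */
    assert(helper_dlmzb(env, 0x41420078, 0x00000000, 0) == 3);
    /* No zero byte anywhere: the result saturates at 8. */
    assert(helper_dlmzb(env, 0x41424344, 0x45464748, 0) == 8);
}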