target/ppc: Move Vector Compare Not Equal or Zero to decodetree
target/ppc/int_helper.c
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
32 #include "helper_regs.h"
33 /*****************************************************************************/
34 /* Fixed point operations helpers */
36 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
38 if (unlikely(ov)) {
39 env->so = env->ov = 1;
40 } else {
41 env->ov = 0;
45 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
46 uint32_t oe)
48 uint64_t rt = 0;
49 int overflow = 0;
51 uint64_t dividend = (uint64_t)ra << 32;
52 uint64_t divisor = (uint32_t)rb;
54 if (unlikely(divisor == 0)) {
55 overflow = 1;
56 } else {
57 rt = dividend / divisor;
58 overflow = rt > UINT32_MAX;
61 if (unlikely(overflow)) {
62 rt = 0; /* Undefined */
65 if (oe) {
66 helper_update_ov_legacy(env, overflow);
69 return (target_ulong)rt;
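/*
 * Illustrative example: divweu divides (RA || 32 zero bits) by the low
 * 32 bits of RB.  With ra = 1 and rb = 2 the dividend is 0x1_0000_0000,
 * so rt = 0x8000_0000 with no overflow; rb = 0 or a quotient above
 * UINT32_MAX leaves rt undefined (0 here) and sets OV when OE is set.
 */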
72 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
73 uint32_t oe)
75 int64_t rt = 0;
76 int overflow = 0;
78 int64_t dividend = (int64_t)ra << 32;
79 int64_t divisor = (int64_t)((int32_t)rb);
81 if (unlikely((divisor == 0) ||
82 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
83 overflow = 1;
84 } else {
85 rt = dividend / divisor;
86 overflow = rt != (int32_t)rt;
89 if (unlikely(overflow)) {
90 rt = 0; /* Undefined */
93 if (oe) {
94 helper_update_ov_legacy(env, overflow);
97 return (target_ulong)rt;
100 #if defined(TARGET_PPC64)
102 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
104 uint64_t rt = 0;
105 int overflow = 0;
107 if (unlikely(rb == 0 || ra >= rb)) {
108 overflow = 1;
109 rt = 0; /* Undefined */
110 } else {
111 divu128(&rt, &ra, rb);
114 if (oe) {
115 helper_update_ov_legacy(env, overflow);
118 return rt;
121 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
123 uint64_t rt = 0;
124 int64_t ra = (int64_t)rau;
125 int64_t rb = (int64_t)rbu;
126 int overflow = 0;
128 if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
129 overflow = 1;
130 rt = 0; /* Undefined */
131 } else {
132 divs128(&rt, &ra, rb);
135 if (oe) {
136 helper_update_ov_legacy(env, overflow);
139 return rt;
142 #endif
145 #if defined(TARGET_PPC64)
146 /* if x = 0xab, returns 0xabababababababab */
147 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
150 * subtract 1 from each byte, AND with the inverse, and check whether the MSB
151 * is set in each byte.
152 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
153 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
155 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
157 /* When you XOR the pattern and there is a match, that byte will be zero */
158 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
160 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
162 return hasvalue(rb, ra) ? CRF_GT : 0;
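/*
 * Illustrative example: with ra = 0x55 and rb = 0x1122334455667788,
 * rb ^ pattern(0x55) = 0x44776611003322dd.  The zero byte makes
 * haszero() return a non-zero value, so cmpeqb yields CRF_GT.
 */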
165 #undef pattern
166 #undef haszero
167 #undef hasvalue
170 * Return a random number.
172 uint64_t helper_darn32(void)
174 Error *err = NULL;
175 uint32_t ret;
177 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
178 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
179 error_get_pretty(err));
180 error_free(err);
181 return -1;
184 return ret;
187 uint64_t helper_darn64(void)
189 Error *err = NULL;
190 uint64_t ret;
192 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
193 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
194 error_get_pretty(err));
195 error_free(err);
196 return -1;
199 return ret;
202 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
204 int i;
205 uint64_t ra = 0;
207 for (i = 0; i < 8; i++) {
208 int index = (rs >> (i * 8)) & 0xFF;
209 if (index < 64) {
210 if (rb & PPC_BIT(index)) {
211 ra |= 1 << i;
215 return ra;
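/*
 * Illustrative example: each byte of rs selects one bit of rb, in IBM
 * bit numbering where PPC_BIT(0) is the most significant bit.  With
 * rs = 0x3f (byte 0 selects bit 63, the LSB of rb) and rb = 1, bit 0 of
 * ra is set, so bpermd returns 1.
 */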
218 #endif
220 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
222 target_ulong mask = 0xff;
223 target_ulong ra = 0;
224 int i;
226 for (i = 0; i < sizeof(target_ulong); i++) {
227 if ((rs & mask) == (rb & mask)) {
228 ra |= mask;
230 mask <<= 8;
232 return ra;
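/*
 * Illustrative example: cmpb compares rs and rb byte by byte and sets
 * each byte of ra to 0xff on a match and 0x00 otherwise, e.g. rs byte
 * 0x11 vs rb byte 0x11 gives 0xff, while 0x22 vs 0x99 gives 0x00.
 */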
235 /* shift right arithmetic helper */
236 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
237 target_ulong shift)
239 int32_t ret;
241 if (likely(!(shift & 0x20))) {
242 if (likely((uint32_t)shift != 0)) {
243 shift &= 0x1f;
244 ret = (int32_t)value >> shift;
245 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
246 env->ca32 = env->ca = 0;
247 } else {
248 env->ca32 = env->ca = 1;
250 } else {
251 ret = (int32_t)value;
252 env->ca32 = env->ca = 0;
254 } else {
255 ret = (int32_t)value >> 31;
256 env->ca32 = env->ca = (ret != 0);
258 return (target_long)ret;
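/*
 * Illustrative example: the carry bits (CA/CA32) record that a negative
 * result lost non-zero bits, e.g. sraw of 0xfffffff5 (-11) by 1 gives
 * 0xfffffffa (-6) with CA = 1 because a 1 bit was shifted out.
 */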
261 #if defined(TARGET_PPC64)
262 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
263 target_ulong shift)
265 int64_t ret;
267 if (likely(!(shift & 0x40))) {
268 if (likely((uint64_t)shift != 0)) {
269 shift &= 0x3f;
270 ret = (int64_t)value >> shift;
271 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
272 env->ca32 = env->ca = 0;
273 } else {
274 env->ca32 = env->ca = 1;
276 } else {
277 ret = (int64_t)value;
278 env->ca32 = env->ca = 0;
280 } else {
281 ret = (int64_t)value >> 63;
282 env->ca32 = env->ca = (ret != 0);
284 return ret;
286 #endif
288 #if defined(TARGET_PPC64)
289 target_ulong helper_popcntb(target_ulong val)
291 /* Note that we don't fold past bytes */
292 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
293 0x5555555555555555ULL);
294 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
295 0x3333333333333333ULL);
296 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
297 0x0f0f0f0f0f0f0f0fULL);
298 return val;
301 target_ulong helper_popcntw(target_ulong val)
303 /* Note that we don't fold past words. */
304 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
305 0x5555555555555555ULL);
306 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
307 0x3333333333333333ULL);
308 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
309 0x0f0f0f0f0f0f0f0fULL);
310 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
311 0x00ff00ff00ff00ffULL);
312 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
313 0x0000ffff0000ffffULL);
314 return val;
316 #else
317 target_ulong helper_popcntb(target_ulong val)
319 /* Note that we don't fold past bytes */
320 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
321 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
322 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
323 return val;
325 #endif
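/*
 * Illustrative example: popcntb leaves a per-byte population count in
 * each byte of the result, e.g. an input byte of 0xff becomes 0x08 and
 * an input byte of 0x03 becomes 0x02.
 */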
327 uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
330 * Instead of processing the mask bit-by-bit from the most significant to
331 * the least significant bit, as described in PowerISA, we'll handle it in
332 * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
333 * ctz or cto, we negate the mask at the end of the loop.
335 target_ulong m, left = 0, right = 0;
336 unsigned int n, i = 64;
337 bool bit = false; /* tracks if we are processing zeros or ones */
339 if (mask == 0 || mask == -1) {
340 return src;
343 /* Processes the mask in blocks, from LSB to MSB */
344 while (i) {
345 /* Find how many bits we should take */
346 n = ctz64(mask);
347 if (n > i) {
348 n = i;
352 * Extract the 'n' trailing bits of 'src' and put them in the leading 'n'
353 * bits of 'right' or 'left', pushing down the previously extracted
354 * values.
356 m = (1ll << n) - 1;
357 if (bit) {
358 right = ror64(right | (src & m), n);
359 } else {
360 left = ror64(left | (src & m), n);
364 * Discards the processed bits from 'src' and 'mask'. Note that we are
365 * removing 'n' trailing zeros from 'mask', but the logical shift will
366 * add 'n' leading zeros back, so the population count of 'mask' is kept
367 * the same.
369 src >>= n;
370 mask >>= n;
371 i -= n;
372 bit = !bit;
373 mask = ~mask;
377 * At the end, 'right' has been ror'ed by a total of ctpop(mask) bits. To put
378 * it back in place, we shift it right a further 64-ctpop(mask) bits.
380 if (bit) {
381 n = ctpop64(mask);
382 } else {
383 n = 64 - ctpop64(mask);
386 return left | (right >> n);
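/*
 * Illustrative example: with src = 0b1011 and mask = 0b0101, the bits of
 * src selected by mask 1s (bits 2 and 0, i.e. 0b01) gather at the low
 * end, the bits selected by mask 0s stack above them, and the result is
 * 0b1101 (0xd).
 */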
389 uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
391 int i, o;
392 uint64_t result = 0;
394 if (mask == -1) {
395 return src;
398 for (i = 0; mask != 0; i++) {
399 o = ctz64(mask);
400 mask &= mask - 1;
401 result |= ((src >> i) & 1) << o;
404 return result;
407 uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
409 int i, o;
410 uint64_t result = 0;
412 if (mask == -1) {
413 return src;
416 for (o = 0; mask != 0; o++) {
417 i = ctz64(mask);
418 mask &= mask - 1;
419 result |= ((src >> i) & 1) << o;
422 return result;
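/*
 * Illustrative example: PDEPD scatters the low-order bits of src into the
 * 1-bit positions of mask, while PEXTD gathers the bits of src found at
 * those positions back down to the low end, e.g.
 * PDEPD(0b101, 0b11100) = 0b10100 and PEXTD(0b10100, 0b11100) = 0b101.
 */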
425 /*****************************************************************************/
426 /* Altivec extension helpers */
427 #if defined(HOST_WORDS_BIGENDIAN)
428 #define VECTOR_FOR_INORDER_I(index, element) \
429 for (index = 0; index < ARRAY_SIZE(r->element); index++)
430 #else
431 #define VECTOR_FOR_INORDER_I(index, element) \
432 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
433 #endif
435 /* Saturating arithmetic helpers. */
436 #define SATCVT(from, to, from_type, to_type, min, max) \
437 static inline to_type cvt##from##to(from_type x, int *sat) \
439 to_type r; \
441 if (x < (from_type)min) { \
442 r = min; \
443 *sat = 1; \
444 } else if (x > (from_type)max) { \
445 r = max; \
446 *sat = 1; \
447 } else { \
448 r = x; \
450 return r; \
452 #define SATCVTU(from, to, from_type, to_type, min, max) \
453 static inline to_type cvt##from##to(from_type x, int *sat) \
455 to_type r; \
457 if (x > (from_type)max) { \
458 r = max; \
459 *sat = 1; \
460 } else { \
461 r = x; \
463 return r; \
465 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
466 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
467 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
469 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
470 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
471 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
472 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
473 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
474 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
475 #undef SATCVT
476 #undef SATCVTU
478 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
480 ppc_store_vscr(env, vscr);
483 uint32_t helper_mfvscr(CPUPPCState *env)
485 return ppc_get_vscr(env);
488 static inline void set_vscr_sat(CPUPPCState *env)
490 /* The choice of non-zero value is arbitrary. */
491 env->vscr_sat.u32[0] = 1;
494 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
496 int i;
498 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
499 r->u32[i] = ~a->u32[i] < b->u32[i];
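/*
 * Note on vaddcuw above: ~a->u32[i] < b->u32[i] is 1 exactly when
 * a->u32[i] + b->u32[i] carries out of 32 bits, i.e. the sum would
 * exceed UINT32_MAX.
 */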
503 /* vprtybw */
504 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
506 int i;
507 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
508 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
509 res ^= res >> 8;
510 r->u32[i] = res & 1;
514 /* vprtybd */
515 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
517 int i;
518 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
519 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
520 res ^= res >> 16;
521 res ^= res >> 8;
522 r->u64[i] = res & 1;
526 /* vprtybq */
527 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
529 uint64_t res = b->u64[0] ^ b->u64[1];
530 res ^= res >> 32;
531 res ^= res >> 16;
532 res ^= res >> 8;
533 r->VsrD(1) = res & 1;
534 r->VsrD(0) = 0;
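/*
 * The vprtyb{w,d,q} helpers above XOR together the least significant bit
 * of every byte of each element (the successive folds by half bring bit 0
 * of each byte into bit 0 of res) and store that single parity bit in the
 * element.
 */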
537 #define VARITHFP(suffix, func) \
538 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
539 ppc_avr_t *b) \
541 int i; \
543 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
544 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
547 VARITHFP(addfp, float32_add)
548 VARITHFP(subfp, float32_sub)
549 VARITHFP(minfp, float32_min)
550 VARITHFP(maxfp, float32_max)
551 #undef VARITHFP
553 #define VARITHFPFMA(suffix, type) \
554 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
555 ppc_avr_t *b, ppc_avr_t *c) \
557 int i; \
558 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
559 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
560 type, &env->vec_status); \
563 VARITHFPFMA(maddfp, 0);
564 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
565 #undef VARITHFPFMA
567 #define VARITHSAT_CASE(type, op, cvt, element) \
569 type result = (type)a->element[i] op (type)b->element[i]; \
570 r->element[i] = cvt(result, &sat); \
573 #define VARITHSAT_DO(name, op, optype, cvt, element) \
574 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
575 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
577 int sat = 0; \
578 int i; \
580 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
581 VARITHSAT_CASE(optype, op, cvt, element); \
583 if (sat) { \
584 vscr_sat->u32[0] = 1; \
587 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
588 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
589 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
590 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
591 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
592 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
593 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
594 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
595 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
596 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
597 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
598 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
599 #undef VARITHSAT_CASE
600 #undef VARITHSAT_DO
601 #undef VARITHSAT_SIGNED
602 #undef VARITHSAT_UNSIGNED
604 #define VAVG_DO(name, element, etype) \
605 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
607 int i; \
609 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
610 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
611 r->element[i] = x >> 1; \
615 #define VAVG(type, signed_element, signed_type, unsigned_element, \
616 unsigned_type) \
617 VAVG_DO(avgs##type, signed_element, signed_type) \
618 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
619 VAVG(b, s8, int16_t, u8, uint16_t)
620 VAVG(h, s16, int32_t, u16, uint32_t)
621 VAVG(w, s32, int64_t, u32, uint64_t)
622 #undef VAVG_DO
623 #undef VAVG
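/*
 * The VAVG helpers compute the rounded-up average (a + b + 1) >> 1 in a
 * wider type so the intermediate sum cannot overflow, e.g. for signed
 * bytes avg(5, 8) = (5 + 8 + 1) >> 1 = 7.
 */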
625 #define VABSDU_DO(name, element) \
626 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
628 int i; \
630 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
631 r->element[i] = (a->element[i] > b->element[i]) ? \
632 (a->element[i] - b->element[i]) : \
633 (b->element[i] - a->element[i]); \
638 * VABSDU - Vector absolute difference unsigned
639 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
640 * element - element type to access from vector
642 #define VABSDU(type, element) \
643 VABSDU_DO(absdu##type, element)
644 VABSDU(b, u8)
645 VABSDU(h, u16)
646 VABSDU(w, u32)
647 #undef VABSDU_DO
648 #undef VABSDU
650 #define VCF(suffix, cvt, element) \
651 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
652 ppc_avr_t *b, uint32_t uim) \
654 int i; \
656 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
657 float32 t = cvt(b->element[i], &env->vec_status); \
658 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
661 VCF(ux, uint32_to_float32, u32)
662 VCF(sx, int32_to_float32, s32)
663 #undef VCF
665 #define VCMPNEZ(NAME, ELEM) \
666 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
668 for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \
669 t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \
670 (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \
673 VCMPNEZ(VCMPNEZB, u8)
674 VCMPNEZ(VCMPNEZH, u16)
675 VCMPNEZ(VCMPNEZW, u32)
676 #undef VCMPNEZ
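/*
 * Illustrative example: VCMPNEZB sets a result byte to all ones when the
 * two input bytes differ or either one is zero, so elements (0x12, 0x12)
 * give 0x00 while (0x00, 0x34) and (0x12, 0x34) both give 0xff.
 */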
678 #define VCMPFP_DO(suffix, compare, order, record) \
679 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
680 ppc_avr_t *a, ppc_avr_t *b) \
682 uint32_t ones = (uint32_t)-1; \
683 uint32_t all = ones; \
684 uint32_t none = 0; \
685 int i; \
687 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
688 uint32_t result; \
689 FloatRelation rel = \
690 float32_compare_quiet(a->f32[i], b->f32[i], \
691 &env->vec_status); \
692 if (rel == float_relation_unordered) { \
693 result = 0; \
694 } else if (rel compare order) { \
695 result = ones; \
696 } else { \
697 result = 0; \
699 r->u32[i] = result; \
700 all &= result; \
701 none |= result; \
703 if (record) { \
704 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
707 #define VCMPFP(suffix, compare, order) \
708 VCMPFP_DO(suffix, compare, order, 0) \
709 VCMPFP_DO(suffix##_dot, compare, order, 1)
710 VCMPFP(eqfp, ==, float_relation_equal)
711 VCMPFP(gefp, !=, float_relation_less)
712 VCMPFP(gtfp, ==, float_relation_greater)
713 #undef VCMPFP_DO
714 #undef VCMPFP
716 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
717 ppc_avr_t *a, ppc_avr_t *b, int record)
719 int i;
720 int all_in = 0;
722 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
723 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
724 &env->vec_status);
725 if (le_rel == float_relation_unordered) {
726 r->u32[i] = 0xc0000000;
727 all_in = 1;
728 } else {
729 float32 bneg = float32_chs(b->f32[i]);
730 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
731 &env->vec_status);
732 int le = le_rel != float_relation_greater;
733 int ge = ge_rel != float_relation_less;
735 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
736 all_in |= (!le | !ge);
739 if (record) {
740 env->crf[6] = (all_in == 0) << 1;
744 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
746 vcmpbfp_internal(env, r, a, b, 0);
749 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
750 ppc_avr_t *b)
752 vcmpbfp_internal(env, r, a, b, 1);
755 #define VCT(suffix, satcvt, element) \
756 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
757 ppc_avr_t *b, uint32_t uim) \
759 int i; \
760 int sat = 0; \
761 float_status s = env->vec_status; \
763 set_float_rounding_mode(float_round_to_zero, &s); \
764 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
765 if (float32_is_any_nan(b->f32[i])) { \
766 r->element[i] = 0; \
767 } else { \
768 float64 t = float32_to_float64(b->f32[i], &s); \
769 int64_t j; \
771 t = float64_scalbn(t, uim, &s); \
772 j = float64_to_int64(t, &s); \
773 r->element[i] = satcvt(j, &sat); \
776 if (sat) { \
777 set_vscr_sat(env); \
780 VCT(uxs, cvtsduw, u32)
781 VCT(sxs, cvtsdsw, s32)
782 #undef VCT
784 target_ulong helper_vclzlsbb(ppc_avr_t *r)
786 target_ulong count = 0;
787 int i;
788 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
789 if (r->VsrB(i) & 0x01) {
790 break;
792 count++;
794 return count;
797 target_ulong helper_vctzlsbb(ppc_avr_t *r)
799 target_ulong count = 0;
800 int i;
801 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
802 if (r->VsrB(i) & 0x01) {
803 break;
805 count++;
807 return count;
810 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
811 ppc_avr_t *b, ppc_avr_t *c)
813 int sat = 0;
814 int i;
816 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
817 int32_t prod = a->s16[i] * b->s16[i];
818 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
820 r->s16[i] = cvtswsh(t, &sat);
823 if (sat) {
824 set_vscr_sat(env);
828 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
829 ppc_avr_t *b, ppc_avr_t *c)
831 int sat = 0;
832 int i;
834 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
835 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
836 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
837 r->s16[i] = cvtswsh(t, &sat);
840 if (sat) {
841 set_vscr_sat(env);
845 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
847 int i;
849 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
850 int32_t prod = a->s16[i] * b->s16[i];
851 r->s16[i] = (int16_t) (prod + c->s16[i]);
855 #define VMRG_DO(name, element, access, ofs) \
856 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
858 ppc_avr_t result; \
859 int i, half = ARRAY_SIZE(r->element) / 2; \
861 for (i = 0; i < half; i++) { \
862 result.access(i * 2 + 0) = a->access(i + ofs); \
863 result.access(i * 2 + 1) = b->access(i + ofs); \
865 *r = result; \
868 #define VMRG(suffix, element, access) \
869 VMRG_DO(mrgl##suffix, element, access, half) \
870 VMRG_DO(mrgh##suffix, element, access, 0)
871 VMRG(b, u8, VsrB)
872 VMRG(h, u16, VsrH)
873 VMRG(w, u32, VsrW)
874 #undef VMRG_DO
875 #undef VMRG
877 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
878 ppc_avr_t *b, ppc_avr_t *c)
880 int32_t prod[16];
881 int i;
883 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
884 prod[i] = (int32_t)a->s8[i] * b->u8[i];
887 VECTOR_FOR_INORDER_I(i, s32) {
888 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
889 prod[4 * i + 2] + prod[4 * i + 3];
893 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
894 ppc_avr_t *b, ppc_avr_t *c)
896 int32_t prod[8];
897 int i;
899 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
900 prod[i] = a->s16[i] * b->s16[i];
903 VECTOR_FOR_INORDER_I(i, s32) {
904 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
908 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
909 ppc_avr_t *b, ppc_avr_t *c)
911 int32_t prod[8];
912 int i;
913 int sat = 0;
915 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
916 prod[i] = (int32_t)a->s16[i] * b->s16[i];
919 VECTOR_FOR_INORDER_I(i, s32) {
920 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
922 r->u32[i] = cvtsdsw(t, &sat);
925 if (sat) {
926 set_vscr_sat(env);
930 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
931 ppc_avr_t *b, ppc_avr_t *c)
933 uint16_t prod[16];
934 int i;
936 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
937 prod[i] = a->u8[i] * b->u8[i];
940 VECTOR_FOR_INORDER_I(i, u32) {
941 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
942 prod[4 * i + 2] + prod[4 * i + 3];
946 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
947 ppc_avr_t *b, ppc_avr_t *c)
949 uint32_t prod[8];
950 int i;
952 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
953 prod[i] = a->u16[i] * b->u16[i];
956 VECTOR_FOR_INORDER_I(i, u32) {
957 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
961 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
962 ppc_avr_t *b, ppc_avr_t *c)
964 uint32_t prod[8];
965 int i;
966 int sat = 0;
968 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
969 prod[i] = a->u16[i] * b->u16[i];
972 VECTOR_FOR_INORDER_I(i, s32) {
973 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
975 r->u32[i] = cvtuduw(t, &sat);
978 if (sat) {
979 set_vscr_sat(env);
983 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
984 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
986 int i; \
988 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
989 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
990 (cast)b->mul_access(i); \
994 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
995 void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
997 int i; \
999 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1000 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1001 (cast)b->mul_access(i + 1); \
1005 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1006 VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \
1007 VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1008 VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1009 VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1010 VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1011 VMUL(UB, u8, VsrB, VsrH, uint16_t)
1012 VMUL(UH, u16, VsrH, VsrW, uint32_t)
1013 VMUL(UW, u32, VsrW, VsrD, uint64_t)
1014 #undef VMUL_DO_EVN
1015 #undef VMUL_DO_ODD
1016 #undef VMUL
1018 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1019 ppc_avr_t *c)
1021 ppc_avr_t result;
1022 int i;
1024 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1025 int s = c->VsrB(i) & 0x1f;
1026 int index = s & 0xf;
1028 if (s & 0x10) {
1029 result.VsrB(i) = b->VsrB(index);
1030 } else {
1031 result.VsrB(i) = a->VsrB(index);
1034 *r = result;
1037 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1038 ppc_avr_t *c)
1040 ppc_avr_t result;
1041 int i;
1043 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1044 int s = c->VsrB(i) & 0x1f;
1045 int index = 15 - (s & 0xf);
1047 if (s & 0x10) {
1048 result.VsrB(i) = a->VsrB(index);
1049 } else {
1050 result.VsrB(i) = b->VsrB(index);
1053 *r = result;
1056 #if defined(HOST_WORDS_BIGENDIAN)
1057 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1058 #define VBPERMD_INDEX(i) (i)
1059 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1060 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1061 #else
1062 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1063 #define VBPERMD_INDEX(i) (1 - i)
1064 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1065 #define EXTRACT_BIT(avr, i, index) \
1066 (extract64((avr)->u64[1 - i], 63 - index, 1))
1067 #endif
1069 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1071 int i, j;
1072 ppc_avr_t result = { .u64 = { 0, 0 } };
1073 VECTOR_FOR_INORDER_I(i, u64) {
1074 for (j = 0; j < 8; j++) {
1075 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1076 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1077 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1081 *r = result;
1084 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1086 int i;
1087 uint64_t perm = 0;
1089 VECTOR_FOR_INORDER_I(i, u8) {
1090 int index = VBPERMQ_INDEX(b, i);
1092 if (index < 128) {
1093 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1094 if (a->u64[VBPERMQ_DW(index)] & mask) {
1095 perm |= (0x8000 >> i);
1100 r->VsrD(0) = perm;
1101 r->VsrD(1) = 0;
1104 #undef VBPERMQ_INDEX
1105 #undef VBPERMQ_DW
1107 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1108 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1110 int i, j; \
1111 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1113 VECTOR_FOR_INORDER_I(i, srcfld) { \
1114 prod[i] = 0; \
1115 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1116 if (a->srcfld[i] & (1ull << j)) { \
1117 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1122 VECTOR_FOR_INORDER_I(i, trgfld) { \
1123 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1127 PMSUM(vpmsumb, u8, u16, uint16_t)
1128 PMSUM(vpmsumh, u16, u32, uint32_t)
1129 PMSUM(vpmsumw, u32, u64, uint64_t)
1131 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1134 #ifdef CONFIG_INT128
1135 int i, j;
1136 __uint128_t prod[2];
1138 VECTOR_FOR_INORDER_I(i, u64) {
1139 prod[i] = 0;
1140 for (j = 0; j < 64; j++) {
1141 if (a->u64[i] & (1ull << j)) {
1142 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1147 r->u128 = prod[0] ^ prod[1];
1149 #else
1150 int i, j;
1151 ppc_avr_t prod[2];
1153 VECTOR_FOR_INORDER_I(i, u64) {
1154 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1155 for (j = 0; j < 64; j++) {
1156 if (a->u64[i] & (1ull << j)) {
1157 ppc_avr_t bshift;
1158 if (j == 0) {
1159 bshift.VsrD(0) = 0;
1160 bshift.VsrD(1) = b->u64[i];
1161 } else {
1162 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1163 bshift.VsrD(1) = b->u64[i] << j;
1165 prod[i].VsrD(1) ^= bshift.VsrD(1);
1166 prod[i].VsrD(0) ^= bshift.VsrD(0);
1171 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1172 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1173 #endif
1177 #if defined(HOST_WORDS_BIGENDIAN)
1178 #define PKBIG 1
1179 #else
1180 #define PKBIG 0
1181 #endif
1182 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1184 int i, j;
1185 ppc_avr_t result;
1186 #if defined(HOST_WORDS_BIGENDIAN)
1187 const ppc_avr_t *x[2] = { a, b };
1188 #else
1189 const ppc_avr_t *x[2] = { b, a };
1190 #endif
1192 VECTOR_FOR_INORDER_I(i, u64) {
1193 VECTOR_FOR_INORDER_I(j, u32) {
1194 uint32_t e = x[i]->u32[j];
1196 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1197 ((e >> 6) & 0x3e0) |
1198 ((e >> 3) & 0x1f));
1201 *r = result;
1204 #define VPK(suffix, from, to, cvt, dosat) \
1205 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1206 ppc_avr_t *a, ppc_avr_t *b) \
1208 int i; \
1209 int sat = 0; \
1210 ppc_avr_t result; \
1211 ppc_avr_t *a0 = PKBIG ? a : b; \
1212 ppc_avr_t *a1 = PKBIG ? b : a; \
1214 VECTOR_FOR_INORDER_I(i, from) { \
1215 result.to[i] = cvt(a0->from[i], &sat); \
1216 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1218 *r = result; \
1219 if (dosat && sat) { \
1220 set_vscr_sat(env); \
1223 #define I(x, y) (x)
1224 VPK(shss, s16, s8, cvtshsb, 1)
1225 VPK(shus, s16, u8, cvtshub, 1)
1226 VPK(swss, s32, s16, cvtswsh, 1)
1227 VPK(swus, s32, u16, cvtswuh, 1)
1228 VPK(sdss, s64, s32, cvtsdsw, 1)
1229 VPK(sdus, s64, u32, cvtsduw, 1)
1230 VPK(uhus, u16, u8, cvtuhub, 1)
1231 VPK(uwus, u32, u16, cvtuwuh, 1)
1232 VPK(udus, u64, u32, cvtuduw, 1)
1233 VPK(uhum, u16, u8, I, 0)
1234 VPK(uwum, u32, u16, I, 0)
1235 VPK(udum, u64, u32, I, 0)
1236 #undef I
1237 #undef VPK
1238 #undef PKBIG
1240 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1242 int i;
1244 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1245 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1249 #define VRFI(suffix, rounding) \
1250 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1251 ppc_avr_t *b) \
1253 int i; \
1254 float_status s = env->vec_status; \
1256 set_float_rounding_mode(rounding, &s); \
1257 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1258 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1261 VRFI(n, float_round_nearest_even)
1262 VRFI(m, float_round_down)
1263 VRFI(p, float_round_up)
1264 VRFI(z, float_round_to_zero)
1265 #undef VRFI
1267 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1269 int i;
1271 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1272 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1274 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1278 #define VRLMI(name, size, element, insert) \
1279 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1281 int i; \
1282 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1283 uint##size##_t src1 = a->element[i]; \
1284 uint##size##_t src2 = b->element[i]; \
1285 uint##size##_t src3 = r->element[i]; \
1286 uint##size##_t begin, end, shift, mask, rot_val; \
1288 shift = extract##size(src2, 0, 6); \
1289 end = extract##size(src2, 8, 6); \
1290 begin = extract##size(src2, 16, 6); \
1291 rot_val = rol##size(src1, shift); \
1292 mask = mask_u##size(begin, end); \
1293 if (insert) { \
1294 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1295 } else { \
1296 r->element[i] = (rot_val & mask); \
1301 VRLMI(vrldmi, 64, u64, 1);
1302 VRLMI(vrlwmi, 32, u32, 1);
1303 VRLMI(vrldnm, 64, u64, 0);
1304 VRLMI(vrlwnm, 32, u32, 0);
1306 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1307 ppc_avr_t *c)
1309 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1310 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1313 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1315 int i;
1317 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1318 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1322 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1324 int i;
1326 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1327 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1331 #define VEXTU_X_DO(name, size, left) \
1332 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1334 int index = (a & 0xf) * 8; \
1335 if (left) { \
1336 index = 128 - index - size; \
1338 return int128_getlo(int128_rshift(b->s128, index)) & \
1339 MAKE_64BIT_MASK(0, size); \
1341 VEXTU_X_DO(vextublx, 8, 1)
1342 VEXTU_X_DO(vextuhlx, 16, 1)
1343 VEXTU_X_DO(vextuwlx, 32, 1)
1344 VEXTU_X_DO(vextubrx, 8, 0)
1345 VEXTU_X_DO(vextuhrx, 16, 0)
1346 VEXTU_X_DO(vextuwrx, 32, 0)
1347 #undef VEXTU_X_DO
1349 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1351 int i;
1352 unsigned int shift, bytes, size;
1354 size = ARRAY_SIZE(r->u8);
1355 for (i = 0; i < size; i++) {
1356 shift = b->VsrB(i) & 0x7; /* extract shift value */
1357 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1358 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1359 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1363 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1365 int i;
1366 unsigned int shift, bytes;
1369 * Use reverse order, as the destination and source registers can be the
1370 * same. Since the register is modified in place (saving a temporary),
1371 * reverse order guarantees that the computed result is not fed back.
1373 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1374 shift = b->VsrB(i) & 0x7; /* extract shift value */
1375 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1376 /* extract adjacent bytes */
1377 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1381 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1383 int sh = shift & 0xf;
1384 int i;
1385 ppc_avr_t result;
1387 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1388 int index = sh + i;
1389 if (index > 0xf) {
1390 result.VsrB(i) = b->VsrB(index - 0x10);
1391 } else {
1392 result.VsrB(i) = a->VsrB(index);
1395 *r = result;
1398 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1400 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1402 #if defined(HOST_WORDS_BIGENDIAN)
1403 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1404 memset(&r->u8[16 - sh], 0, sh);
1405 #else
1406 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1407 memset(&r->u8[0], 0, sh);
1408 #endif
1411 #if defined(HOST_WORDS_BIGENDIAN)
1412 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1413 #else
1414 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1415 #endif
1417 #define VINSX(SUFFIX, TYPE) \
1418 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t, \
1419 uint64_t val, target_ulong index) \
1421 const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE); \
1422 target_long idx = index; \
1424 if (idx < 0 || idx > maxidx) { \
1425 idx = idx < 0 ? sizeof(TYPE) - idx : idx; \
1426 qemu_log_mask(LOG_GUEST_ERROR, \
1427 "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx \
1428 ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx); \
1429 } else { \
1430 TYPE src = val; \
1431 memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE)); \
1434 VINSX(B, uint8_t)
1435 VINSX(H, uint16_t)
1436 VINSX(W, uint32_t)
1437 VINSX(D, uint64_t)
1438 #undef ELEM_ADDR
1439 #undef VINSX
1440 #if defined(HOST_WORDS_BIGENDIAN)
1441 #define VEXTDVLX(NAME, SIZE) \
1442 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1443 target_ulong index) \
1445 const target_long idx = index; \
1446 ppc_avr_t tmp[2] = { *a, *b }; \
1447 memset(t, 0, sizeof(*t)); \
1448 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1449 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1450 } else { \
1451 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1452 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1453 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1456 #else
1457 #define VEXTDVLX(NAME, SIZE) \
1458 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1459 target_ulong index) \
1461 const target_long idx = index; \
1462 ppc_avr_t tmp[2] = { *b, *a }; \
1463 memset(t, 0, sizeof(*t)); \
1464 if (idx >= 0 && idx + SIZE <= sizeof(tmp)) { \
1465 memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2], \
1466 (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE); \
1467 } else { \
1468 qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x" \
1469 TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", \
1470 env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE); \
1473 #endif
1474 VEXTDVLX(VEXTDUBVLX, 1)
1475 VEXTDVLX(VEXTDUHVLX, 2)
1476 VEXTDVLX(VEXTDUWVLX, 4)
1477 VEXTDVLX(VEXTDDVLX, 8)
1478 #undef VEXTDVLX
1479 #if defined(HOST_WORDS_BIGENDIAN)
1480 #define VEXTRACT(suffix, element) \
1481 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1483 uint32_t es = sizeof(r->element[0]); \
1484 memmove(&r->u8[8 - es], &b->u8[index], es); \
1485 memset(&r->u8[8], 0, 8); \
1486 memset(&r->u8[0], 0, 8 - es); \
1488 #else
1489 #define VEXTRACT(suffix, element) \
1490 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1492 uint32_t es = sizeof(r->element[0]); \
1493 uint32_t s = (16 - index) - es; \
1494 memmove(&r->u8[8], &b->u8[s], es); \
1495 memset(&r->u8[0], 0, 8); \
1496 memset(&r->u8[8 + es], 0, 8 - es); \
1498 #endif
1499 VEXTRACT(ub, u8)
1500 VEXTRACT(uh, u16)
1501 VEXTRACT(uw, u32)
1502 VEXTRACT(d, u64)
1503 #undef VEXTRACT
1505 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1506 ppc_vsr_t *xb, uint32_t index)
1508 ppc_vsr_t t = { };
1509 size_t es = sizeof(uint32_t);
1510 uint32_t ext_index;
1511 int i;
1513 ext_index = index;
1514 for (i = 0; i < es; i++, ext_index++) {
1515 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1518 *xt = t;
1521 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1522 ppc_vsr_t *xb, uint32_t index)
1524 ppc_vsr_t t = *xt;
1525 size_t es = sizeof(uint32_t);
1526 int ins_index, i = 0;
1528 ins_index = index;
1529 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1530 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1533 *xt = t;
1536 #define XXBLEND(name, sz) \
1537 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1538 ppc_avr_t *c, uint32_t desc) \
1540 for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) { \
1541 t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ? \
1542 b->glue(u, sz)[i] : a->glue(u, sz)[i]; \
1545 XXBLEND(B, 8)
1546 XXBLEND(H, 16)
1547 XXBLEND(W, 32)
1548 XXBLEND(D, 64)
1549 #undef XXBLEND
1551 #define VNEG(name, element) \
1552 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1554 int i; \
1555 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1556 r->element[i] = -b->element[i]; \
1559 VNEG(vnegw, s32)
1560 VNEG(vnegd, s64)
1561 #undef VNEG
1563 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1565 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1567 #if defined(HOST_WORDS_BIGENDIAN)
1568 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1569 memset(&r->u8[0], 0, sh);
1570 #else
1571 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1572 memset(&r->u8[16 - sh], 0, sh);
1573 #endif
1576 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1578 int i;
1580 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1581 r->u32[i] = a->u32[i] >= b->u32[i];
1585 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1587 int64_t t;
1588 int i, upper;
1589 ppc_avr_t result;
1590 int sat = 0;
1592 upper = ARRAY_SIZE(r->s32) - 1;
1593 t = (int64_t)b->VsrSW(upper);
1594 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1595 t += a->VsrSW(i);
1596 result.VsrSW(i) = 0;
1598 result.VsrSW(upper) = cvtsdsw(t, &sat);
1599 *r = result;
1601 if (sat) {
1602 set_vscr_sat(env);
1606 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1608 int i, j, upper;
1609 ppc_avr_t result;
1610 int sat = 0;
1612 upper = 1;
1613 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1614 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1616 result.VsrD(i) = 0;
1617 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1618 t += a->VsrSW(2 * i + j);
1620 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1623 *r = result;
1624 if (sat) {
1625 set_vscr_sat(env);
1629 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1631 int i, j;
1632 int sat = 0;
1634 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1635 int64_t t = (int64_t)b->s32[i];
1637 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1638 t += a->s8[4 * i + j];
1640 r->s32[i] = cvtsdsw(t, &sat);
1643 if (sat) {
1644 set_vscr_sat(env);
1648 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1650 int sat = 0;
1651 int i;
1653 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1654 int64_t t = (int64_t)b->s32[i];
1656 t += a->s16[2 * i] + a->s16[2 * i + 1];
1657 r->s32[i] = cvtsdsw(t, &sat);
1660 if (sat) {
1661 set_vscr_sat(env);
1665 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1667 int i, j;
1668 int sat = 0;
1670 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1671 uint64_t t = (uint64_t)b->u32[i];
1673 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1674 t += a->u8[4 * i + j];
1676 r->u32[i] = cvtuduw(t, &sat);
1679 if (sat) {
1680 set_vscr_sat(env);
1684 #if defined(HOST_WORDS_BIGENDIAN)
1685 #define UPKHI 1
1686 #define UPKLO 0
1687 #else
1688 #define UPKHI 0
1689 #define UPKLO 1
1690 #endif
1691 #define VUPKPX(suffix, hi) \
1692 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1694 int i; \
1695 ppc_avr_t result; \
1697 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1698 uint16_t e = b->u16[hi ? i : i + 4]; \
1699 uint8_t a = (e >> 15) ? 0xff : 0; \
1700 uint8_t r = (e >> 10) & 0x1f; \
1701 uint8_t g = (e >> 5) & 0x1f; \
1702 uint8_t b = e & 0x1f; \
1704 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1706 *r = result; \
1708 VUPKPX(lpx, UPKLO)
1709 VUPKPX(hpx, UPKHI)
1710 #undef VUPKPX
1712 #define VUPK(suffix, unpacked, packee, hi) \
1713 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1715 int i; \
1716 ppc_avr_t result; \
1718 if (hi) { \
1719 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1720 result.unpacked[i] = b->packee[i]; \
1722 } else { \
1723 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1724 i++) { \
1725 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1728 *r = result; \
1730 VUPK(hsb, s16, s8, UPKHI)
1731 VUPK(hsh, s32, s16, UPKHI)
1732 VUPK(hsw, s64, s32, UPKHI)
1733 VUPK(lsb, s16, s8, UPKLO)
1734 VUPK(lsh, s32, s16, UPKLO)
1735 VUPK(lsw, s64, s32, UPKLO)
1736 #undef VUPK
1737 #undef UPKHI
1738 #undef UPKLO
1740 #define VGENERIC_DO(name, element) \
1741 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1743 int i; \
1745 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1746 r->element[i] = name(b->element[i]); \
1750 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1751 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1753 VGENERIC_DO(clzb, u8)
1754 VGENERIC_DO(clzh, u16)
1756 #undef clzb
1757 #undef clzh
1759 #define ctzb(v) ((v) ? ctz32(v) : 8)
1760 #define ctzh(v) ((v) ? ctz32(v) : 16)
1761 #define ctzw(v) ctz32((v))
1762 #define ctzd(v) ctz64((v))
1764 VGENERIC_DO(ctzb, u8)
1765 VGENERIC_DO(ctzh, u16)
1766 VGENERIC_DO(ctzw, u32)
1767 VGENERIC_DO(ctzd, u64)
1769 #undef ctzb
1770 #undef ctzh
1771 #undef ctzw
1772 #undef ctzd
1774 #define popcntb(v) ctpop8(v)
1775 #define popcnth(v) ctpop16(v)
1776 #define popcntw(v) ctpop32(v)
1777 #define popcntd(v) ctpop64(v)
1779 VGENERIC_DO(popcntb, u8)
1780 VGENERIC_DO(popcnth, u16)
1781 VGENERIC_DO(popcntw, u32)
1782 VGENERIC_DO(popcntd, u64)
1784 #undef popcntb
1785 #undef popcnth
1786 #undef popcntw
1787 #undef popcntd
1789 #undef VGENERIC_DO
1791 #if defined(HOST_WORDS_BIGENDIAN)
1792 #define QW_ONE { .u64 = { 0, 1 } }
1793 #else
1794 #define QW_ONE { .u64 = { 1, 0 } }
1795 #endif
1797 #ifndef CONFIG_INT128
1799 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1801 t->u64[0] = ~a.u64[0];
1802 t->u64[1] = ~a.u64[1];
1805 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1807 if (a.VsrD(0) < b.VsrD(0)) {
1808 return -1;
1809 } else if (a.VsrD(0) > b.VsrD(0)) {
1810 return 1;
1811 } else if (a.VsrD(1) < b.VsrD(1)) {
1812 return -1;
1813 } else if (a.VsrD(1) > b.VsrD(1)) {
1814 return 1;
1815 } else {
1816 return 0;
1820 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1822 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1823 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1824 (~a.VsrD(1) < b.VsrD(1));
1827 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1829 ppc_avr_t not_a;
1830 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1831 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1832 (~a.VsrD(1) < b.VsrD(1));
1833 avr_qw_not(&not_a, a);
1834 return avr_qw_cmpu(not_a, b) < 0;
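/*
 * In avr_qw_add() and avr_qw_addc() above, the (~a.VsrD(1) < b.VsrD(1))
 * term is the carry out of the low-doubleword addition; avr_qw_addc()
 * additionally reports the carry out of the full 128-bit sum via
 * avr_qw_cmpu(~a, b) < 0.
 */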
1837 #endif
1839 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1841 #ifdef CONFIG_INT128
1842 r->u128 = a->u128 + b->u128;
1843 #else
1844 avr_qw_add(r, *a, *b);
1845 #endif
1848 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1850 #ifdef CONFIG_INT128
1851 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1852 #else
1854 if (c->VsrD(1) & 1) {
1855 ppc_avr_t tmp;
1857 tmp.VsrD(0) = 0;
1858 tmp.VsrD(1) = c->VsrD(1) & 1;
1859 avr_qw_add(&tmp, *a, tmp);
1860 avr_qw_add(r, tmp, *b);
1861 } else {
1862 avr_qw_add(r, *a, *b);
1864 #endif
1867 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1869 #ifdef CONFIG_INT128
1870 r->u128 = (~a->u128 < b->u128);
1871 #else
1872 ppc_avr_t not_a;
1874 avr_qw_not(&not_a, *a);
1876 r->VsrD(0) = 0;
1877 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
1878 #endif
1881 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1883 #ifdef CONFIG_INT128
1884 int carry_out = (~a->u128 < b->u128);
1885 if (!carry_out && (c->u128 & 1)) {
1886 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
1887 ((a->u128 != 0) || (b->u128 != 0));
1889 r->u128 = carry_out;
1890 #else
1892 int carry_in = c->VsrD(1) & 1;
1893 int carry_out = 0;
1894 ppc_avr_t tmp;
1896 carry_out = avr_qw_addc(&tmp, *a, *b);
1898 if (!carry_out && carry_in) {
1899 ppc_avr_t one = QW_ONE;
1900 carry_out = avr_qw_addc(&tmp, tmp, one);
1902 r->VsrD(0) = 0;
1903 r->VsrD(1) = carry_out;
1904 #endif
1907 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1909 #ifdef CONFIG_INT128
1910 r->u128 = a->u128 - b->u128;
1911 #else
1912 ppc_avr_t tmp;
1913 ppc_avr_t one = QW_ONE;
1915 avr_qw_not(&tmp, *b);
1916 avr_qw_add(&tmp, *a, tmp);
1917 avr_qw_add(r, tmp, one);
1918 #endif
1921 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1923 #ifdef CONFIG_INT128
1924 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
1925 #else
1926 ppc_avr_t tmp, sum;
1928 avr_qw_not(&tmp, *b);
1929 avr_qw_add(&sum, *a, tmp);
1931 tmp.VsrD(0) = 0;
1932 tmp.VsrD(1) = c->VsrD(1) & 1;
1933 avr_qw_add(r, sum, tmp);
1934 #endif
1937 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1939 #ifdef CONFIG_INT128
1940 r->u128 = (~a->u128 < ~b->u128) ||
1941 (a->u128 + ~b->u128 == (__uint128_t)-1);
1942 #else
1943 int carry = (avr_qw_cmpu(*a, *b) > 0);
1944 if (!carry) {
1945 ppc_avr_t tmp;
1946 avr_qw_not(&tmp, *b);
1947 avr_qw_add(&tmp, *a, tmp);
1948 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
1950 r->VsrD(0) = 0;
1951 r->VsrD(1) = carry;
1952 #endif
1955 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1957 #ifdef CONFIG_INT128
1958 r->u128 =
1959 (~a->u128 < ~b->u128) ||
1960 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
1961 #else
1962 int carry_in = c->VsrD(1) & 1;
1963 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
1964 if (!carry_out && carry_in) {
1965 ppc_avr_t tmp;
1966 avr_qw_not(&tmp, *b);
1967 avr_qw_add(&tmp, *a, tmp);
1968 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
1971 r->VsrD(0) = 0;
1972 r->VsrD(1) = carry_out;
1973 #endif
1976 #define BCD_PLUS_PREF_1 0xC
1977 #define BCD_PLUS_PREF_2 0xF
1978 #define BCD_PLUS_ALT_1 0xA
1979 #define BCD_NEG_PREF 0xD
1980 #define BCD_NEG_ALT 0xB
1981 #define BCD_PLUS_ALT_2 0xE
1982 #define NATIONAL_PLUS 0x2B
1983 #define NATIONAL_NEG 0x2D
1985 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
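/*
 * Packed-decimal layout used below: the sign code is the least
 * significant nibble of the 128-bit register (digit index 0) and digits
 * 1..31 occupy successively more significant nibbles; BCD_DIG_BYTE(n)
 * gives the VsrB() index of the byte holding digit n, with odd-numbered
 * digits in the high nibble of that byte.
 */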
1987 static int bcd_get_sgn(ppc_avr_t *bcd)
1989 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
1990 case BCD_PLUS_PREF_1:
1991 case BCD_PLUS_PREF_2:
1992 case BCD_PLUS_ALT_1:
1993 case BCD_PLUS_ALT_2:
1995 return 1;
1998 case BCD_NEG_PREF:
1999 case BCD_NEG_ALT:
2001 return -1;
2004 default:
2006 return 0;
2011 static int bcd_preferred_sgn(int sgn, int ps)
2013 if (sgn >= 0) {
2014 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2015 } else {
2016 return BCD_NEG_PREF;
2020 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2022 uint8_t result;
2023 if (n & 1) {
2024 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2025 } else {
2026 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2029 if (unlikely(result > 9)) {
2030 *invalid = true;
2032 return result;
2035 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2037 if (n & 1) {
2038 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2039 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2040 } else {
2041 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2042 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2046 static bool bcd_is_valid(ppc_avr_t *bcd)
2048 int i;
2049 int invalid = 0;
2051 if (bcd_get_sgn(bcd) == 0) {
2052 return false;
2055 for (i = 1; i < 32; i++) {
2056 bcd_get_digit(bcd, i, &invalid);
2057 if (unlikely(invalid)) {
2058 return false;
2061 return true;
2064 static int bcd_cmp_zero(ppc_avr_t *bcd)
2066 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2067 return CRF_EQ;
2068 } else {
2069 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2073 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2075 return reg->VsrH(7 - n);
2078 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2080 reg->VsrH(7 - n) = val;
2083 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2085 int i;
2086 int invalid = 0;
2087 for (i = 31; i > 0; i--) {
2088 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2089 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2090 if (unlikely(invalid)) {
2091 return 0; /* doesn't matter */
2092 } else if (dig_a > dig_b) {
2093 return 1;
2094 } else if (dig_a < dig_b) {
2095 return -1;
2099 return 0;
2102 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2103 int *overflow)
2105 int carry = 0;
2106 int i;
2107 int is_zero = 1;
2109 for (i = 1; i <= 31; i++) {
2110 uint8_t digit = bcd_get_digit(a, i, invalid) +
2111 bcd_get_digit(b, i, invalid) + carry;
2112 is_zero &= (digit == 0);
2113 if (digit > 9) {
2114 carry = 1;
2115 digit -= 10;
2116 } else {
2117 carry = 0;
2120 bcd_put_digit(t, digit, i);
2123 *overflow = carry;
2124 return is_zero;
2127 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2128 int *overflow)
2130 int carry = 0;
2131 int i;
2133 for (i = 1; i <= 31; i++) {
2134 uint8_t digit = bcd_get_digit(a, i, invalid) -
2135 bcd_get_digit(b, i, invalid) + carry;
2136 if (digit & 0x80) {
2137 carry = -1;
2138 digit += 10;
2139 } else {
2140 carry = 0;
2143 bcd_put_digit(t, digit, i);
2146 *overflow = carry;
2149 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2152 int sgna = bcd_get_sgn(a);
2153 int sgnb = bcd_get_sgn(b);
2154 int invalid = (sgna == 0) || (sgnb == 0);
2155 int overflow = 0;
2156 int zero = 0;
2157 uint32_t cr = 0;
2158 ppc_avr_t result = { .u64 = { 0, 0 } };
2160 if (!invalid) {
2161 if (sgna == sgnb) {
2162 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2163 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2164 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2165 } else {
2166 int magnitude = bcd_cmp_mag(a, b);
2167 if (magnitude > 0) {
2168 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2169 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2170 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2171 } else if (magnitude < 0) {
2172 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2173 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2174 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2175 } else {
2176 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2177 cr = CRF_EQ;
2182 if (unlikely(invalid)) {
2183 result.VsrD(0) = result.VsrD(1) = -1;
2184 cr = CRF_SO;
2185 } else if (overflow) {
2186 cr |= CRF_SO;
2187 } else if (zero) {
2188 cr |= CRF_EQ;
2191 *r = result;
2193 return cr;
2196 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2198 ppc_avr_t bcopy = *b;
2199 int sgnb = bcd_get_sgn(b);
2200 if (sgnb < 0) {
2201 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2202 } else if (sgnb > 0) {
2203 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2205 /* else invalid ... defer to bcdadd code for proper handling */
2207 return helper_bcdadd(r, a, &bcopy, ps);
2210 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2212 int i;
2213 int cr = 0;
2214 uint16_t national = 0;
2215 uint16_t sgnb = get_national_digit(b, 0);
2216 ppc_avr_t ret = { .u64 = { 0, 0 } };
2217 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2219 for (i = 1; i < 8; i++) {
2220 national = get_national_digit(b, i);
2221 if (unlikely(national < 0x30 || national > 0x39)) {
2222 invalid = 1;
2223 break;
2226 bcd_put_digit(&ret, national & 0xf, i);
2229 if (sgnb == NATIONAL_PLUS) {
2230 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2231 } else {
2232 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2235 cr = bcd_cmp_zero(&ret);
2237 if (unlikely(invalid)) {
2238 cr = CRF_SO;
2241 *r = ret;
2243 return cr;
2246 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2248 int i;
2249 int cr = 0;
2250 int sgnb = bcd_get_sgn(b);
2251 int invalid = (sgnb == 0);
2252 ppc_avr_t ret = { .u64 = { 0, 0 } };
2254 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2256 for (i = 1; i < 8; i++) {
2257 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2259 if (unlikely(invalid)) {
2260 break;
2263 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2265 cr = bcd_cmp_zero(b);
2267 if (ox_flag) {
2268 cr |= CRF_SO;
2271 if (unlikely(invalid)) {
2272 cr = CRF_SO;
2275 *r = ret;
2277 return cr;
2280 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2282 int i;
2283 int cr = 0;
2284 int invalid = 0;
2285 int zone_digit = 0;
2286 int zone_lead = ps ? 0xF : 0x3;
2287 int digit = 0;
2288 ppc_avr_t ret = { .u64 = { 0, 0 } };
2289 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2291 if (unlikely((sgnb < 0xA) && ps)) {
2292 invalid = 1;
2295 for (i = 0; i < 16; i++) {
2296 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2297 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2298 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2299 invalid = 1;
2300 break;
2303 bcd_put_digit(&ret, digit, i + 1);
2306 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2307 (!ps && (sgnb & 0x4))) {
2308 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2309 } else {
2310 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2313 cr = bcd_cmp_zero(&ret);
2315 if (unlikely(invalid)) {
2316 cr = CRF_SO;
2319 *r = ret;
2321 return cr;
2324 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2326 int i;
2327 int cr = 0;
2328 uint8_t digit = 0;
2329 int sgnb = bcd_get_sgn(b);
2330 int zone_lead = (ps) ? 0xF0 : 0x30;
2331 int invalid = (sgnb == 0);
2332 ppc_avr_t ret = { .u64 = { 0, 0 } };
2334 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2336 for (i = 0; i < 16; i++) {
2337 digit = bcd_get_digit(b, i + 1, &invalid);
2339 if (unlikely(invalid)) {
2340 break;
2343 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2346 if (ps) {
2347 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2348 } else {
2349 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2352 cr = bcd_cmp_zero(b);
2354 if (ox_flag) {
2355 cr |= CRF_SO;
2358 if (unlikely(invalid)) {
2359 cr = CRF_SO;
2362 *r = ret;
2364 return cr;
2368 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2370 * Returns:
2371 * > 0 if ahi|alo > bhi|blo,
2372 * 0 if ahi|alo == bhi|blo,
2373 * < 0 if ahi|alo < bhi|blo
2375 static inline int ucmp128(uint64_t alo, uint64_t ahi,
2376 uint64_t blo, uint64_t bhi)
2378 return (ahi == bhi) ?
2379 (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2380 (ahi > bhi ? 1 : -1);
2383 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2385 int i;
2386 int cr;
2387 uint64_t lo_value;
2388 uint64_t hi_value;
2389 uint64_t rem;
2390 ppc_avr_t ret = { .u64 = { 0, 0 } };
2392 if (b->VsrSD(0) < 0) {
2393 lo_value = -b->VsrSD(1);
2394 hi_value = ~b->VsrD(0) + !lo_value;
2395 bcd_put_digit(&ret, 0xD, 0);
2397 cr = CRF_LT;
2398 } else {
2399 lo_value = b->VsrD(1);
2400 hi_value = b->VsrD(0);
2401 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2403 if (hi_value == 0 && lo_value == 0) {
2404 cr = CRF_EQ;
2405 } else {
2406 cr = CRF_GT;
2411 * Check src limits: abs(src) <= 10^31 - 1
2413 * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2415 if (ucmp128(lo_value, hi_value,
2416 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2417 cr |= CRF_SO;
2419 /*
2420 * According to the ISA, if src wouldn't fit in the destination
2421 * register, the result is undefined.
2422 * In that case, we leave r unchanged.
2423 */
2424 } else {
2425 rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2427 for (i = 1; i < 16; rem /= 10, i++) {
2428 bcd_put_digit(&ret, rem % 10, i);
2431 for (; i < 32; lo_value /= 10, i++) {
2432 bcd_put_digit(&ret, lo_value % 10, i);
2435 *r = ret;
2438 return cr;
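/*
 * The digit extraction above leans on the range check: once abs(src) is
 * at most 10^31 - 1, dividing by 10^15 leaves a quotient that still fits
 * in 64 bits, so digits 1..15 come from the remainder and digits 16..31
 * from the quotient.  A rough scalar sketch of the same idea:
 *
 *   rem  = val % 1000000000000000ULL;
 *   quot = val / 1000000000000000ULL;
 *   for (i = 1; i < 16; i++, rem /= 10)  { digit[i] = rem % 10;  }
 *   for (     ; i < 32; i++, quot /= 10) { digit[i] = quot % 10; }
 */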
2441 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2443 uint8_t i;
2444 int cr;
2445 uint64_t carry;
2446 uint64_t unused;
2447 uint64_t lo_value;
2448 uint64_t hi_value = 0;
2449 int sgnb = bcd_get_sgn(b);
2450 int invalid = (sgnb == 0);
2452 lo_value = bcd_get_digit(b, 31, &invalid);
2453 for (i = 30; i > 0; i--) {
2454 mulu64(&lo_value, &carry, lo_value, 10ULL);
2455 mulu64(&hi_value, &unused, hi_value, 10ULL);
2456 lo_value += bcd_get_digit(b, i, &invalid);
2457 hi_value += carry;
2459 if (unlikely(invalid)) {
2460 break;
2464 if (sgnb == -1) {
2465 r->VsrSD(1) = -lo_value;
2466 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2467 } else {
2468 r->VsrSD(1) = lo_value;
2469 r->VsrSD(0) = hi_value;
2472 cr = bcd_cmp_zero(b);
2474 if (unlikely(invalid)) {
2475 cr = CRF_SO;
2478 return cr;
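/*
 * The loop above is a 128-bit Horner evaluation, accumulating from the
 * most significant digit down: acc = acc * 10 + digit.  A rough scalar
 * sketch, ignoring the mulu64-based carry handling:
 *
 *   acc = digit[31];
 *   for (i = 30; i > 0; i--) {
 *       acc = acc * 10 + digit[i];
 *   }
 */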
2481 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2483 int i;
2484 int invalid = 0;
2486 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2487 return CRF_SO;
2490 *r = *a;
2491 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2493 for (i = 1; i < 32; i++) {
2494 bcd_get_digit(a, i, &invalid);
2495 bcd_get_digit(b, i, &invalid);
2496 if (unlikely(invalid)) {
2497 return CRF_SO;
2501 return bcd_cmp_zero(r);
2504 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2506 int sgnb = bcd_get_sgn(b);
2508 *r = *b;
2509 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2511 if (bcd_is_valid(b) == false) {
2512 return CRF_SO;
2515 return bcd_cmp_zero(r);
2518 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2520 int cr;
2521 int i = a->VsrSB(7);
2522 bool ox_flag = false;
2523 int sgnb = bcd_get_sgn(b);
2524 ppc_avr_t ret = *b;
2525 ret.VsrD(1) &= ~0xf;
2527 if (bcd_is_valid(b) == false) {
2528 return CRF_SO;
2531 if (unlikely(i > 31)) {
2532 i = 31;
2533 } else if (unlikely(i < -31)) {
2534 i = -31;
2537 if (i > 0) {
2538 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2539 } else {
2540 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2542 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2544 *r = ret;
2546 cr = bcd_cmp_zero(r);
2547 if (ox_flag) {
2548 cr |= CRF_SO;
2551 return cr;
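/*
 * bcds shifts the 31 BCD digits by the signed count in VsrSB(7) of 'a',
 * clamped to +/-31; with 4 bits per digit that is the i * 4 bit shift
 * above.  A left shift that pushes a nonzero digit out sets ox_flag and
 * hence CRF_SO; e.g. a count of +2 turns +123 into +12300, a count of -2
 * turns it into +1.
 */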
2554 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2556 int cr;
2557 int i;
2558 int invalid = 0;
2559 bool ox_flag = false;
2560 ppc_avr_t ret = *b;
2562 for (i = 0; i < 32; i++) {
2563 bcd_get_digit(b, i, &invalid);
2565 if (unlikely(invalid)) {
2566 return CRF_SO;
2570 i = a->VsrSB(7);
2571 if (i >= 32) {
2572 ox_flag = true;
2573 ret.VsrD(1) = ret.VsrD(0) = 0;
2574 } else if (i <= -32) {
2575 ret.VsrD(1) = ret.VsrD(0) = 0;
2576 } else if (i > 0) {
2577 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2578 } else {
2579 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2581 *r = ret;
2583 cr = bcd_cmp_zero(r);
2584 if (ox_flag) {
2585 cr |= CRF_SO;
2588 return cr;
2591 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2593 int cr;
2594 int unused = 0;
2595 int invalid = 0;
2596 bool ox_flag = false;
2597 int sgnb = bcd_get_sgn(b);
2598 ppc_avr_t ret = *b;
2599 ret.VsrD(1) &= ~0xf;
2601 int i = a->VsrSB(7);
2602 ppc_avr_t bcd_one;
2604 bcd_one.VsrD(0) = 0;
2605 bcd_one.VsrD(1) = 0x10;
2607 if (bcd_is_valid(b) == false) {
2608 return CRF_SO;
2611 if (unlikely(i > 31)) {
2612 i = 31;
2613 } else if (unlikely(i < -31)) {
2614 i = -31;
2617 if (i > 0) {
2618 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2619 } else {
2620 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2622 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2623 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2626 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2628 cr = bcd_cmp_zero(&ret);
2629 if (ox_flag) {
2630 cr |= CRF_SO;
2632 *r = ret;
2634 return cr;
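/*
 * bcdsr is "shift and round": the sign nibble (position 0) is cleared
 * before shifting, so after a right shift it briefly holds the most
 * significant digit that was shifted out.  If that digit is >= 5, one is
 * added at digit position 1 via bcd_one to round up; e.g. shifting +127
 * right by one digit yields +13 rather than +12.
 */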
2637 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2639 uint64_t mask;
2640 uint32_t ox_flag = 0;
2641 int i = a->VsrSH(3) + 1;
2642 ppc_avr_t ret = *b;
2644 if (bcd_is_valid(b) == false) {
2645 return CRF_SO;
2648 if (i > 16 && i < 32) {
2649 mask = (uint64_t)-1 >> (128 - i * 4);
2650 if (ret.VsrD(0) & ~mask) {
2651 ox_flag = CRF_SO;
2654 ret.VsrD(0) &= mask;
2655 } else if (i >= 0 && i <= 16) {
2656 mask = (uint64_t)-1 >> (64 - i * 4);
2657 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2658 ox_flag = CRF_SO;
2661 ret.VsrD(1) &= mask;
2662 ret.VsrD(0) = 0;
2664 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2665 *r = ret;
2667 return bcd_cmp_zero(&ret) | ox_flag;
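/*
 * bcdtrunc keeps only the low digits of 'b': VsrSH(3) of 'a' gives the
 * digit count, and the + 1 above accounts for the sign nibble in
 * position 0.  With 4 bits per nibble the mask covers i * 4 low bits,
 * spilling into the upper doubleword once i > 16, and CRF_SO is or'ed in
 * whenever a nonzero digit is discarded.
 */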
2670 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2672 int i;
2673 uint64_t mask;
2674 uint32_t ox_flag = 0;
2675 int invalid = 0;
2676 ppc_avr_t ret = *b;
2678 for (i = 0; i < 32; i++) {
2679 bcd_get_digit(b, i, &invalid);
2681 if (unlikely(invalid)) {
2682 return CRF_SO;
2686 i = a->VsrSH(3);
2687 if (i > 16 && i < 33) {
2688 mask = (uint64_t)-1 >> (128 - i * 4);
2689 if (ret.VsrD(0) & ~mask) {
2690 ox_flag = CRF_SO;
2693 ret.VsrD(0) &= mask;
2694 } else if (i > 0 && i <= 16) {
2695 mask = (uint64_t)-1 >> (64 - i * 4);
2696 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2697 ox_flag = CRF_SO;
2700 ret.VsrD(1) &= mask;
2701 ret.VsrD(0) = 0;
2702 } else if (i == 0) {
2703 if (ret.VsrD(0) || ret.VsrD(1)) {
2704 ox_flag = CRF_SO;
2706 ret.VsrD(0) = ret.VsrD(1) = 0;
2709 *r = ret;
2710 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2711 return ox_flag | CRF_EQ;
2714 return ox_flag | CRF_GT;
2717 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2719 int i;
2720 VECTOR_FOR_INORDER_I(i, u8) {
2721 r->u8[i] = AES_sbox[a->u8[i]];
2725 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2727 ppc_avr_t result;
2728 int i;
2730 VECTOR_FOR_INORDER_I(i, u32) {
2731 result.VsrW(i) = b->VsrW(i) ^
2732 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2733 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2734 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2735 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2737 *r = result;
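/*
 * vcipher is one forward AES round; the Te tables from crypto/aes.h fold
 * SubBytes and MixColumns together, so each output word is assembled from
 * four shifted input bytes and xor'ed with the round key in 'b'.
 * Conceptually:
 *
 *   r = MixColumns(ShiftRows(SubBytes(a))) ^ b
 *
 * vcipherlast below is the same round without the MixColumns step.
 */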
2740 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2742 ppc_avr_t result;
2743 int i;
2745 VECTOR_FOR_INORDER_I(i, u8) {
2746 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2748 *r = result;
2751 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2753 /* This differs from what is written in ISA V2.07. The RTL is */
2754 /* incorrect and will be fixed in V2.07B. */
2755 int i;
2756 ppc_avr_t tmp;
2758 VECTOR_FOR_INORDER_I(i, u8) {
2759 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2762 VECTOR_FOR_INORDER_I(i, u32) {
2763 r->VsrW(i) =
2764 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2765 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2766 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2767 AES_imc[tmp.VsrB(4 * i + 3)][3];
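/*
 * vncipher is one round of AES decryption: inverse ShiftRows and inverse
 * SubBytes via AES_ishifts/AES_isbox, xor with the round key in 'b', then
 * InvMixColumns through the AES_imc lookup table.  vncipherlast below
 * omits the InvMixColumns step.
 */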
2771 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2773 ppc_avr_t result;
2774 int i;
2776 VECTOR_FOR_INORDER_I(i, u8) {
2777 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2779 *r = result;
2782 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2784 int st = (st_six & 0x10) != 0;
2785 int six = st_six & 0xF;
2786 int i;
2788 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2789 if (st == 0) {
2790 if ((six & (0x8 >> i)) == 0) {
2791 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2792 ror32(a->VsrW(i), 18) ^
2793 (a->VsrW(i) >> 3);
2794 } else { /* six.bit[i] == 1 */
2795 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2796 ror32(a->VsrW(i), 19) ^
2797 (a->VsrW(i) >> 10);
2799 } else { /* st == 1 */
2800 if ((six & (0x8 >> i)) == 0) {
2801 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2802 ror32(a->VsrW(i), 13) ^
2803 ror32(a->VsrW(i), 22);
2804 } else { /* six.bit[i] == 1 */
2805 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2806 ror32(a->VsrW(i), 11) ^
2807 ror32(a->VsrW(i), 25);
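/*
 * The four cases above are the SHA-256 sigma functions of FIPS 180-4,
 * selected per word by the st bit and the matching bit of six:
 *
 *   st=0, bit clear:  s0(x) = ROTR7(x)  ^ ROTR18(x) ^ (x >> 3)
 *   st=0, bit set:    s1(x) = ROTR17(x) ^ ROTR19(x) ^ (x >> 10)
 *   st=1, bit clear:  S0(x) = ROTR2(x)  ^ ROTR13(x) ^ ROTR22(x)
 *   st=1, bit set:    S1(x) = ROTR6(x)  ^ ROTR11(x) ^ ROTR25(x)
 */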
2813 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2815 int st = (st_six & 0x10) != 0;
2816 int six = st_six & 0xF;
2817 int i;
2819 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2820 if (st == 0) {
2821 if ((six & (0x8 >> (2 * i))) == 0) {
2822 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
2823 ror64(a->VsrD(i), 8) ^
2824 (a->VsrD(i) >> 7);
2825 } else { /* six.bit[2*i] == 1 */
2826 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
2827 ror64(a->VsrD(i), 61) ^
2828 (a->VsrD(i) >> 6);
2830 } else { /* st == 1 */
2831 if ((six & (0x8 >> (2 * i))) == 0) {
2832 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
2833 ror64(a->VsrD(i), 34) ^
2834 ror64(a->VsrD(i), 39);
2835 } else { /* six.bit[2*i] == 1 */
2836 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
2837 ror64(a->VsrD(i), 18) ^
2838 ror64(a->VsrD(i), 41);
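/*
 * Likewise the SHA-512 sigma functions, with one selector bit of six per
 * 64-bit element:
 *
 *   st=0: s0(x) = ROTR1(x)  ^ ROTR8(x)  ^ (x >> 7)
 *         s1(x) = ROTR19(x) ^ ROTR61(x) ^ (x >> 6)
 *   st=1: S0(x) = ROTR28(x) ^ ROTR34(x) ^ ROTR39(x)
 *         S1(x) = ROTR14(x) ^ ROTR18(x) ^ ROTR41(x)
 */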
2844 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2846 ppc_avr_t result;
2847 int i;
2849 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
2850 int indexA = c->VsrB(i) >> 4;
2851 int indexB = c->VsrB(i) & 0xF;
2853 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
2855 *r = result;
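/*
 * vpermxor: for each byte position i the control byte c[i] selects one
 * byte of 'a' with its high nibble and one byte of 'b' with its low
 * nibble, and the result is their xor:
 *
 *   r[i] = a[c[i] >> 4] ^ b[c[i] & 0xF]
 */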
2858 #undef VECTOR_FOR_INORDER_I
2860 /*****************************************************************************/
2861 /* SPE extension helpers */
2862 /* Use a table to make this quicker */
2863 static const uint8_t hbrev[16] = {
2864 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
2865 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
2868 static inline uint8_t byte_reverse(uint8_t val)
2870 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
2873 static inline uint32_t word_reverse(uint32_t val)
2875 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
2876 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
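/*
 * byte_reverse() mirrors the 8 bits of a byte using the nibble table
 * above, e.g. byte_reverse(0x2C) = hbrev[0x2] | (hbrev[0xC] << 4)
 * = 0x04 | 0x30 = 0x34 (0b00101100 -> 0b00110100); word_reverse()
 * extends this to 32 bits by also swapping the four bytes.
 */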
2879 #define MASKBITS 16 /* Arbitrary value; the mask width is implementation dependent */
2880 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2882 uint32_t a, b, d, mask;
2884 mask = UINT32_MAX >> (32 - MASKBITS);
2885 a = arg1 & mask;
2886 b = arg2 & mask;
2887 d = word_reverse(1 + word_reverse(a | ~b));
2888 return (arg1 & ~mask) | (d & b);
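/*
 * brinc is roughly a bit-reversed increment, as used for FFT-style
 * buffer addressing: the index bits selected by arg2 are bit-reversed,
 * incremented by one and reversed back, while the bits of arg1 outside
 * the low MASKBITS bits pass through unchanged.
 */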
2891 uint32_t helper_cntlsw32(uint32_t val)
2893 if (val & 0x80000000) {
2894 return clz32(~val);
2895 } else {
2896 return clz32(val);
2900 uint32_t helper_cntlzw32(uint32_t val)
2902 return clz32(val);
2905 /* 440 specific */
2906 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2907 target_ulong low, uint32_t update_Rc)
2909 target_ulong mask;
2910 int i;
2912 i = 1;
2913 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2914 if ((high & mask) == 0) {
2915 if (update_Rc) {
2916 env->crf[0] = 0x4;
2918 goto done;
2920 i++;
2922 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2923 if ((low & mask) == 0) {
2924 if (update_Rc) {
2925 env->crf[0] = 0x8;
2927 goto done;
2929 i++;
2931 i = 8;
2932 if (update_Rc) {
2933 env->crf[0] = 0x2;
2935 done:
2936 env->xer = (env->xer & ~0x7F) | i;
2937 if (update_Rc) {
2938 env->crf[0] |= xer_so;
2940 return i;
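/*
 * dlmzb scans the 8-byte string formed by 'high' then 'low' for a zero
 * byte, returning the resulting byte count in the low 7 bits of XER.
 * With update_Rc set, CR0 reports where the match was found: 0x4 for the
 * high word, 0x8 for the low word, 0x2 when no zero byte is present,
 * always or'ed with the current SO bit.
 */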