target/ppc: fix vextu[bhw][lr]x helpers
[qemu/rayw.git] / target / ppc / int_helper.c
blobc2d3248d1e4be80b7b29f071a68d917d8307e35b
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
32 #include "helper_regs.h"
33 /*****************************************************************************/
34 /* Fixed point operations helpers */
36 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
38 if (unlikely(ov)) {
39 env->so = env->ov = 1;
40 } else {
41 env->ov = 0;
45 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
46 uint32_t oe)
48 uint64_t rt = 0;
49 int overflow = 0;
51 uint64_t dividend = (uint64_t)ra << 32;
52 uint64_t divisor = (uint32_t)rb;
54 if (unlikely(divisor == 0)) {
55 overflow = 1;
56 } else {
57 rt = dividend / divisor;
58 overflow = rt > UINT32_MAX;
61 if (unlikely(overflow)) {
62 rt = 0; /* Undefined */
65 if (oe) {
66 helper_update_ov_legacy(env, overflow);
69 return (target_ulong)rt;
72 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
73 uint32_t oe)
75 int64_t rt = 0;
76 int overflow = 0;
78 int64_t dividend = (int64_t)ra << 32;
79 int64_t divisor = (int64_t)((int32_t)rb);
81 if (unlikely((divisor == 0) ||
82 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
83 overflow = 1;
84 } else {
85 rt = dividend / divisor;
86 overflow = rt != (int32_t)rt;
89 if (unlikely(overflow)) {
90 rt = 0; /* Undefined */
93 if (oe) {
94 helper_update_ov_legacy(env, overflow);
97 return (target_ulong)rt;
100 #if defined(TARGET_PPC64)
102 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
104 uint64_t rt = 0;
105 int overflow = 0;
107 overflow = divu128(&rt, &ra, rb);
109 if (unlikely(overflow)) {
110 rt = 0; /* Undefined */
113 if (oe) {
114 helper_update_ov_legacy(env, overflow);
117 return rt;
120 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
122 int64_t rt = 0;
123 int64_t ra = (int64_t)rau;
124 int64_t rb = (int64_t)rbu;
125 int overflow = divs128(&rt, &ra, rb);
127 if (unlikely(overflow)) {
128 rt = 0; /* Undefined */
131 if (oe) {
132 helper_update_ov_legacy(env, overflow);
135 return rt;
138 #endif
141 #if defined(TARGET_PPC64)
142 /* if x = 0xab, returns 0xababababababababa */
143 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
146 * subtract 1 from each byte, and with inverse, check if MSB is set at each
147 * byte.
148 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
149 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
151 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
153 /* When you XOR the pattern and there is a match, that byte will be zero */
154 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
156 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
158 return hasvalue(rb, ra) ? CRF_GT : 0;
161 #undef pattern
162 #undef haszero
163 #undef hasvalue
166 * Return a random number.
168 uint64_t helper_darn32(void)
170 Error *err = NULL;
171 uint32_t ret;
173 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
174 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
175 error_get_pretty(err));
176 error_free(err);
177 return -1;
180 return ret;
183 uint64_t helper_darn64(void)
185 Error *err = NULL;
186 uint64_t ret;
188 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
189 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
190 error_get_pretty(err));
191 error_free(err);
192 return -1;
195 return ret;
/*
 * Bit permute doubleword.
 *
 * Each of the eight bytes of 'rs' is a bit index into 'rb' (tested through
 * PPC_BIT()).  Byte number k of rs (counting from the least significant
 * byte) controls result bit k; indices of 64 and above leave that result
 * bit clear.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t ra = 0;
    int byte;

    for (byte = 0; byte < 8; byte++) {
        const int index = (rs >> (byte * 8)) & 0xFF;

        if (index >= 64) {
            continue;
        }
        if (rb & PPC_BIT(index)) {
            ra |= 1 << byte;
        }
    }

    return ra;
}
214 #endif
216 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
218 target_ulong mask = 0xff;
219 target_ulong ra = 0;
220 int i;
222 for (i = 0; i < sizeof(target_ulong); i++) {
223 if ((rs & mask) == (rb & mask)) {
224 ra |= mask;
226 mask <<= 8;
228 return ra;
231 /* shift right arithmetic helper */
232 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
233 target_ulong shift)
235 int32_t ret;
237 if (likely(!(shift & 0x20))) {
238 if (likely((uint32_t)shift != 0)) {
239 shift &= 0x1f;
240 ret = (int32_t)value >> shift;
241 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
242 env->ca32 = env->ca = 0;
243 } else {
244 env->ca32 = env->ca = 1;
246 } else {
247 ret = (int32_t)value;
248 env->ca32 = env->ca = 0;
250 } else {
251 ret = (int32_t)value >> 31;
252 env->ca32 = env->ca = (ret != 0);
254 return (target_long)ret;
257 #if defined(TARGET_PPC64)
258 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
259 target_ulong shift)
261 int64_t ret;
263 if (likely(!(shift & 0x40))) {
264 if (likely((uint64_t)shift != 0)) {
265 shift &= 0x3f;
266 ret = (int64_t)value >> shift;
267 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
268 env->ca32 = env->ca = 0;
269 } else {
270 env->ca32 = env->ca = 1;
272 } else {
273 ret = (int64_t)value;
274 env->ca32 = env->ca = 0;
276 } else {
277 ret = (int64_t)value >> 63;
278 env->ca32 = env->ca = (ret != 0);
280 return ret;
282 #endif
284 #if defined(TARGET_PPC64)
285 target_ulong helper_popcntb(target_ulong val)
287 /* Note that we don't fold past bytes */
288 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
289 0x5555555555555555ULL);
290 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
291 0x3333333333333333ULL);
292 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
293 0x0f0f0f0f0f0f0f0fULL);
294 return val;
297 target_ulong helper_popcntw(target_ulong val)
299 /* Note that we don't fold past words. */
300 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
301 0x5555555555555555ULL);
302 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
303 0x3333333333333333ULL);
304 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
305 0x0f0f0f0f0f0f0f0fULL);
306 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
307 0x00ff00ff00ff00ffULL);
308 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
309 0x0000ffff0000ffffULL);
310 return val;
312 #else
313 target_ulong helper_popcntb(target_ulong val)
315 /* Note that we don't fold past bytes */
316 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
317 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
318 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
319 return val;
321 #endif
/*
 * Centrifuge doubleword: the bits of 'src' selected by 0s in 'mask' are
 * gathered (in order) at the most significant end of the result and the
 * bits selected by 1s at the least significant end.
 *
 * All intermediate state is kept in uint64_t: the operands are always
 * 64 bits wide regardless of the target register width, so using
 * target_ulong would truncate them on 32-bit targets.
 */
uint64_t helper_cfuged(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
     * ctz or cto, we negate the mask at the end of the loop.
     */
    uint64_t m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false; /* tracks if we are processing zeros or ones */

    if (mask == 0 || mask == UINT64_MAX) {
        return src;
    }

    /* Processes the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extracts 'n' trailing bits of src and put them on the leading 'n'
         * bits of 'right' or 'left', pushing down the previously extracted
         * values.
         *
         * 'n' is at most 63 here (mask is neither 0 nor all-ones on entry),
         * and the shift is done unsigned so that n == 63 does not overflow.
         */
        m = (1ULL << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discards the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is kept
         * the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, right was ror'ed ctpop(mask) times. To put it back in place,
     * we'll shift it more 64-ctpop(mask) times.
     */
    if (bit) {
        n = ctpop64(mask);
    } else {
        n = 64 - ctpop64(mask);
    }

    return left | (right >> n);
}
385 /*****************************************************************************/
386 /* PowerPC 601 specific instructions (POWER bridge) */
387 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
389 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
391 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
392 (int32_t)arg2 == 0) {
393 env->spr[SPR_MQ] = 0;
394 return INT32_MIN;
395 } else {
396 env->spr[SPR_MQ] = tmp % arg2;
397 return tmp / (int32_t)arg2;
401 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
402 target_ulong arg2)
404 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
406 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
407 (int32_t)arg2 == 0) {
408 env->so = env->ov = 1;
409 env->spr[SPR_MQ] = 0;
410 return INT32_MIN;
411 } else {
412 env->spr[SPR_MQ] = tmp % arg2;
413 tmp /= (int32_t)arg2;
414 if ((int32_t)tmp != tmp) {
415 env->so = env->ov = 1;
416 } else {
417 env->ov = 0;
419 return tmp;
423 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
424 target_ulong arg2)
426 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
427 (int32_t)arg2 == 0) {
428 env->spr[SPR_MQ] = 0;
429 return INT32_MIN;
430 } else {
431 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
432 return (int32_t)arg1 / (int32_t)arg2;
436 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
437 target_ulong arg2)
439 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
440 (int32_t)arg2 == 0) {
441 env->so = env->ov = 1;
442 env->spr[SPR_MQ] = 0;
443 return INT32_MIN;
444 } else {
445 env->ov = 0;
446 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
447 return (int32_t)arg1 / (int32_t)arg2;
451 /*****************************************************************************/
452 /* 602 specific instructions */
453 /* mfrom is the most crazy instruction ever seen, imho ! */
454 /* Real implementation uses a ROM table. Do the same */
456 * Extremely decomposed:
457 * -arg / 256
458 * return 256 * log10(10 + 1.0) + 0.5
460 #if !defined(CONFIG_USER_ONLY)
461 target_ulong helper_602_mfrom(target_ulong arg)
463 if (likely(arg < 602)) {
464 #include "mfrom_table.c.inc"
465 return mfrom_ROM_table[arg];
466 } else {
467 return 0;
470 #endif
472 /*****************************************************************************/
473 /* Altivec extension helpers */
/*
 * Iterate 'index' over every slot of r->element: ascending host index on
 * big-endian hosts, descending host index on little-endian hosts.  A
 * variable named 'r' must be in scope and 'index' must be a signed integer.
 */
#if !defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                            \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#else
#define VECTOR_FOR_INORDER_I(index, element)                            \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#endif
482 /* Saturating arithmetic helpers. */
/*
 * Saturating conversion helpers.
 *
 * SATCVT generates cvt<from><to>(), which clamps 'x' to [min, max] and
 * stores 1 in *sat when clamping happened.  SATCVTU is the variant for
 * unsigned sources, where only the upper bound needs checking.  *sat is
 * never cleared, so callers can accumulate it over several conversions.
 */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x < (from_type)min) {                               \
            *sat = 1;                                           \
            return min;                                         \
        }                                                       \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }

/* signed -> narrower signed */
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

/* unsigned -> narrower unsigned */
SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)

/* signed -> narrower unsigned */
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
525 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
527 ppc_store_vscr(env, vscr);
530 uint32_t helper_mfvscr(CPUPPCState *env)
532 return ppc_get_vscr(env);
535 static inline void set_vscr_sat(CPUPPCState *env)
537 /* The choice of non-zero value is arbitrary. */
538 env->vscr_sat.u32[0] = 1;
541 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
543 int i;
545 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
546 r->u32[i] = ~a->u32[i] < b->u32[i];
550 /* vprtybw */
551 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
553 int i;
554 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
555 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
556 res ^= res >> 8;
557 r->u32[i] = res & 1;
561 /* vprtybd */
562 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
564 int i;
565 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
566 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
567 res ^= res >> 16;
568 res ^= res >> 8;
569 r->u64[i] = res & 1;
573 /* vprtybq */
574 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
576 uint64_t res = b->u64[0] ^ b->u64[1];
577 res ^= res >> 32;
578 res ^= res >> 16;
579 res ^= res >> 8;
580 r->VsrD(1) = res & 1;
581 r->VsrD(0) = 0;
584 #define VARITHFP(suffix, func) \
585 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
586 ppc_avr_t *b) \
588 int i; \
590 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
591 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
594 VARITHFP(addfp, float32_add)
595 VARITHFP(subfp, float32_sub)
596 VARITHFP(minfp, float32_min)
597 VARITHFP(maxfp, float32_max)
598 #undef VARITHFP
600 #define VARITHFPFMA(suffix, type) \
601 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
602 ppc_avr_t *b, ppc_avr_t *c) \
604 int i; \
605 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
606 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
607 type, &env->vec_status); \
610 VARITHFPFMA(maddfp, 0);
611 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
612 #undef VARITHFPFMA
614 #define VARITHSAT_CASE(type, op, cvt, element) \
616 type result = (type)a->element[i] op (type)b->element[i]; \
617 r->element[i] = cvt(result, &sat); \
620 #define VARITHSAT_DO(name, op, optype, cvt, element) \
621 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
622 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
624 int sat = 0; \
625 int i; \
627 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
628 VARITHSAT_CASE(optype, op, cvt, element); \
630 if (sat) { \
631 vscr_sat->u32[0] = 1; \
634 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
635 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
636 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
637 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
638 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
639 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
640 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
641 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
642 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
643 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
644 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
645 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
646 #undef VARITHSAT_CASE
647 #undef VARITHSAT_DO
648 #undef VARITHSAT_SIGNED
649 #undef VARITHSAT_UNSIGNED
651 #define VAVG_DO(name, element, etype) \
652 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
654 int i; \
656 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
657 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
658 r->element[i] = x >> 1; \
662 #define VAVG(type, signed_element, signed_type, unsigned_element, \
663 unsigned_type) \
664 VAVG_DO(avgs##type, signed_element, signed_type) \
665 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
666 VAVG(b, s8, int16_t, u8, uint16_t)
667 VAVG(h, s16, int32_t, u16, uint32_t)
668 VAVG(w, s32, int64_t, u32, uint64_t)
669 #undef VAVG_DO
670 #undef VAVG
672 #define VABSDU_DO(name, element) \
673 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
675 int i; \
677 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
678 r->element[i] = (a->element[i] > b->element[i]) ? \
679 (a->element[i] - b->element[i]) : \
680 (b->element[i] - a->element[i]); \
685 * VABSDU - Vector absolute difference unsigned
686 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
687 * element - element type to access from vector
689 #define VABSDU(type, element) \
690 VABSDU_DO(absdu##type, element)
691 VABSDU(b, u8)
692 VABSDU(h, u16)
693 VABSDU(w, u32)
694 #undef VABSDU_DO
695 #undef VABSDU
697 #define VCF(suffix, cvt, element) \
698 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
699 ppc_avr_t *b, uint32_t uim) \
701 int i; \
703 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
704 float32 t = cvt(b->element[i], &env->vec_status); \
705 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
708 VCF(ux, uint32_to_float32, u32)
709 VCF(sx, int32_to_float32, s32)
710 #undef VCF
712 #define VCMP_DO(suffix, compare, element, record) \
713 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
714 ppc_avr_t *a, ppc_avr_t *b) \
716 uint64_t ones = (uint64_t)-1; \
717 uint64_t all = ones; \
718 uint64_t none = 0; \
719 int i; \
721 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
722 uint64_t result = (a->element[i] compare b->element[i] ? \
723 ones : 0x0); \
724 switch (sizeof(a->element[0])) { \
725 case 8: \
726 r->u64[i] = result; \
727 break; \
728 case 4: \
729 r->u32[i] = result; \
730 break; \
731 case 2: \
732 r->u16[i] = result; \
733 break; \
734 case 1: \
735 r->u8[i] = result; \
736 break; \
738 all &= result; \
739 none |= result; \
741 if (record) { \
742 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
745 #define VCMP(suffix, compare, element) \
746 VCMP_DO(suffix, compare, element, 0) \
747 VCMP_DO(suffix##_dot, compare, element, 1)
748 VCMP(equb, ==, u8)
749 VCMP(equh, ==, u16)
750 VCMP(equw, ==, u32)
751 VCMP(equd, ==, u64)
752 VCMP(gtub, >, u8)
753 VCMP(gtuh, >, u16)
754 VCMP(gtuw, >, u32)
755 VCMP(gtud, >, u64)
756 VCMP(gtsb, >, s8)
757 VCMP(gtsh, >, s16)
758 VCMP(gtsw, >, s32)
759 VCMP(gtsd, >, s64)
760 #undef VCMP_DO
761 #undef VCMP
763 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
764 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
765 ppc_avr_t *a, ppc_avr_t *b) \
767 etype ones = (etype)-1; \
768 etype all = ones; \
769 etype result, none = 0; \
770 int i; \
772 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
773 if (cmpzero) { \
774 result = ((a->element[i] == 0) \
775 || (b->element[i] == 0) \
776 || (a->element[i] != b->element[i]) ? \
777 ones : 0x0); \
778 } else { \
779 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
781 r->element[i] = result; \
782 all &= result; \
783 none |= result; \
785 if (record) { \
786 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
791 * VCMPNEZ - Vector compare not equal to zero
792 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
793 * element - element type to access from vector
795 #define VCMPNE(suffix, element, etype, cmpzero) \
796 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
797 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
798 VCMPNE(zb, u8, uint8_t, 1)
799 VCMPNE(zh, u16, uint16_t, 1)
800 VCMPNE(zw, u32, uint32_t, 1)
801 VCMPNE(b, u8, uint8_t, 0)
802 VCMPNE(h, u16, uint16_t, 0)
803 VCMPNE(w, u32, uint32_t, 0)
804 #undef VCMPNE_DO
805 #undef VCMPNE
807 #define VCMPFP_DO(suffix, compare, order, record) \
808 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
809 ppc_avr_t *a, ppc_avr_t *b) \
811 uint32_t ones = (uint32_t)-1; \
812 uint32_t all = ones; \
813 uint32_t none = 0; \
814 int i; \
816 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
817 uint32_t result; \
818 FloatRelation rel = \
819 float32_compare_quiet(a->f32[i], b->f32[i], \
820 &env->vec_status); \
821 if (rel == float_relation_unordered) { \
822 result = 0; \
823 } else if (rel compare order) { \
824 result = ones; \
825 } else { \
826 result = 0; \
828 r->u32[i] = result; \
829 all &= result; \
830 none |= result; \
832 if (record) { \
833 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
836 #define VCMPFP(suffix, compare, order) \
837 VCMPFP_DO(suffix, compare, order, 0) \
838 VCMPFP_DO(suffix##_dot, compare, order, 1)
839 VCMPFP(eqfp, ==, float_relation_equal)
840 VCMPFP(gefp, !=, float_relation_less)
841 VCMPFP(gtfp, ==, float_relation_greater)
842 #undef VCMPFP_DO
843 #undef VCMPFP
845 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
846 ppc_avr_t *a, ppc_avr_t *b, int record)
848 int i;
849 int all_in = 0;
851 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
852 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
853 &env->vec_status);
854 if (le_rel == float_relation_unordered) {
855 r->u32[i] = 0xc0000000;
856 all_in = 1;
857 } else {
858 float32 bneg = float32_chs(b->f32[i]);
859 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
860 &env->vec_status);
861 int le = le_rel != float_relation_greater;
862 int ge = ge_rel != float_relation_less;
864 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
865 all_in |= (!le | !ge);
868 if (record) {
869 env->crf[6] = (all_in == 0) << 1;
873 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
875 vcmpbfp_internal(env, r, a, b, 0);
878 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
879 ppc_avr_t *b)
881 vcmpbfp_internal(env, r, a, b, 1);
884 #define VCT(suffix, satcvt, element) \
885 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
886 ppc_avr_t *b, uint32_t uim) \
888 int i; \
889 int sat = 0; \
890 float_status s = env->vec_status; \
892 set_float_rounding_mode(float_round_to_zero, &s); \
893 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
894 if (float32_is_any_nan(b->f32[i])) { \
895 r->element[i] = 0; \
896 } else { \
897 float64 t = float32_to_float64(b->f32[i], &s); \
898 int64_t j; \
900 t = float64_scalbn(t, uim, &s); \
901 j = float64_to_int64(t, &s); \
902 r->element[i] = satcvt(j, &sat); \
905 if (sat) { \
906 set_vscr_sat(env); \
909 VCT(uxs, cvtsduw, u32)
910 VCT(sxs, cvtsdsw, s32)
911 #undef VCT
913 target_ulong helper_vclzlsbb(ppc_avr_t *r)
915 target_ulong count = 0;
916 int i;
917 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
918 if (r->VsrB(i) & 0x01) {
919 break;
921 count++;
923 return count;
926 target_ulong helper_vctzlsbb(ppc_avr_t *r)
928 target_ulong count = 0;
929 int i;
930 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
931 if (r->VsrB(i) & 0x01) {
932 break;
934 count++;
936 return count;
939 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
940 ppc_avr_t *b, ppc_avr_t *c)
942 int sat = 0;
943 int i;
945 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
946 int32_t prod = a->s16[i] * b->s16[i];
947 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
949 r->s16[i] = cvtswsh(t, &sat);
952 if (sat) {
953 set_vscr_sat(env);
957 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
958 ppc_avr_t *b, ppc_avr_t *c)
960 int sat = 0;
961 int i;
963 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
964 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
965 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
966 r->s16[i] = cvtswsh(t, &sat);
969 if (sat) {
970 set_vscr_sat(env);
974 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
976 int i;
978 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
979 int32_t prod = a->s16[i] * b->s16[i];
980 r->s16[i] = (int16_t) (prod + c->s16[i]);
984 #define VMRG_DO(name, element, access, ofs) \
985 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
987 ppc_avr_t result; \
988 int i, half = ARRAY_SIZE(r->element) / 2; \
990 for (i = 0; i < half; i++) { \
991 result.access(i * 2 + 0) = a->access(i + ofs); \
992 result.access(i * 2 + 1) = b->access(i + ofs); \
994 *r = result; \
997 #define VMRG(suffix, element, access) \
998 VMRG_DO(mrgl##suffix, element, access, half) \
999 VMRG_DO(mrgh##suffix, element, access, 0)
1000 VMRG(b, u8, VsrB)
1001 VMRG(h, u16, VsrH)
1002 VMRG(w, u32, VsrW)
1003 #undef VMRG_DO
1004 #undef VMRG
1006 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1007 ppc_avr_t *b, ppc_avr_t *c)
1009 int32_t prod[16];
1010 int i;
1012 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1013 prod[i] = (int32_t)a->s8[i] * b->u8[i];
1016 VECTOR_FOR_INORDER_I(i, s32) {
1017 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1018 prod[4 * i + 2] + prod[4 * i + 3];
1022 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1023 ppc_avr_t *b, ppc_avr_t *c)
1025 int32_t prod[8];
1026 int i;
1028 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1029 prod[i] = a->s16[i] * b->s16[i];
1032 VECTOR_FOR_INORDER_I(i, s32) {
1033 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1037 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1038 ppc_avr_t *b, ppc_avr_t *c)
1040 int32_t prod[8];
1041 int i;
1042 int sat = 0;
1044 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1045 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1048 VECTOR_FOR_INORDER_I(i, s32) {
1049 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1051 r->u32[i] = cvtsdsw(t, &sat);
1054 if (sat) {
1055 set_vscr_sat(env);
1059 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1060 ppc_avr_t *b, ppc_avr_t *c)
1062 uint16_t prod[16];
1063 int i;
1065 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1066 prod[i] = a->u8[i] * b->u8[i];
1069 VECTOR_FOR_INORDER_I(i, u32) {
1070 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1071 prod[4 * i + 2] + prod[4 * i + 3];
1075 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1076 ppc_avr_t *b, ppc_avr_t *c)
1078 uint32_t prod[8];
1079 int i;
1081 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1082 prod[i] = a->u16[i] * b->u16[i];
1085 VECTOR_FOR_INORDER_I(i, u32) {
1086 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1090 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1091 ppc_avr_t *b, ppc_avr_t *c)
1093 uint32_t prod[8];
1094 int i;
1095 int sat = 0;
1097 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1098 prod[i] = a->u16[i] * b->u16[i];
1101 VECTOR_FOR_INORDER_I(i, s32) {
1102 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1104 r->u32[i] = cvtuduw(t, &sat);
1107 if (sat) {
1108 set_vscr_sat(env);
1112 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1113 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1115 int i; \
1117 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1118 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1119 (cast)b->mul_access(i); \
1123 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1124 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1126 int i; \
1128 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1129 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1130 (cast)b->mul_access(i + 1); \
1134 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1135 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
1136 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
1137 VMUL(sb, s8, VsrSB, VsrSH, int16_t)
1138 VMUL(sh, s16, VsrSH, VsrSW, int32_t)
1139 VMUL(sw, s32, VsrSW, VsrSD, int64_t)
1140 VMUL(ub, u8, VsrB, VsrH, uint16_t)
1141 VMUL(uh, u16, VsrH, VsrW, uint32_t)
1142 VMUL(uw, u32, VsrW, VsrD, uint64_t)
1143 #undef VMUL_DO_EVN
1144 #undef VMUL_DO_ODD
1145 #undef VMUL
1147 void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1149 int i;
1151 for (i = 0; i < 4; i++) {
1152 r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32);
1156 void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1158 int i;
1160 for (i = 0; i < 4; i++) {
1161 r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] *
1162 (uint64_t)b->u32[i]) >> 32);
1166 void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1168 uint64_t discard;
1170 muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]);
1171 muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]);
1174 void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1176 uint64_t discard;
1178 mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]);
1179 mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]);
1182 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1183 ppc_avr_t *c)
1185 ppc_avr_t result;
1186 int i;
1188 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1189 int s = c->VsrB(i) & 0x1f;
1190 int index = s & 0xf;
1192 if (s & 0x10) {
1193 result.VsrB(i) = b->VsrB(index);
1194 } else {
1195 result.VsrB(i) = a->VsrB(index);
1198 *r = result;
1201 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1202 ppc_avr_t *c)
1204 ppc_avr_t result;
1205 int i;
1207 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1208 int s = c->VsrB(i) & 0x1f;
1209 int index = 15 - (s & 0xf);
1211 if (s & 0x10) {
1212 result.VsrB(i) = a->VsrB(index);
1213 } else {
1214 result.VsrB(i) = b->VsrB(index);
1217 *r = result;
/*
 * Host-endianness adapters for the bit-permute helpers:
 *   VBPERMQ_INDEX(avr, i)      - i-th index byte of 'avr'
 *   VBPERMD_INDEX(i)           - host u64 slot for doubleword i
 *   VBPERMQ_DW(index)          - host u64 slot selected by bit 0x40 of index
 *   EXTRACT_BIT(avr, i, index) - one bit of doubleword i of 'avr'
 */
#if !defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - i], 63 - index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#endif
1233 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1235 int i, j;
1236 ppc_avr_t result = { .u64 = { 0, 0 } };
1237 VECTOR_FOR_INORDER_I(i, u64) {
1238 for (j = 0; j < 8; j++) {
1239 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1240 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1241 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1245 *r = result;
1248 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1250 int i;
1251 uint64_t perm = 0;
1253 VECTOR_FOR_INORDER_I(i, u8) {
1254 int index = VBPERMQ_INDEX(b, i);
1256 if (index < 128) {
1257 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1258 if (a->u64[VBPERMQ_DW(index)] & mask) {
1259 perm |= (0x8000 >> i);
1264 r->VsrD(0) = perm;
1265 r->VsrD(1) = 0;
1268 #undef VBPERMQ_INDEX
1269 #undef VBPERMQ_DW
1271 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1272 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1274 int i, j; \
1275 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1277 VECTOR_FOR_INORDER_I(i, srcfld) { \
1278 prod[i] = 0; \
1279 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1280 if (a->srcfld[i] & (1ull << j)) { \
1281 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1286 VECTOR_FOR_INORDER_I(i, trgfld) { \
1287 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1291 PMSUM(vpmsumb, u8, u16, uint16_t)
1292 PMSUM(vpmsumh, u16, u32, uint32_t)
1293 PMSUM(vpmsumw, u32, u64, uint64_t)
1295 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1298 #ifdef CONFIG_INT128
1299 int i, j;
1300 __uint128_t prod[2];
1302 VECTOR_FOR_INORDER_I(i, u64) {
1303 prod[i] = 0;
1304 for (j = 0; j < 64; j++) {
1305 if (a->u64[i] & (1ull << j)) {
1306 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1311 r->u128 = prod[0] ^ prod[1];
1313 #else
1314 int i, j;
1315 ppc_avr_t prod[2];
1317 VECTOR_FOR_INORDER_I(i, u64) {
1318 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1319 for (j = 0; j < 64; j++) {
1320 if (a->u64[i] & (1ull << j)) {
1321 ppc_avr_t bshift;
1322 if (j == 0) {
1323 bshift.VsrD(0) = 0;
1324 bshift.VsrD(1) = b->u64[i];
1325 } else {
1326 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1327 bshift.VsrD(1) = b->u64[i] << j;
1329 prod[i].VsrD(1) ^= bshift.VsrD(1);
1330 prod[i].VsrD(0) ^= bshift.VsrD(0);
1335 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1336 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1337 #endif
1341 #if defined(HOST_WORDS_BIGENDIAN)
1342 #define PKBIG 1
1343 #else
1344 #define PKBIG 0
1345 #endif
1346 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1348 int i, j;
1349 ppc_avr_t result;
1350 #if defined(HOST_WORDS_BIGENDIAN)
1351 const ppc_avr_t *x[2] = { a, b };
1352 #else
1353 const ppc_avr_t *x[2] = { b, a };
1354 #endif
1356 VECTOR_FOR_INORDER_I(i, u64) {
1357 VECTOR_FOR_INORDER_I(j, u32) {
1358 uint32_t e = x[i]->u32[j];
1360 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1361 ((e >> 6) & 0x3e0) |
1362 ((e >> 3) & 0x1f));
1365 *r = result;
1368 #define VPK(suffix, from, to, cvt, dosat) \
1369 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1370 ppc_avr_t *a, ppc_avr_t *b) \
1372 int i; \
1373 int sat = 0; \
1374 ppc_avr_t result; \
1375 ppc_avr_t *a0 = PKBIG ? a : b; \
1376 ppc_avr_t *a1 = PKBIG ? b : a; \
1378 VECTOR_FOR_INORDER_I(i, from) { \
1379 result.to[i] = cvt(a0->from[i], &sat); \
1380 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1382 *r = result; \
1383 if (dosat && sat) { \
1384 set_vscr_sat(env); \
1387 #define I(x, y) (x)
1388 VPK(shss, s16, s8, cvtshsb, 1)
1389 VPK(shus, s16, u8, cvtshub, 1)
1390 VPK(swss, s32, s16, cvtswsh, 1)
1391 VPK(swus, s32, u16, cvtswuh, 1)
1392 VPK(sdss, s64, s32, cvtsdsw, 1)
1393 VPK(sdus, s64, u32, cvtsduw, 1)
1394 VPK(uhus, u16, u8, cvtuhub, 1)
1395 VPK(uwus, u32, u16, cvtuwuh, 1)
1396 VPK(udus, u64, u32, cvtuduw, 1)
1397 VPK(uhum, u16, u8, I, 0)
1398 VPK(uwum, u32, u16, I, 0)
1399 VPK(udum, u64, u32, I, 0)
1400 #undef I
1401 #undef VPK
1402 #undef PKBIG
1404 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1406 int i;
1408 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1409 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1413 #define VRFI(suffix, rounding) \
1414 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1415 ppc_avr_t *b) \
1417 int i; \
1418 float_status s = env->vec_status; \
1420 set_float_rounding_mode(rounding, &s); \
1421 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1422 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1425 VRFI(n, float_round_nearest_even)
1426 VRFI(m, float_round_down)
1427 VRFI(p, float_round_up)
1428 VRFI(z, float_round_to_zero)
1429 #undef VRFI
1431 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1433 int i;
1435 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1436 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1438 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1442 #define VRLMI(name, size, element, insert) \
1443 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1445 int i; \
1446 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1447 uint##size##_t src1 = a->element[i]; \
1448 uint##size##_t src2 = b->element[i]; \
1449 uint##size##_t src3 = r->element[i]; \
1450 uint##size##_t begin, end, shift, mask, rot_val; \
1452 shift = extract##size(src2, 0, 6); \
1453 end = extract##size(src2, 8, 6); \
1454 begin = extract##size(src2, 16, 6); \
1455 rot_val = rol##size(src1, shift); \
1456 mask = mask_u##size(begin, end); \
1457 if (insert) { \
1458 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1459 } else { \
1460 r->element[i] = (rot_val & mask); \
1465 VRLMI(vrldmi, 64, u64, 1);
1466 VRLMI(vrlwmi, 32, u32, 1);
1467 VRLMI(vrldnm, 64, u64, 0);
1468 VRLMI(vrlwnm, 32, u32, 0);
1470 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1471 ppc_avr_t *c)
1473 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1474 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1477 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1479 int i;
1481 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1482 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1486 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1488 int i;
1490 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1491 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1495 #define VEXTU_X_DO(name, size, left) \
1496 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1498 int index = (a & 0xf) * 8; \
1499 if (left) { \
1500 index = 128 - index - size; \
1502 return int128_getlo(int128_rshift(b->s128, index)) & \
1503 MAKE_64BIT_MASK(0, size); \
1505 VEXTU_X_DO(vextublx, 8, 1)
1506 VEXTU_X_DO(vextuhlx, 16, 1)
1507 VEXTU_X_DO(vextuwlx, 32, 1)
1508 VEXTU_X_DO(vextubrx, 8, 0)
1509 VEXTU_X_DO(vextuhrx, 16, 0)
1510 VEXTU_X_DO(vextuwrx, 32, 0)
1511 #undef VEXTU_X_DO
1513 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1515 int i;
1516 unsigned int shift, bytes, size;
1518 size = ARRAY_SIZE(r->u8);
1519 for (i = 0; i < size; i++) {
1520 shift = b->VsrB(i) & 0x7; /* extract shift value */
1521 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1522 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1523 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1527 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1529 int i;
1530 unsigned int shift, bytes;
1533 * Use reverse order, as destination and source register can be
1534 * same. Its being modified in place saving temporary, reverse
1535 * order will guarantee that computed result is not fed back.
1537 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1538 shift = b->VsrB(i) & 0x7; /* extract shift value */
1539 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1540 /* extract adjacent bytes */
1541 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1545 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1547 int sh = shift & 0xf;
1548 int i;
1549 ppc_avr_t result;
1551 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1552 int index = sh + i;
1553 if (index > 0xf) {
1554 result.VsrB(i) = b->VsrB(index - 0x10);
1555 } else {
1556 result.VsrB(i) = a->VsrB(index);
1559 *r = result;
1562 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1564 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1566 #if defined(HOST_WORDS_BIGENDIAN)
1567 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1568 memset(&r->u8[16 - sh], 0, sh);
1569 #else
1570 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1571 memset(&r->u8[0], 0, sh);
1572 #endif
1575 #if defined(HOST_WORDS_BIGENDIAN)
1576 #define VINSERT(suffix, element) \
1577 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1579 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
1580 sizeof(r->element[0])); \
1582 #else
1583 #define VINSERT(suffix, element) \
1584 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1586 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1587 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1589 #endif
1590 VINSERT(b, u8)
1591 VINSERT(h, u16)
1592 VINSERT(w, u32)
1593 VINSERT(d, u64)
1594 #undef VINSERT
1595 #if defined(HOST_WORDS_BIGENDIAN)
1596 #define VEXTRACT(suffix, element) \
1597 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1599 uint32_t es = sizeof(r->element[0]); \
1600 memmove(&r->u8[8 - es], &b->u8[index], es); \
1601 memset(&r->u8[8], 0, 8); \
1602 memset(&r->u8[0], 0, 8 - es); \
1604 #else
1605 #define VEXTRACT(suffix, element) \
1606 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1608 uint32_t es = sizeof(r->element[0]); \
1609 uint32_t s = (16 - index) - es; \
1610 memmove(&r->u8[8], &b->u8[s], es); \
1611 memset(&r->u8[0], 0, 8); \
1612 memset(&r->u8[8 + es], 0, 8 - es); \
1614 #endif
1615 VEXTRACT(ub, u8)
1616 VEXTRACT(uh, u16)
1617 VEXTRACT(uw, u32)
1618 VEXTRACT(d, u64)
1619 #undef VEXTRACT
1621 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1622 ppc_vsr_t *xb, uint32_t index)
1624 ppc_vsr_t t = { };
1625 size_t es = sizeof(uint32_t);
1626 uint32_t ext_index;
1627 int i;
1629 ext_index = index;
1630 for (i = 0; i < es; i++, ext_index++) {
1631 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1634 *xt = t;
1637 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1638 ppc_vsr_t *xb, uint32_t index)
1640 ppc_vsr_t t = *xt;
1641 size_t es = sizeof(uint32_t);
1642 int ins_index, i = 0;
1644 ins_index = index;
1645 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1646 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1649 *xt = t;
1652 #define VEXT_SIGNED(name, element, cast) \
1653 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1655 int i; \
1656 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1657 r->element[i] = (cast)b->element[i]; \
1660 VEXT_SIGNED(vextsb2w, s32, int8_t)
1661 VEXT_SIGNED(vextsb2d, s64, int8_t)
1662 VEXT_SIGNED(vextsh2w, s32, int16_t)
1663 VEXT_SIGNED(vextsh2d, s64, int16_t)
1664 VEXT_SIGNED(vextsw2d, s64, int32_t)
1665 #undef VEXT_SIGNED
1667 #define VNEG(name, element) \
1668 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1670 int i; \
1671 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1672 r->element[i] = -b->element[i]; \
1675 VNEG(vnegw, s32)
1676 VNEG(vnegd, s64)
1677 #undef VNEG
1679 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1681 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1683 #if defined(HOST_WORDS_BIGENDIAN)
1684 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1685 memset(&r->u8[0], 0, sh);
1686 #else
1687 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1688 memset(&r->u8[16 - sh], 0, sh);
1689 #endif
1692 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1694 int i;
1696 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1697 r->u32[i] = a->u32[i] >= b->u32[i];
1701 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1703 int64_t t;
1704 int i, upper;
1705 ppc_avr_t result;
1706 int sat = 0;
1708 upper = ARRAY_SIZE(r->s32) - 1;
1709 t = (int64_t)b->VsrSW(upper);
1710 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1711 t += a->VsrSW(i);
1712 result.VsrSW(i) = 0;
1714 result.VsrSW(upper) = cvtsdsw(t, &sat);
1715 *r = result;
1717 if (sat) {
1718 set_vscr_sat(env);
1722 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1724 int i, j, upper;
1725 ppc_avr_t result;
1726 int sat = 0;
1728 upper = 1;
1729 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1730 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1732 result.VsrD(i) = 0;
1733 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1734 t += a->VsrSW(2 * i + j);
1736 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1739 *r = result;
1740 if (sat) {
1741 set_vscr_sat(env);
1745 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1747 int i, j;
1748 int sat = 0;
1750 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1751 int64_t t = (int64_t)b->s32[i];
1753 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1754 t += a->s8[4 * i + j];
1756 r->s32[i] = cvtsdsw(t, &sat);
1759 if (sat) {
1760 set_vscr_sat(env);
1764 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1766 int sat = 0;
1767 int i;
1769 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1770 int64_t t = (int64_t)b->s32[i];
1772 t += a->s16[2 * i] + a->s16[2 * i + 1];
1773 r->s32[i] = cvtsdsw(t, &sat);
1776 if (sat) {
1777 set_vscr_sat(env);
1781 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1783 int i, j;
1784 int sat = 0;
1786 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1787 uint64_t t = (uint64_t)b->u32[i];
1789 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1790 t += a->u8[4 * i + j];
1792 r->u32[i] = cvtuduw(t, &sat);
1795 if (sat) {
1796 set_vscr_sat(env);
1800 #if defined(HOST_WORDS_BIGENDIAN)
1801 #define UPKHI 1
1802 #define UPKLO 0
1803 #else
1804 #define UPKHI 0
1805 #define UPKLO 1
1806 #endif
1807 #define VUPKPX(suffix, hi) \
1808 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1810 int i; \
1811 ppc_avr_t result; \
1813 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1814 uint16_t e = b->u16[hi ? i : i + 4]; \
1815 uint8_t a = (e >> 15) ? 0xff : 0; \
1816 uint8_t r = (e >> 10) & 0x1f; \
1817 uint8_t g = (e >> 5) & 0x1f; \
1818 uint8_t b = e & 0x1f; \
1820 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1822 *r = result; \
1824 VUPKPX(lpx, UPKLO)
1825 VUPKPX(hpx, UPKHI)
1826 #undef VUPKPX
1828 #define VUPK(suffix, unpacked, packee, hi) \
1829 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1831 int i; \
1832 ppc_avr_t result; \
1834 if (hi) { \
1835 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1836 result.unpacked[i] = b->packee[i]; \
1838 } else { \
1839 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1840 i++) { \
1841 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1844 *r = result; \
1846 VUPK(hsb, s16, s8, UPKHI)
1847 VUPK(hsh, s32, s16, UPKHI)
1848 VUPK(hsw, s64, s32, UPKHI)
1849 VUPK(lsb, s16, s8, UPKLO)
1850 VUPK(lsh, s32, s16, UPKLO)
1851 VUPK(lsw, s64, s32, UPKLO)
1852 #undef VUPK
1853 #undef UPKHI
1854 #undef UPKLO
1856 #define VGENERIC_DO(name, element) \
1857 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1859 int i; \
1861 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1862 r->element[i] = name(b->element[i]); \
1866 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1867 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1869 VGENERIC_DO(clzb, u8)
1870 VGENERIC_DO(clzh, u16)
1872 #undef clzb
1873 #undef clzh
1875 #define ctzb(v) ((v) ? ctz32(v) : 8)
1876 #define ctzh(v) ((v) ? ctz32(v) : 16)
1877 #define ctzw(v) ctz32((v))
1878 #define ctzd(v) ctz64((v))
1880 VGENERIC_DO(ctzb, u8)
1881 VGENERIC_DO(ctzh, u16)
1882 VGENERIC_DO(ctzw, u32)
1883 VGENERIC_DO(ctzd, u64)
1885 #undef ctzb
1886 #undef ctzh
1887 #undef ctzw
1888 #undef ctzd
1890 #define popcntb(v) ctpop8(v)
1891 #define popcnth(v) ctpop16(v)
1892 #define popcntw(v) ctpop32(v)
1893 #define popcntd(v) ctpop64(v)
1895 VGENERIC_DO(popcntb, u8)
1896 VGENERIC_DO(popcnth, u16)
1897 VGENERIC_DO(popcntw, u32)
1898 VGENERIC_DO(popcntd, u64)
1900 #undef popcntb
1901 #undef popcnth
1902 #undef popcntw
1903 #undef popcntd
1905 #undef VGENERIC_DO
1907 #if defined(HOST_WORDS_BIGENDIAN)
1908 #define QW_ONE { .u64 = { 0, 1 } }
1909 #else
1910 #define QW_ONE { .u64 = { 1, 0 } }
1911 #endif
1913 #ifndef CONFIG_INT128
1915 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1917 t->u64[0] = ~a.u64[0];
1918 t->u64[1] = ~a.u64[1];
1921 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1923 if (a.VsrD(0) < b.VsrD(0)) {
1924 return -1;
1925 } else if (a.VsrD(0) > b.VsrD(0)) {
1926 return 1;
1927 } else if (a.VsrD(1) < b.VsrD(1)) {
1928 return -1;
1929 } else if (a.VsrD(1) > b.VsrD(1)) {
1930 return 1;
1931 } else {
1932 return 0;
1936 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1938 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1939 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1940 (~a.VsrD(1) < b.VsrD(1));
1943 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1945 ppc_avr_t not_a;
1946 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1947 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1948 (~a.VsrD(1) < b.VsrD(1));
1949 avr_qw_not(&not_a, a);
1950 return avr_qw_cmpu(not_a, b) < 0;
1953 #endif
1955 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1957 #ifdef CONFIG_INT128
1958 r->u128 = a->u128 + b->u128;
1959 #else
1960 avr_qw_add(r, *a, *b);
1961 #endif
1964 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1966 #ifdef CONFIG_INT128
1967 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1968 #else
1970 if (c->VsrD(1) & 1) {
1971 ppc_avr_t tmp;
1973 tmp.VsrD(0) = 0;
1974 tmp.VsrD(1) = c->VsrD(1) & 1;
1975 avr_qw_add(&tmp, *a, tmp);
1976 avr_qw_add(r, tmp, *b);
1977 } else {
1978 avr_qw_add(r, *a, *b);
1980 #endif
1983 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1985 #ifdef CONFIG_INT128
1986 r->u128 = (~a->u128 < b->u128);
1987 #else
1988 ppc_avr_t not_a;
1990 avr_qw_not(&not_a, *a);
1992 r->VsrD(0) = 0;
1993 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
1994 #endif
1997 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1999 #ifdef CONFIG_INT128
2000 int carry_out = (~a->u128 < b->u128);
2001 if (!carry_out && (c->u128 & 1)) {
2002 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2003 ((a->u128 != 0) || (b->u128 != 0));
2005 r->u128 = carry_out;
2006 #else
2008 int carry_in = c->VsrD(1) & 1;
2009 int carry_out = 0;
2010 ppc_avr_t tmp;
2012 carry_out = avr_qw_addc(&tmp, *a, *b);
2014 if (!carry_out && carry_in) {
2015 ppc_avr_t one = QW_ONE;
2016 carry_out = avr_qw_addc(&tmp, tmp, one);
2018 r->VsrD(0) = 0;
2019 r->VsrD(1) = carry_out;
2020 #endif
2023 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2025 #ifdef CONFIG_INT128
2026 r->u128 = a->u128 - b->u128;
2027 #else
2028 ppc_avr_t tmp;
2029 ppc_avr_t one = QW_ONE;
2031 avr_qw_not(&tmp, *b);
2032 avr_qw_add(&tmp, *a, tmp);
2033 avr_qw_add(r, tmp, one);
2034 #endif
2037 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2039 #ifdef CONFIG_INT128
2040 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2041 #else
2042 ppc_avr_t tmp, sum;
2044 avr_qw_not(&tmp, *b);
2045 avr_qw_add(&sum, *a, tmp);
2047 tmp.VsrD(0) = 0;
2048 tmp.VsrD(1) = c->VsrD(1) & 1;
2049 avr_qw_add(r, sum, tmp);
2050 #endif
2053 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2055 #ifdef CONFIG_INT128
2056 r->u128 = (~a->u128 < ~b->u128) ||
2057 (a->u128 + ~b->u128 == (__uint128_t)-1);
2058 #else
2059 int carry = (avr_qw_cmpu(*a, *b) > 0);
2060 if (!carry) {
2061 ppc_avr_t tmp;
2062 avr_qw_not(&tmp, *b);
2063 avr_qw_add(&tmp, *a, tmp);
2064 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2066 r->VsrD(0) = 0;
2067 r->VsrD(1) = carry;
2068 #endif
2071 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2073 #ifdef CONFIG_INT128
2074 r->u128 =
2075 (~a->u128 < ~b->u128) ||
2076 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2077 #else
2078 int carry_in = c->VsrD(1) & 1;
2079 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2080 if (!carry_out && carry_in) {
2081 ppc_avr_t tmp;
2082 avr_qw_not(&tmp, *b);
2083 avr_qw_add(&tmp, *a, tmp);
2084 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2087 r->VsrD(0) = 0;
2088 r->VsrD(1) = carry_out;
2089 #endif
2092 #define BCD_PLUS_PREF_1 0xC
2093 #define BCD_PLUS_PREF_2 0xF
2094 #define BCD_PLUS_ALT_1 0xA
2095 #define BCD_NEG_PREF 0xD
2096 #define BCD_NEG_ALT 0xB
2097 #define BCD_PLUS_ALT_2 0xE
2098 #define NATIONAL_PLUS 0x2B
2099 #define NATIONAL_NEG 0x2D
2101 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2103 static int bcd_get_sgn(ppc_avr_t *bcd)
2105 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2106 case BCD_PLUS_PREF_1:
2107 case BCD_PLUS_PREF_2:
2108 case BCD_PLUS_ALT_1:
2109 case BCD_PLUS_ALT_2:
2111 return 1;
2114 case BCD_NEG_PREF:
2115 case BCD_NEG_ALT:
2117 return -1;
2120 default:
2122 return 0;
2127 static int bcd_preferred_sgn(int sgn, int ps)
2129 if (sgn >= 0) {
2130 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2131 } else {
2132 return BCD_NEG_PREF;
2136 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2138 uint8_t result;
2139 if (n & 1) {
2140 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2141 } else {
2142 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2145 if (unlikely(result > 9)) {
2146 *invalid = true;
2148 return result;
2151 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2153 if (n & 1) {
2154 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2155 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2156 } else {
2157 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2158 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2162 static bool bcd_is_valid(ppc_avr_t *bcd)
2164 int i;
2165 int invalid = 0;
2167 if (bcd_get_sgn(bcd) == 0) {
2168 return false;
2171 for (i = 1; i < 32; i++) {
2172 bcd_get_digit(bcd, i, &invalid);
2173 if (unlikely(invalid)) {
2174 return false;
2177 return true;
2180 static int bcd_cmp_zero(ppc_avr_t *bcd)
2182 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2183 return CRF_EQ;
2184 } else {
2185 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2189 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2191 return reg->VsrH(7 - n);
2194 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2196 reg->VsrH(7 - n) = val;
2199 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2201 int i;
2202 int invalid = 0;
2203 for (i = 31; i > 0; i--) {
2204 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2205 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2206 if (unlikely(invalid)) {
2207 return 0; /* doesn't matter */
2208 } else if (dig_a > dig_b) {
2209 return 1;
2210 } else if (dig_a < dig_b) {
2211 return -1;
2215 return 0;
2218 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2219 int *overflow)
2221 int carry = 0;
2222 int i;
2223 int is_zero = 1;
2225 for (i = 1; i <= 31; i++) {
2226 uint8_t digit = bcd_get_digit(a, i, invalid) +
2227 bcd_get_digit(b, i, invalid) + carry;
2228 is_zero &= (digit == 0);
2229 if (digit > 9) {
2230 carry = 1;
2231 digit -= 10;
2232 } else {
2233 carry = 0;
2236 bcd_put_digit(t, digit, i);
2239 *overflow = carry;
2240 return is_zero;
2243 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2244 int *overflow)
2246 int carry = 0;
2247 int i;
2249 for (i = 1; i <= 31; i++) {
2250 uint8_t digit = bcd_get_digit(a, i, invalid) -
2251 bcd_get_digit(b, i, invalid) + carry;
2252 if (digit & 0x80) {
2253 carry = -1;
2254 digit += 10;
2255 } else {
2256 carry = 0;
2259 bcd_put_digit(t, digit, i);
2262 *overflow = carry;
2265 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2268 int sgna = bcd_get_sgn(a);
2269 int sgnb = bcd_get_sgn(b);
2270 int invalid = (sgna == 0) || (sgnb == 0);
2271 int overflow = 0;
2272 int zero = 0;
2273 uint32_t cr = 0;
2274 ppc_avr_t result = { .u64 = { 0, 0 } };
2276 if (!invalid) {
2277 if (sgna == sgnb) {
2278 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2279 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2280 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2281 } else {
2282 int magnitude = bcd_cmp_mag(a, b);
2283 if (magnitude > 0) {
2284 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2285 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2286 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2287 } else if (magnitude < 0) {
2288 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2289 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2290 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2291 } else {
2292 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2293 cr = CRF_EQ;
2298 if (unlikely(invalid)) {
2299 result.VsrD(0) = result.VsrD(1) = -1;
2300 cr = CRF_SO;
2301 } else if (overflow) {
2302 cr |= CRF_SO;
2303 } else if (zero) {
2304 cr |= CRF_EQ;
2307 *r = result;
2309 return cr;
2312 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2314 ppc_avr_t bcopy = *b;
2315 int sgnb = bcd_get_sgn(b);
2316 if (sgnb < 0) {
2317 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2318 } else if (sgnb > 0) {
2319 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2321 /* else invalid ... defer to bcdadd code for proper handling */
2323 return helper_bcdadd(r, a, &bcopy, ps);
2326 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2328 int i;
2329 int cr = 0;
2330 uint16_t national = 0;
2331 uint16_t sgnb = get_national_digit(b, 0);
2332 ppc_avr_t ret = { .u64 = { 0, 0 } };
2333 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2335 for (i = 1; i < 8; i++) {
2336 national = get_national_digit(b, i);
2337 if (unlikely(national < 0x30 || national > 0x39)) {
2338 invalid = 1;
2339 break;
2342 bcd_put_digit(&ret, national & 0xf, i);
2345 if (sgnb == NATIONAL_PLUS) {
2346 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2347 } else {
2348 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2351 cr = bcd_cmp_zero(&ret);
2353 if (unlikely(invalid)) {
2354 cr = CRF_SO;
2357 *r = ret;
2359 return cr;
2362 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2364 int i;
2365 int cr = 0;
2366 int sgnb = bcd_get_sgn(b);
2367 int invalid = (sgnb == 0);
2368 ppc_avr_t ret = { .u64 = { 0, 0 } };
2370 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2372 for (i = 1; i < 8; i++) {
2373 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2375 if (unlikely(invalid)) {
2376 break;
2379 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2381 cr = bcd_cmp_zero(b);
2383 if (ox_flag) {
2384 cr |= CRF_SO;
2387 if (unlikely(invalid)) {
2388 cr = CRF_SO;
2391 *r = ret;
2393 return cr;
2396 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2398 int i;
2399 int cr = 0;
2400 int invalid = 0;
2401 int zone_digit = 0;
2402 int zone_lead = ps ? 0xF : 0x3;
2403 int digit = 0;
2404 ppc_avr_t ret = { .u64 = { 0, 0 } };
2405 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2407 if (unlikely((sgnb < 0xA) && ps)) {
2408 invalid = 1;
2411 for (i = 0; i < 16; i++) {
2412 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2413 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2414 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2415 invalid = 1;
2416 break;
2419 bcd_put_digit(&ret, digit, i + 1);
2422 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2423 (!ps && (sgnb & 0x4))) {
2424 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2425 } else {
2426 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2429 cr = bcd_cmp_zero(&ret);
2431 if (unlikely(invalid)) {
2432 cr = CRF_SO;
2435 *r = ret;
2437 return cr;
2440 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2442 int i;
2443 int cr = 0;
2444 uint8_t digit = 0;
2445 int sgnb = bcd_get_sgn(b);
2446 int zone_lead = (ps) ? 0xF0 : 0x30;
2447 int invalid = (sgnb == 0);
2448 ppc_avr_t ret = { .u64 = { 0, 0 } };
2450 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2452 for (i = 0; i < 16; i++) {
2453 digit = bcd_get_digit(b, i + 1, &invalid);
2455 if (unlikely(invalid)) {
2456 break;
2459 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2462 if (ps) {
2463 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2464 } else {
2465 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2468 cr = bcd_cmp_zero(b);
2470 if (ox_flag) {
2471 cr |= CRF_SO;
2474 if (unlikely(invalid)) {
2475 cr = CRF_SO;
2478 *r = ret;
2480 return cr;
2483 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2485 int i;
2486 int cr = 0;
2487 uint64_t lo_value;
2488 uint64_t hi_value;
2489 ppc_avr_t ret = { .u64 = { 0, 0 } };
2491 if (b->VsrSD(0) < 0) {
2492 lo_value = -b->VsrSD(1);
2493 hi_value = ~b->VsrD(0) + !lo_value;
2494 bcd_put_digit(&ret, 0xD, 0);
2495 } else {
2496 lo_value = b->VsrD(1);
2497 hi_value = b->VsrD(0);
2498 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2501 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
2502 lo_value > 9999999999999999ULL) {
2503 cr = CRF_SO;
2506 for (i = 1; i < 16; hi_value /= 10, i++) {
2507 bcd_put_digit(&ret, hi_value % 10, i);
2510 for (; i < 32; lo_value /= 10, i++) {
2511 bcd_put_digit(&ret, lo_value % 10, i);
2514 cr |= bcd_cmp_zero(&ret);
2516 *r = ret;
2518 return cr;
2521 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2523 uint8_t i;
2524 int cr;
2525 uint64_t carry;
2526 uint64_t unused;
2527 uint64_t lo_value;
2528 uint64_t hi_value = 0;
2529 int sgnb = bcd_get_sgn(b);
2530 int invalid = (sgnb == 0);
2532 lo_value = bcd_get_digit(b, 31, &invalid);
2533 for (i = 30; i > 0; i--) {
2534 mulu64(&lo_value, &carry, lo_value, 10ULL);
2535 mulu64(&hi_value, &unused, hi_value, 10ULL);
2536 lo_value += bcd_get_digit(b, i, &invalid);
2537 hi_value += carry;
2539 if (unlikely(invalid)) {
2540 break;
2544 if (sgnb == -1) {
2545 r->VsrSD(1) = -lo_value;
2546 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2547 } else {
2548 r->VsrSD(1) = lo_value;
2549 r->VsrSD(0) = hi_value;
2552 cr = bcd_cmp_zero(b);
2554 if (unlikely(invalid)) {
2555 cr = CRF_SO;
2558 return cr;
2561 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2563 int i;
2564 int invalid = 0;
2566 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2567 return CRF_SO;
2570 *r = *a;
2571 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2573 for (i = 1; i < 32; i++) {
2574 bcd_get_digit(a, i, &invalid);
2575 bcd_get_digit(b, i, &invalid);
2576 if (unlikely(invalid)) {
2577 return CRF_SO;
2581 return bcd_cmp_zero(r);
2584 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2586 int sgnb = bcd_get_sgn(b);
2588 *r = *b;
2589 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2591 if (bcd_is_valid(b) == false) {
2592 return CRF_SO;
2595 return bcd_cmp_zero(r);
2598 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2600 int cr;
2601 int i = a->VsrSB(7);
2602 bool ox_flag = false;
2603 int sgnb = bcd_get_sgn(b);
2604 ppc_avr_t ret = *b;
2605 ret.VsrD(1) &= ~0xf;
2607 if (bcd_is_valid(b) == false) {
2608 return CRF_SO;
2611 if (unlikely(i > 31)) {
2612 i = 31;
2613 } else if (unlikely(i < -31)) {
2614 i = -31;
2617 if (i > 0) {
2618 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2619 } else {
2620 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2622 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2624 *r = ret;
2626 cr = bcd_cmp_zero(r);
2627 if (ox_flag) {
2628 cr |= CRF_SO;
2631 return cr;
2634 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2636 int cr;
2637 int i;
2638 int invalid = 0;
2639 bool ox_flag = false;
2640 ppc_avr_t ret = *b;
2642 for (i = 0; i < 32; i++) {
2643 bcd_get_digit(b, i, &invalid);
2645 if (unlikely(invalid)) {
2646 return CRF_SO;
2650 i = a->VsrSB(7);
2651 if (i >= 32) {
2652 ox_flag = true;
2653 ret.VsrD(1) = ret.VsrD(0) = 0;
2654 } else if (i <= -32) {
2655 ret.VsrD(1) = ret.VsrD(0) = 0;
2656 } else if (i > 0) {
2657 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2658 } else {
2659 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2661 *r = ret;
2663 cr = bcd_cmp_zero(r);
2664 if (ox_flag) {
2665 cr |= CRF_SO;
2668 return cr;
2671 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2673 int cr;
2674 int unused = 0;
2675 int invalid = 0;
2676 bool ox_flag = false;
2677 int sgnb = bcd_get_sgn(b);
2678 ppc_avr_t ret = *b;
2679 ret.VsrD(1) &= ~0xf;
2681 int i = a->VsrSB(7);
2682 ppc_avr_t bcd_one;
2684 bcd_one.VsrD(0) = 0;
2685 bcd_one.VsrD(1) = 0x10;
2687 if (bcd_is_valid(b) == false) {
2688 return CRF_SO;
2691 if (unlikely(i > 31)) {
2692 i = 31;
2693 } else if (unlikely(i < -31)) {
2694 i = -31;
2697 if (i > 0) {
2698 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2699 } else {
2700 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2702 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2703 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2706 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2708 cr = bcd_cmp_zero(&ret);
2709 if (ox_flag) {
2710 cr |= CRF_SO;
2712 *r = ret;
2714 return cr;
2717 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2719 uint64_t mask;
2720 uint32_t ox_flag = 0;
2721 int i = a->VsrSH(3) + 1;
2722 ppc_avr_t ret = *b;
2724 if (bcd_is_valid(b) == false) {
2725 return CRF_SO;
2728 if (i > 16 && i < 32) {
2729 mask = (uint64_t)-1 >> (128 - i * 4);
2730 if (ret.VsrD(0) & ~mask) {
2731 ox_flag = CRF_SO;
2734 ret.VsrD(0) &= mask;
2735 } else if (i >= 0 && i <= 16) {
2736 mask = (uint64_t)-1 >> (64 - i * 4);
2737 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2738 ox_flag = CRF_SO;
2741 ret.VsrD(1) &= mask;
2742 ret.VsrD(0) = 0;
2744 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2745 *r = ret;
2747 return bcd_cmp_zero(&ret) | ox_flag;
2750 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2752 int i;
2753 uint64_t mask;
2754 uint32_t ox_flag = 0;
2755 int invalid = 0;
2756 ppc_avr_t ret = *b;
2758 for (i = 0; i < 32; i++) {
2759 bcd_get_digit(b, i, &invalid);
2761 if (unlikely(invalid)) {
2762 return CRF_SO;
2766 i = a->VsrSH(3);
2767 if (i > 16 && i < 33) {
2768 mask = (uint64_t)-1 >> (128 - i * 4);
2769 if (ret.VsrD(0) & ~mask) {
2770 ox_flag = CRF_SO;
2773 ret.VsrD(0) &= mask;
2774 } else if (i > 0 && i <= 16) {
2775 mask = (uint64_t)-1 >> (64 - i * 4);
2776 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2777 ox_flag = CRF_SO;
2780 ret.VsrD(1) &= mask;
2781 ret.VsrD(0) = 0;
2782 } else if (i == 0) {
2783 if (ret.VsrD(0) || ret.VsrD(1)) {
2784 ox_flag = CRF_SO;
2786 ret.VsrD(0) = ret.VsrD(1) = 0;
2789 *r = ret;
2790 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2791 return ox_flag | CRF_EQ;
2794 return ox_flag | CRF_GT;
2797 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2799 int i;
2800 VECTOR_FOR_INORDER_I(i, u8) {
2801 r->u8[i] = AES_sbox[a->u8[i]];
2805 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2807 ppc_avr_t result;
2808 int i;
2810 VECTOR_FOR_INORDER_I(i, u32) {
2811 result.VsrW(i) = b->VsrW(i) ^
2812 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2813 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2814 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2815 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2817 *r = result;
2820 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2822 ppc_avr_t result;
2823 int i;
2825 VECTOR_FOR_INORDER_I(i, u8) {
2826 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2828 *r = result;
2831 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2833 /* This differs from what is written in ISA V2.07. The RTL is */
2834 /* incorrect and will be fixed in V2.07B. */
2835 int i;
2836 ppc_avr_t tmp;
2838 VECTOR_FOR_INORDER_I(i, u8) {
2839 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2842 VECTOR_FOR_INORDER_I(i, u32) {
2843 r->VsrW(i) =
2844 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2845 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2846 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2847 AES_imc[tmp.VsrB(4 * i + 3)][3];
2851 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2853 ppc_avr_t result;
2854 int i;
2856 VECTOR_FOR_INORDER_I(i, u8) {
2857 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2859 *r = result;
2862 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2864 int st = (st_six & 0x10) != 0;
2865 int six = st_six & 0xF;
2866 int i;
2868 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2869 if (st == 0) {
2870 if ((six & (0x8 >> i)) == 0) {
2871 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2872 ror32(a->VsrW(i), 18) ^
2873 (a->VsrW(i) >> 3);
2874 } else { /* six.bit[i] == 1 */
2875 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2876 ror32(a->VsrW(i), 19) ^
2877 (a->VsrW(i) >> 10);
2879 } else { /* st == 1 */
2880 if ((six & (0x8 >> i)) == 0) {
2881 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2882 ror32(a->VsrW(i), 13) ^
2883 ror32(a->VsrW(i), 22);
2884 } else { /* six.bit[i] == 1 */
2885 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2886 ror32(a->VsrW(i), 11) ^
2887 ror32(a->VsrW(i), 25);
2893 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2895 int st = (st_six & 0x10) != 0;
2896 int six = st_six & 0xF;
2897 int i;
2899 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2900 if (st == 0) {
2901 if ((six & (0x8 >> (2 * i))) == 0) {
2902 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
2903 ror64(a->VsrD(i), 8) ^
2904 (a->VsrD(i) >> 7);
2905 } else { /* six.bit[2*i] == 1 */
2906 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
2907 ror64(a->VsrD(i), 61) ^
2908 (a->VsrD(i) >> 6);
2910 } else { /* st == 1 */
2911 if ((six & (0x8 >> (2 * i))) == 0) {
2912 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
2913 ror64(a->VsrD(i), 34) ^
2914 ror64(a->VsrD(i), 39);
2915 } else { /* six.bit[2*i] == 1 */
2916 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
2917 ror64(a->VsrD(i), 18) ^
2918 ror64(a->VsrD(i), 41);
2924 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2926 ppc_avr_t result;
2927 int i;
2929 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
2930 int indexA = c->VsrB(i) >> 4;
2931 int indexB = c->VsrB(i) & 0xF;
2933 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
2935 *r = result;
2938 #undef VECTOR_FOR_INORDER_I
2940 /*****************************************************************************/
2941 /* SPE extension helpers */
/* Use a table to make this quicker: hbrev[n] is nibble n with bits reversed */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Return @val with the order of its 8 bits reversed. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/*
 * Return @val with the order of its 32 bits reversed.
 *
 * Each reversed byte is widened to uint32_t before being shifted: the
 * uint8_t result would otherwise be promoted to int, and shifting a value
 * of 0x80 or more left by 24 overflows a signed int.
 */
static inline uint32_t word_reverse(uint32_t val)
{
    return (uint32_t)byte_reverse(val >> 24) |
           ((uint32_t)byte_reverse(val >> 16) << 8) |
           ((uint32_t)byte_reverse(val >> 8) << 16) |
           ((uint32_t)byte_reverse(val) << 24);
}
2959 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
2960 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2962 uint32_t a, b, d, mask;
2964 mask = UINT32_MAX >> (32 - MASKBITS);
2965 a = arg1 & mask;
2966 b = arg2 & mask;
2967 d = word_reverse(1 + word_reverse(a | ~b));
2968 return (arg1 & ~mask) | (d & b);
/*
 * Count the leading bits of @val that are equal to its top bit: when bit 31
 * is set the value is inverted first, then clz32() is applied.
 */
uint32_t helper_cntlsw32(uint32_t val)
{
    const uint32_t probe = (val & 0x80000000) ? ~val : val;

    return clz32(probe);
}
/* Return clz32() of @val. */
uint32_t helper_cntlzw32(uint32_t val)
{
    const uint32_t leading_zeros = clz32(val);

    return leading_zeros;
}
2985 /* 440 specific */
2986 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2987 target_ulong low, uint32_t update_Rc)
2989 target_ulong mask;
2990 int i;
2992 i = 1;
2993 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2994 if ((high & mask) == 0) {
2995 if (update_Rc) {
2996 env->crf[0] = 0x4;
2998 goto done;
3000 i++;
3002 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3003 if ((low & mask) == 0) {
3004 if (update_Rc) {
3005 env->crf[0] = 0x8;
3007 goto done;
3009 i++;
3011 i = 8;
3012 if (update_Rc) {
3013 env->crf[0] = 0x2;
3015 done:
3016 env->xer = (env->xer & ~0x7F) | i;
3017 if (update_Rc) {
3018 env->crf[0] |= xer_so;
3020 return i;