target/ppc: moved store_40x_sler to helper_regs.c
[qemu/rayw.git] / target / ppc / int_helper.c
blobefa833ef64cd6195f657aef7a02e58d1878322cc
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "qemu/log.h"
26 #include "exec/helper-proto.h"
27 #include "crypto/aes.h"
28 #include "fpu/softfloat.h"
29 #include "qapi/error.h"
30 #include "qemu/guest-random.h"
32 #include "helper_regs.h"
33 /*****************************************************************************/
34 /* Fixed point operations helpers */
36 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
38 if (unlikely(ov)) {
39 env->so = env->ov = 1;
40 } else {
41 env->ov = 0;
45 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
46 uint32_t oe)
48 uint64_t rt = 0;
49 int overflow = 0;
51 uint64_t dividend = (uint64_t)ra << 32;
52 uint64_t divisor = (uint32_t)rb;
54 if (unlikely(divisor == 0)) {
55 overflow = 1;
56 } else {
57 rt = dividend / divisor;
58 overflow = rt > UINT32_MAX;
61 if (unlikely(overflow)) {
62 rt = 0; /* Undefined */
65 if (oe) {
66 helper_update_ov_legacy(env, overflow);
69 return (target_ulong)rt;
72 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
73 uint32_t oe)
75 int64_t rt = 0;
76 int overflow = 0;
78 int64_t dividend = (int64_t)ra << 32;
79 int64_t divisor = (int64_t)((int32_t)rb);
81 if (unlikely((divisor == 0) ||
82 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
83 overflow = 1;
84 } else {
85 rt = dividend / divisor;
86 overflow = rt != (int32_t)rt;
89 if (unlikely(overflow)) {
90 rt = 0; /* Undefined */
93 if (oe) {
94 helper_update_ov_legacy(env, overflow);
97 return (target_ulong)rt;
100 #if defined(TARGET_PPC64)
102 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
104 uint64_t rt = 0;
105 int overflow = 0;
107 overflow = divu128(&rt, &ra, rb);
109 if (unlikely(overflow)) {
110 rt = 0; /* Undefined */
113 if (oe) {
114 helper_update_ov_legacy(env, overflow);
117 return rt;
120 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
122 int64_t rt = 0;
123 int64_t ra = (int64_t)rau;
124 int64_t rb = (int64_t)rbu;
125 int overflow = divs128(&rt, &ra, rb);
127 if (unlikely(overflow)) {
128 rt = 0; /* Undefined */
131 if (oe) {
132 helper_update_ov_legacy(env, overflow);
135 return rt;
138 #endif
141 #if defined(TARGET_PPC64)
142 /* if x = 0xab, returns 0xababababababababa */
143 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
146 * subtract 1 from each byte, and with inverse, check if MSB is set at each
147 * byte.
148 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
149 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
151 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
153 /* When you XOR the pattern and there is a match, that byte will be zero */
154 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
156 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
158 return hasvalue(rb, ra) ? CRF_GT : 0;
161 #undef pattern
162 #undef haszero
163 #undef hasvalue
166 * Return a random number.
168 uint64_t helper_darn32(void)
170 Error *err = NULL;
171 uint32_t ret;
173 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
174 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
175 error_get_pretty(err));
176 error_free(err);
177 return -1;
180 return ret;
183 uint64_t helper_darn64(void)
185 Error *err = NULL;
186 uint64_t ret;
188 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
189 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
190 error_get_pretty(err));
191 error_free(err);
192 return -1;
195 return ret;
/*
 * Bit permute doubleword.
 *
 * Each of the eight bytes of 'rs' is a bit index. For byte number k
 * (counted from the least significant byte), result bit k is set when the
 * index is below 64 and the bit of 'rb' selected by PPC_BIT(index) is set.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t result = 0;
    int byte;

    for (byte = 0; byte < 8; byte++) {
        int index = (rs >> (byte * 8)) & 0xFF;

        if (index >= 64) {
            continue;
        }
        if (rb & PPC_BIT(index)) {
            result |= 1 << byte;
        }
    }

    return result;
}
214 #endif
216 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
218 target_ulong mask = 0xff;
219 target_ulong ra = 0;
220 int i;
222 for (i = 0; i < sizeof(target_ulong); i++) {
223 if ((rs & mask) == (rb & mask)) {
224 ra |= mask;
226 mask <<= 8;
228 return ra;
231 /* shift right arithmetic helper */
232 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
233 target_ulong shift)
235 int32_t ret;
237 if (likely(!(shift & 0x20))) {
238 if (likely((uint32_t)shift != 0)) {
239 shift &= 0x1f;
240 ret = (int32_t)value >> shift;
241 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
242 env->ca32 = env->ca = 0;
243 } else {
244 env->ca32 = env->ca = 1;
246 } else {
247 ret = (int32_t)value;
248 env->ca32 = env->ca = 0;
250 } else {
251 ret = (int32_t)value >> 31;
252 env->ca32 = env->ca = (ret != 0);
254 return (target_long)ret;
257 #if defined(TARGET_PPC64)
258 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
259 target_ulong shift)
261 int64_t ret;
263 if (likely(!(shift & 0x40))) {
264 if (likely((uint64_t)shift != 0)) {
265 shift &= 0x3f;
266 ret = (int64_t)value >> shift;
267 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
268 env->ca32 = env->ca = 0;
269 } else {
270 env->ca32 = env->ca = 1;
272 } else {
273 ret = (int64_t)value;
274 env->ca32 = env->ca = 0;
276 } else {
277 ret = (int64_t)value >> 63;
278 env->ca32 = env->ca = (ret != 0);
280 return ret;
282 #endif
284 #if defined(TARGET_PPC64)
285 target_ulong helper_popcntb(target_ulong val)
287 /* Note that we don't fold past bytes */
288 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
289 0x5555555555555555ULL);
290 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
291 0x3333333333333333ULL);
292 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
293 0x0f0f0f0f0f0f0f0fULL);
294 return val;
297 target_ulong helper_popcntw(target_ulong val)
299 /* Note that we don't fold past words. */
300 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
301 0x5555555555555555ULL);
302 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
303 0x3333333333333333ULL);
304 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
305 0x0f0f0f0f0f0f0f0fULL);
306 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
307 0x00ff00ff00ff00ffULL);
308 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
309 0x0000ffff0000ffffULL);
310 return val;
312 #else
313 target_ulong helper_popcntb(target_ulong val)
315 /* Note that we don't fold past bytes */
316 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
317 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
318 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
319 return val;
321 #endif
/*
 * Centrifuge doubleword.
 *
 * The bits of 'src' whose corresponding 'mask' bit is 0 are gathered, in
 * order, into the most significant end of the result; the bits whose 'mask'
 * bit is 1 are gathered, in order, into the least significant end.
 * A mask of all zeros or all ones returns 'src' unchanged.
 */
uint64_t helper_cfuged(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
     * ctz or cto, we negate the mask at the end of the loop.
     *
     * The accumulators are explicitly 64 bits wide: they hold 64-bit data and
     * are rotated with ror64(), whatever the width of target_ulong.
     */
    uint64_t m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false; /* tracks if we are processing zeros or ones */

    if (mask == 0 || mask == UINT64_MAX) {
        return src;
    }

    /* Processes the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extracts 'n' trailing bits of src and put them on the leading 'n'
         * bits of 'right' or 'left', pushing down the previously extracted
         * values.
         *
         * The constant is unsigned so that n == 63 does not overflow a
         * signed long long.
         */
        m = (1ull << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discards the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is kept
         * the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, right was ror'ed ctpop(mask) times. To put it back in place,
     * we'll shift it more 64-ctpop(mask) times.
     */
    if (bit) {
        n = ctpop64(mask);
    } else {
        n = 64 - ctpop64(mask);
    }

    return left | (right >> n);
}
385 /*****************************************************************************/
386 /* PowerPC 601 specific instructions (POWER bridge) */
387 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
389 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
391 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
392 (int32_t)arg2 == 0) {
393 env->spr[SPR_MQ] = 0;
394 return INT32_MIN;
395 } else {
396 env->spr[SPR_MQ] = tmp % arg2;
397 return tmp / (int32_t)arg2;
401 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
402 target_ulong arg2)
404 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
406 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
407 (int32_t)arg2 == 0) {
408 env->so = env->ov = 1;
409 env->spr[SPR_MQ] = 0;
410 return INT32_MIN;
411 } else {
412 env->spr[SPR_MQ] = tmp % arg2;
413 tmp /= (int32_t)arg2;
414 if ((int32_t)tmp != tmp) {
415 env->so = env->ov = 1;
416 } else {
417 env->ov = 0;
419 return tmp;
423 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
424 target_ulong arg2)
426 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
427 (int32_t)arg2 == 0) {
428 env->spr[SPR_MQ] = 0;
429 return INT32_MIN;
430 } else {
431 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
432 return (int32_t)arg1 / (int32_t)arg2;
436 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
437 target_ulong arg2)
439 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
440 (int32_t)arg2 == 0) {
441 env->so = env->ov = 1;
442 env->spr[SPR_MQ] = 0;
443 return INT32_MIN;
444 } else {
445 env->ov = 0;
446 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
447 return (int32_t)arg1 / (int32_t)arg2;
451 /*****************************************************************************/
452 /* 602 specific instructions */
453 /* mfrom is the most crazy instruction ever seen, imho ! */
454 /* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *   return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
460 #if !defined(CONFIG_USER_ONLY)
461 target_ulong helper_602_mfrom(target_ulong arg)
463 if (likely(arg < 602)) {
464 #include "mfrom_table.c.inc"
465 return mfrom_ROM_table[arg];
466 } else {
467 return 0;
470 #endif
472 /*****************************************************************************/
473 /* Altivec extension helpers */
/*
 * Iterate 'idx' over every index of the vector field r->field. The walk is
 * ascending on big-endian hosts and descending on little-endian hosts.
 * 'idx' must be a signed integer variable and 'r' must be in scope.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define VECTOR_FOR_INORDER_I(idx, field)                        \
    for (idx = 0; idx < ARRAY_SIZE(r->field); idx++)
#else
#define VECTOR_FOR_INORDER_I(idx, field)                        \
    for (idx = ARRAY_SIZE(r->field) - 1; idx >= 0; idx--)
#endif
/*
 * Saturating arithmetic helpers.
 *
 * SATCVT defines cvt<from><to>(x, sat), which clamps x to [min, max] and
 * sets *sat to 1 whenever clamping happened. SATCVTU is the variant for
 * unsigned sources, where only the upper bound needs checking. *sat is never
 * cleared by these helpers.
 */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x < (from_type)min) {                               \
            *sat = 1;                                           \
            return min;                                         \
        }                                                       \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
525 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
527 ppc_store_vscr(env, vscr);
530 uint32_t helper_mfvscr(CPUPPCState *env)
532 return ppc_get_vscr(env);
535 static inline void set_vscr_sat(CPUPPCState *env)
537 /* The choice of non-zero value is arbitrary. */
538 env->vscr_sat.u32[0] = 1;
541 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
543 int i;
545 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
546 r->u32[i] = ~a->u32[i] < b->u32[i];
550 /* vprtybw */
551 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
553 int i;
554 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
555 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
556 res ^= res >> 8;
557 r->u32[i] = res & 1;
561 /* vprtybd */
562 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
564 int i;
565 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
566 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
567 res ^= res >> 16;
568 res ^= res >> 8;
569 r->u64[i] = res & 1;
573 /* vprtybq */
574 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
576 uint64_t res = b->u64[0] ^ b->u64[1];
577 res ^= res >> 32;
578 res ^= res >> 16;
579 res ^= res >> 8;
580 r->VsrD(1) = res & 1;
581 r->VsrD(0) = 0;
584 #define VARITHFP(suffix, func) \
585 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
586 ppc_avr_t *b) \
588 int i; \
590 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
591 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
594 VARITHFP(addfp, float32_add)
595 VARITHFP(subfp, float32_sub)
596 VARITHFP(minfp, float32_min)
597 VARITHFP(maxfp, float32_max)
598 #undef VARITHFP
600 #define VARITHFPFMA(suffix, type) \
601 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
602 ppc_avr_t *b, ppc_avr_t *c) \
604 int i; \
605 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
606 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
607 type, &env->vec_status); \
610 VARITHFPFMA(maddfp, 0);
611 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
612 #undef VARITHFPFMA
614 #define VARITHSAT_CASE(type, op, cvt, element) \
616 type result = (type)a->element[i] op (type)b->element[i]; \
617 r->element[i] = cvt(result, &sat); \
620 #define VARITHSAT_DO(name, op, optype, cvt, element) \
621 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
622 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
624 int sat = 0; \
625 int i; \
627 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
628 VARITHSAT_CASE(optype, op, cvt, element); \
630 if (sat) { \
631 vscr_sat->u32[0] = 1; \
634 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
635 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
636 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
637 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
638 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
639 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
640 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
641 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
642 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
643 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
644 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
645 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
646 #undef VARITHSAT_CASE
647 #undef VARITHSAT_DO
648 #undef VARITHSAT_SIGNED
649 #undef VARITHSAT_UNSIGNED
651 #define VAVG_DO(name, element, etype) \
652 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
654 int i; \
656 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
657 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
658 r->element[i] = x >> 1; \
662 #define VAVG(type, signed_element, signed_type, unsigned_element, \
663 unsigned_type) \
664 VAVG_DO(avgs##type, signed_element, signed_type) \
665 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
666 VAVG(b, s8, int16_t, u8, uint16_t)
667 VAVG(h, s16, int32_t, u16, uint32_t)
668 VAVG(w, s32, int64_t, u32, uint64_t)
669 #undef VAVG_DO
670 #undef VAVG
672 #define VABSDU_DO(name, element) \
673 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
675 int i; \
677 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
678 r->element[i] = (a->element[i] > b->element[i]) ? \
679 (a->element[i] - b->element[i]) : \
680 (b->element[i] - a->element[i]); \
685 * VABSDU - Vector absolute difference unsigned
686 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
687 * element - element type to access from vector
689 #define VABSDU(type, element) \
690 VABSDU_DO(absdu##type, element)
691 VABSDU(b, u8)
692 VABSDU(h, u16)
693 VABSDU(w, u32)
694 #undef VABSDU_DO
695 #undef VABSDU
697 #define VCF(suffix, cvt, element) \
698 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
699 ppc_avr_t *b, uint32_t uim) \
701 int i; \
703 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
704 float32 t = cvt(b->element[i], &env->vec_status); \
705 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
708 VCF(ux, uint32_to_float32, u32)
709 VCF(sx, int32_to_float32, s32)
710 #undef VCF
712 #define VCMP_DO(suffix, compare, element, record) \
713 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
714 ppc_avr_t *a, ppc_avr_t *b) \
716 uint64_t ones = (uint64_t)-1; \
717 uint64_t all = ones; \
718 uint64_t none = 0; \
719 int i; \
721 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
722 uint64_t result = (a->element[i] compare b->element[i] ? \
723 ones : 0x0); \
724 switch (sizeof(a->element[0])) { \
725 case 8: \
726 r->u64[i] = result; \
727 break; \
728 case 4: \
729 r->u32[i] = result; \
730 break; \
731 case 2: \
732 r->u16[i] = result; \
733 break; \
734 case 1: \
735 r->u8[i] = result; \
736 break; \
738 all &= result; \
739 none |= result; \
741 if (record) { \
742 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
745 #define VCMP(suffix, compare, element) \
746 VCMP_DO(suffix, compare, element, 0) \
747 VCMP_DO(suffix##_dot, compare, element, 1)
748 VCMP(equb, ==, u8)
749 VCMP(equh, ==, u16)
750 VCMP(equw, ==, u32)
751 VCMP(equd, ==, u64)
752 VCMP(gtub, >, u8)
753 VCMP(gtuh, >, u16)
754 VCMP(gtuw, >, u32)
755 VCMP(gtud, >, u64)
756 VCMP(gtsb, >, s8)
757 VCMP(gtsh, >, s16)
758 VCMP(gtsw, >, s32)
759 VCMP(gtsd, >, s64)
760 #undef VCMP_DO
761 #undef VCMP
763 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
764 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
765 ppc_avr_t *a, ppc_avr_t *b) \
767 etype ones = (etype)-1; \
768 etype all = ones; \
769 etype result, none = 0; \
770 int i; \
772 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
773 if (cmpzero) { \
774 result = ((a->element[i] == 0) \
775 || (b->element[i] == 0) \
776 || (a->element[i] != b->element[i]) ? \
777 ones : 0x0); \
778 } else { \
779 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
781 r->element[i] = result; \
782 all &= result; \
783 none |= result; \
785 if (record) { \
786 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
791 * VCMPNEZ - Vector compare not equal to zero
792 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
793 * element - element type to access from vector
795 #define VCMPNE(suffix, element, etype, cmpzero) \
796 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
797 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
798 VCMPNE(zb, u8, uint8_t, 1)
799 VCMPNE(zh, u16, uint16_t, 1)
800 VCMPNE(zw, u32, uint32_t, 1)
801 VCMPNE(b, u8, uint8_t, 0)
802 VCMPNE(h, u16, uint16_t, 0)
803 VCMPNE(w, u32, uint32_t, 0)
804 #undef VCMPNE_DO
805 #undef VCMPNE
807 #define VCMPFP_DO(suffix, compare, order, record) \
808 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
809 ppc_avr_t *a, ppc_avr_t *b) \
811 uint32_t ones = (uint32_t)-1; \
812 uint32_t all = ones; \
813 uint32_t none = 0; \
814 int i; \
816 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
817 uint32_t result; \
818 FloatRelation rel = \
819 float32_compare_quiet(a->f32[i], b->f32[i], \
820 &env->vec_status); \
821 if (rel == float_relation_unordered) { \
822 result = 0; \
823 } else if (rel compare order) { \
824 result = ones; \
825 } else { \
826 result = 0; \
828 r->u32[i] = result; \
829 all &= result; \
830 none |= result; \
832 if (record) { \
833 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
836 #define VCMPFP(suffix, compare, order) \
837 VCMPFP_DO(suffix, compare, order, 0) \
838 VCMPFP_DO(suffix##_dot, compare, order, 1)
839 VCMPFP(eqfp, ==, float_relation_equal)
840 VCMPFP(gefp, !=, float_relation_less)
841 VCMPFP(gtfp, ==, float_relation_greater)
842 #undef VCMPFP_DO
843 #undef VCMPFP
845 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
846 ppc_avr_t *a, ppc_avr_t *b, int record)
848 int i;
849 int all_in = 0;
851 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
852 FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
853 &env->vec_status);
854 if (le_rel == float_relation_unordered) {
855 r->u32[i] = 0xc0000000;
856 all_in = 1;
857 } else {
858 float32 bneg = float32_chs(b->f32[i]);
859 FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
860 &env->vec_status);
861 int le = le_rel != float_relation_greater;
862 int ge = ge_rel != float_relation_less;
864 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
865 all_in |= (!le | !ge);
868 if (record) {
869 env->crf[6] = (all_in == 0) << 1;
873 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
875 vcmpbfp_internal(env, r, a, b, 0);
878 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
879 ppc_avr_t *b)
881 vcmpbfp_internal(env, r, a, b, 1);
884 #define VCT(suffix, satcvt, element) \
885 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
886 ppc_avr_t *b, uint32_t uim) \
888 int i; \
889 int sat = 0; \
890 float_status s = env->vec_status; \
892 set_float_rounding_mode(float_round_to_zero, &s); \
893 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
894 if (float32_is_any_nan(b->f32[i])) { \
895 r->element[i] = 0; \
896 } else { \
897 float64 t = float32_to_float64(b->f32[i], &s); \
898 int64_t j; \
900 t = float64_scalbn(t, uim, &s); \
901 j = float64_to_int64(t, &s); \
902 r->element[i] = satcvt(j, &sat); \
905 if (sat) { \
906 set_vscr_sat(env); \
909 VCT(uxs, cvtsduw, u32)
910 VCT(sxs, cvtsdsw, s32)
911 #undef VCT
913 target_ulong helper_vclzlsbb(ppc_avr_t *r)
915 target_ulong count = 0;
916 int i;
917 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
918 if (r->VsrB(i) & 0x01) {
919 break;
921 count++;
923 return count;
926 target_ulong helper_vctzlsbb(ppc_avr_t *r)
928 target_ulong count = 0;
929 int i;
930 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
931 if (r->VsrB(i) & 0x01) {
932 break;
934 count++;
936 return count;
939 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
940 ppc_avr_t *b, ppc_avr_t *c)
942 int sat = 0;
943 int i;
945 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
946 int32_t prod = a->s16[i] * b->s16[i];
947 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
949 r->s16[i] = cvtswsh(t, &sat);
952 if (sat) {
953 set_vscr_sat(env);
957 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
958 ppc_avr_t *b, ppc_avr_t *c)
960 int sat = 0;
961 int i;
963 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
964 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
965 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
966 r->s16[i] = cvtswsh(t, &sat);
969 if (sat) {
970 set_vscr_sat(env);
974 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
976 int i;
978 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
979 int32_t prod = a->s16[i] * b->s16[i];
980 r->s16[i] = (int16_t) (prod + c->s16[i]);
984 #define VMRG_DO(name, element, access, ofs) \
985 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
987 ppc_avr_t result; \
988 int i, half = ARRAY_SIZE(r->element) / 2; \
990 for (i = 0; i < half; i++) { \
991 result.access(i * 2 + 0) = a->access(i + ofs); \
992 result.access(i * 2 + 1) = b->access(i + ofs); \
994 *r = result; \
997 #define VMRG(suffix, element, access) \
998 VMRG_DO(mrgl##suffix, element, access, half) \
999 VMRG_DO(mrgh##suffix, element, access, 0)
1000 VMRG(b, u8, VsrB)
1001 VMRG(h, u16, VsrH)
1002 VMRG(w, u32, VsrW)
1003 #undef VMRG_DO
1004 #undef VMRG
1006 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1007 ppc_avr_t *b, ppc_avr_t *c)
1009 int32_t prod[16];
1010 int i;
1012 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1013 prod[i] = (int32_t)a->s8[i] * b->u8[i];
1016 VECTOR_FOR_INORDER_I(i, s32) {
1017 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1018 prod[4 * i + 2] + prod[4 * i + 3];
1022 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1023 ppc_avr_t *b, ppc_avr_t *c)
1025 int32_t prod[8];
1026 int i;
1028 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1029 prod[i] = a->s16[i] * b->s16[i];
1032 VECTOR_FOR_INORDER_I(i, s32) {
1033 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1037 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1038 ppc_avr_t *b, ppc_avr_t *c)
1040 int32_t prod[8];
1041 int i;
1042 int sat = 0;
1044 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1045 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1048 VECTOR_FOR_INORDER_I(i, s32) {
1049 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1051 r->u32[i] = cvtsdsw(t, &sat);
1054 if (sat) {
1055 set_vscr_sat(env);
1059 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1060 ppc_avr_t *b, ppc_avr_t *c)
1062 uint16_t prod[16];
1063 int i;
1065 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1066 prod[i] = a->u8[i] * b->u8[i];
1069 VECTOR_FOR_INORDER_I(i, u32) {
1070 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1071 prod[4 * i + 2] + prod[4 * i + 3];
1075 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1076 ppc_avr_t *b, ppc_avr_t *c)
1078 uint32_t prod[8];
1079 int i;
1081 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1082 prod[i] = a->u16[i] * b->u16[i];
1085 VECTOR_FOR_INORDER_I(i, u32) {
1086 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1090 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1091 ppc_avr_t *b, ppc_avr_t *c)
1093 uint32_t prod[8];
1094 int i;
1095 int sat = 0;
1097 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1098 prod[i] = a->u16[i] * b->u16[i];
1101 VECTOR_FOR_INORDER_I(i, s32) {
1102 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1104 r->u32[i] = cvtuduw(t, &sat);
1107 if (sat) {
1108 set_vscr_sat(env);
1112 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1113 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1115 int i; \
1117 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1118 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1119 (cast)b->mul_access(i); \
1123 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1124 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1126 int i; \
1128 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1129 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1130 (cast)b->mul_access(i + 1); \
1134 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1135 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
1136 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
1137 VMUL(sb, s8, VsrSB, VsrSH, int16_t)
1138 VMUL(sh, s16, VsrSH, VsrSW, int32_t)
1139 VMUL(sw, s32, VsrSW, VsrSD, int64_t)
1140 VMUL(ub, u8, VsrB, VsrH, uint16_t)
1141 VMUL(uh, u16, VsrH, VsrW, uint32_t)
1142 VMUL(uw, u32, VsrW, VsrD, uint64_t)
1143 #undef VMUL_DO_EVN
1144 #undef VMUL_DO_ODD
1145 #undef VMUL
1147 void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1149 int i;
1151 for (i = 0; i < 4; i++) {
1152 r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32);
1156 void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1158 int i;
1160 for (i = 0; i < 4; i++) {
1161 r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] *
1162 (uint64_t)b->u32[i]) >> 32);
1166 void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1168 uint64_t discard;
1170 muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]);
1171 muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]);
1174 void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1176 uint64_t discard;
1178 mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]);
1179 mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]);
1182 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1183 ppc_avr_t *c)
1185 ppc_avr_t result;
1186 int i;
1188 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1189 int s = c->VsrB(i) & 0x1f;
1190 int index = s & 0xf;
1192 if (s & 0x10) {
1193 result.VsrB(i) = b->VsrB(index);
1194 } else {
1195 result.VsrB(i) = a->VsrB(index);
1198 *r = result;
1201 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1202 ppc_avr_t *c)
1204 ppc_avr_t result;
1205 int i;
1207 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1208 int s = c->VsrB(i) & 0x1f;
1209 int index = 15 - (s & 0xf);
1211 if (s & 0x10) {
1212 result.VsrB(i) = a->VsrB(index);
1213 } else {
1214 result.VsrB(i) = b->VsrB(index);
1217 *r = result;
/*
 * Host-endianness helpers for the vbpermd/vbpermq implementations.
 *
 *   VBPERMQ_INDEX(avr, i)       - the i-th control byte of 'avr'
 *   VBPERMD_INDEX(i)            - host u64 index for doubleword i
 *   VBPERMQ_DW(index)           - host u64 index holding bit 'index'
 *   EXTRACT_BIT(avr, i, index)  - bit 'index' of doubleword i of 'avr'
 *
 * Every macro argument is parenthesised so that expression arguments expand
 * correctly.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[(i)], (index), 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - (i))
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - (i)], 63 - (index), 1))
#endif
1233 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1235 int i, j;
1236 ppc_avr_t result = { .u64 = { 0, 0 } };
1237 VECTOR_FOR_INORDER_I(i, u64) {
1238 for (j = 0; j < 8; j++) {
1239 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1240 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1241 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1245 *r = result;
1248 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1250 int i;
1251 uint64_t perm = 0;
1253 VECTOR_FOR_INORDER_I(i, u8) {
1254 int index = VBPERMQ_INDEX(b, i);
1256 if (index < 128) {
1257 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1258 if (a->u64[VBPERMQ_DW(index)] & mask) {
1259 perm |= (0x8000 >> i);
1264 r->VsrD(0) = perm;
1265 r->VsrD(1) = 0;
1268 #undef VBPERMQ_INDEX
1269 #undef VBPERMQ_DW
1271 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1272 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1274 int i, j; \
1275 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1277 VECTOR_FOR_INORDER_I(i, srcfld) { \
1278 prod[i] = 0; \
1279 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1280 if (a->srcfld[i] & (1ull << j)) { \
1281 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1286 VECTOR_FOR_INORDER_I(i, trgfld) { \
1287 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1291 PMSUM(vpmsumb, u8, u16, uint16_t)
1292 PMSUM(vpmsumh, u16, u32, uint32_t)
1293 PMSUM(vpmsumw, u32, u64, uint64_t)
1295 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1298 #ifdef CONFIG_INT128
1299 int i, j;
1300 __uint128_t prod[2];
1302 VECTOR_FOR_INORDER_I(i, u64) {
1303 prod[i] = 0;
1304 for (j = 0; j < 64; j++) {
1305 if (a->u64[i] & (1ull << j)) {
1306 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1311 r->u128 = prod[0] ^ prod[1];
1313 #else
1314 int i, j;
1315 ppc_avr_t prod[2];
1317 VECTOR_FOR_INORDER_I(i, u64) {
1318 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1319 for (j = 0; j < 64; j++) {
1320 if (a->u64[i] & (1ull << j)) {
1321 ppc_avr_t bshift;
1322 if (j == 0) {
1323 bshift.VsrD(0) = 0;
1324 bshift.VsrD(1) = b->u64[i];
1325 } else {
1326 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1327 bshift.VsrD(1) = b->u64[i] << j;
1329 prod[i].VsrD(1) ^= bshift.VsrD(1);
1330 prod[i].VsrD(0) ^= bshift.VsrD(0);
1335 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1336 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1337 #endif
1341 #if defined(HOST_WORDS_BIGENDIAN)
1342 #define PKBIG 1
1343 #else
1344 #define PKBIG 0
1345 #endif
1346 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1348 int i, j;
1349 ppc_avr_t result;
1350 #if defined(HOST_WORDS_BIGENDIAN)
1351 const ppc_avr_t *x[2] = { a, b };
1352 #else
1353 const ppc_avr_t *x[2] = { b, a };
1354 #endif
1356 VECTOR_FOR_INORDER_I(i, u64) {
1357 VECTOR_FOR_INORDER_I(j, u32) {
1358 uint32_t e = x[i]->u32[j];
1360 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1361 ((e >> 6) & 0x3e0) |
1362 ((e >> 3) & 0x1f));
1365 *r = result;
1368 #define VPK(suffix, from, to, cvt, dosat) \
1369 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1370 ppc_avr_t *a, ppc_avr_t *b) \
1372 int i; \
1373 int sat = 0; \
1374 ppc_avr_t result; \
1375 ppc_avr_t *a0 = PKBIG ? a : b; \
1376 ppc_avr_t *a1 = PKBIG ? b : a; \
1378 VECTOR_FOR_INORDER_I(i, from) { \
1379 result.to[i] = cvt(a0->from[i], &sat); \
1380 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1382 *r = result; \
1383 if (dosat && sat) { \
1384 set_vscr_sat(env); \
1387 #define I(x, y) (x)
1388 VPK(shss, s16, s8, cvtshsb, 1)
1389 VPK(shus, s16, u8, cvtshub, 1)
1390 VPK(swss, s32, s16, cvtswsh, 1)
1391 VPK(swus, s32, u16, cvtswuh, 1)
1392 VPK(sdss, s64, s32, cvtsdsw, 1)
1393 VPK(sdus, s64, u32, cvtsduw, 1)
1394 VPK(uhus, u16, u8, cvtuhub, 1)
1395 VPK(uwus, u32, u16, cvtuwuh, 1)
1396 VPK(udus, u64, u32, cvtuduw, 1)
1397 VPK(uhum, u16, u8, I, 0)
1398 VPK(uwum, u32, u16, I, 0)
1399 VPK(udum, u64, u32, I, 0)
1400 #undef I
1401 #undef VPK
1402 #undef PKBIG
1404 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1406 int i;
1408 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1409 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1413 #define VRFI(suffix, rounding) \
1414 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1415 ppc_avr_t *b) \
1417 int i; \
1418 float_status s = env->vec_status; \
1420 set_float_rounding_mode(rounding, &s); \
1421 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1422 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1425 VRFI(n, float_round_nearest_even)
1426 VRFI(m, float_round_down)
1427 VRFI(p, float_round_up)
1428 VRFI(z, float_round_to_zero)
1429 #undef VRFI
1431 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1433 int i;
1435 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1436 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1438 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1442 #define VRLMI(name, size, element, insert) \
1443 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1445 int i; \
1446 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1447 uint##size##_t src1 = a->element[i]; \
1448 uint##size##_t src2 = b->element[i]; \
1449 uint##size##_t src3 = r->element[i]; \
1450 uint##size##_t begin, end, shift, mask, rot_val; \
1452 shift = extract##size(src2, 0, 6); \
1453 end = extract##size(src2, 8, 6); \
1454 begin = extract##size(src2, 16, 6); \
1455 rot_val = rol##size(src1, shift); \
1456 mask = mask_u##size(begin, end); \
1457 if (insert) { \
1458 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1459 } else { \
1460 r->element[i] = (rot_val & mask); \
1465 VRLMI(vrldmi, 64, u64, 1);
1466 VRLMI(vrlwmi, 32, u32, 1);
1467 VRLMI(vrldnm, 64, u64, 0);
1468 VRLMI(vrlwnm, 32, u32, 0);
1470 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1471 ppc_avr_t *c)
1473 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1474 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1477 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1479 int i;
1481 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1482 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1486 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1488 int i;
1490 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1491 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1495 #if defined(HOST_WORDS_BIGENDIAN)
1496 #define VEXTU_X_DO(name, size, left) \
1497 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1499 int index; \
1500 if (left) { \
1501 index = (a & 0xf) * 8; \
1502 } else { \
1503 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1505 return int128_getlo(int128_rshift(b->s128, index)) & \
1506 MAKE_64BIT_MASK(0, size); \
1508 #else
1509 #define VEXTU_X_DO(name, size, left) \
1510 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1512 int index; \
1513 if (left) { \
1514 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1515 } else { \
1516 index = (a & 0xf) * 8; \
1518 return int128_getlo(int128_rshift(b->s128, index)) & \
1519 MAKE_64BIT_MASK(0, size); \
1521 #endif
1523 VEXTU_X_DO(vextublx, 8, 1)
1524 VEXTU_X_DO(vextuhlx, 16, 1)
1525 VEXTU_X_DO(vextuwlx, 32, 1)
1526 VEXTU_X_DO(vextubrx, 8, 0)
1527 VEXTU_X_DO(vextuhrx, 16, 0)
1528 VEXTU_X_DO(vextuwrx, 32, 0)
1529 #undef VEXTU_X_DO
1531 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1533 int i;
1534 unsigned int shift, bytes, size;
1536 size = ARRAY_SIZE(r->u8);
1537 for (i = 0; i < size; i++) {
1538 shift = b->VsrB(i) & 0x7; /* extract shift value */
1539 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1540 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1541 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1545 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1547 int i;
1548 unsigned int shift, bytes;
1551 * Use reverse order, as destination and source register can be
1552 * same. Its being modified in place saving temporary, reverse
1553 * order will guarantee that computed result is not fed back.
1555 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1556 shift = b->VsrB(i) & 0x7; /* extract shift value */
1557 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1558 /* extract adjacent bytes */
1559 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1563 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1565 int sh = shift & 0xf;
1566 int i;
1567 ppc_avr_t result;
1569 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1570 int index = sh + i;
1571 if (index > 0xf) {
1572 result.VsrB(i) = b->VsrB(index - 0x10);
1573 } else {
1574 result.VsrB(i) = a->VsrB(index);
1577 *r = result;
1580 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1582 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1584 #if defined(HOST_WORDS_BIGENDIAN)
1585 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1586 memset(&r->u8[16 - sh], 0, sh);
1587 #else
1588 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1589 memset(&r->u8[0], 0, sh);
1590 #endif
1593 #if defined(HOST_WORDS_BIGENDIAN)
1594 #define VINSERT(suffix, element) \
1595 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1597 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
1598 sizeof(r->element[0])); \
1600 #else
1601 #define VINSERT(suffix, element) \
1602 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1604 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1605 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1607 #endif
1608 VINSERT(b, u8)
1609 VINSERT(h, u16)
1610 VINSERT(w, u32)
1611 VINSERT(d, u64)
1612 #undef VINSERT
1613 #if defined(HOST_WORDS_BIGENDIAN)
1614 #define VEXTRACT(suffix, element) \
1615 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1617 uint32_t es = sizeof(r->element[0]); \
1618 memmove(&r->u8[8 - es], &b->u8[index], es); \
1619 memset(&r->u8[8], 0, 8); \
1620 memset(&r->u8[0], 0, 8 - es); \
1622 #else
1623 #define VEXTRACT(suffix, element) \
1624 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1626 uint32_t es = sizeof(r->element[0]); \
1627 uint32_t s = (16 - index) - es; \
1628 memmove(&r->u8[8], &b->u8[s], es); \
1629 memset(&r->u8[0], 0, 8); \
1630 memset(&r->u8[8 + es], 0, 8 - es); \
1632 #endif
1633 VEXTRACT(ub, u8)
1634 VEXTRACT(uh, u16)
1635 VEXTRACT(uw, u32)
1636 VEXTRACT(d, u64)
1637 #undef VEXTRACT
1639 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1640 ppc_vsr_t *xb, uint32_t index)
1642 ppc_vsr_t t = { };
1643 size_t es = sizeof(uint32_t);
1644 uint32_t ext_index;
1645 int i;
1647 ext_index = index;
1648 for (i = 0; i < es; i++, ext_index++) {
1649 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1652 *xt = t;
1655 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1656 ppc_vsr_t *xb, uint32_t index)
1658 ppc_vsr_t t = *xt;
1659 size_t es = sizeof(uint32_t);
1660 int ins_index, i = 0;
1662 ins_index = index;
1663 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1664 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1667 *xt = t;
1670 #define VEXT_SIGNED(name, element, cast) \
1671 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1673 int i; \
1674 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1675 r->element[i] = (cast)b->element[i]; \
1678 VEXT_SIGNED(vextsb2w, s32, int8_t)
1679 VEXT_SIGNED(vextsb2d, s64, int8_t)
1680 VEXT_SIGNED(vextsh2w, s32, int16_t)
1681 VEXT_SIGNED(vextsh2d, s64, int16_t)
1682 VEXT_SIGNED(vextsw2d, s64, int32_t)
1683 #undef VEXT_SIGNED
1685 #define VNEG(name, element) \
1686 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1688 int i; \
1689 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1690 r->element[i] = -b->element[i]; \
1693 VNEG(vnegw, s32)
1694 VNEG(vnegd, s64)
1695 #undef VNEG
1697 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1699 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1701 #if defined(HOST_WORDS_BIGENDIAN)
1702 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1703 memset(&r->u8[0], 0, sh);
1704 #else
1705 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1706 memset(&r->u8[16 - sh], 0, sh);
1707 #endif
1710 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1712 int i;
1714 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1715 r->u32[i] = a->u32[i] >= b->u32[i];
1719 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1721 int64_t t;
1722 int i, upper;
1723 ppc_avr_t result;
1724 int sat = 0;
1726 upper = ARRAY_SIZE(r->s32) - 1;
1727 t = (int64_t)b->VsrSW(upper);
1728 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1729 t += a->VsrSW(i);
1730 result.VsrSW(i) = 0;
1732 result.VsrSW(upper) = cvtsdsw(t, &sat);
1733 *r = result;
1735 if (sat) {
1736 set_vscr_sat(env);
1740 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1742 int i, j, upper;
1743 ppc_avr_t result;
1744 int sat = 0;
1746 upper = 1;
1747 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1748 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1750 result.VsrD(i) = 0;
1751 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1752 t += a->VsrSW(2 * i + j);
1754 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1757 *r = result;
1758 if (sat) {
1759 set_vscr_sat(env);
1763 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1765 int i, j;
1766 int sat = 0;
1768 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1769 int64_t t = (int64_t)b->s32[i];
1771 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1772 t += a->s8[4 * i + j];
1774 r->s32[i] = cvtsdsw(t, &sat);
1777 if (sat) {
1778 set_vscr_sat(env);
1782 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1784 int sat = 0;
1785 int i;
1787 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1788 int64_t t = (int64_t)b->s32[i];
1790 t += a->s16[2 * i] + a->s16[2 * i + 1];
1791 r->s32[i] = cvtsdsw(t, &sat);
1794 if (sat) {
1795 set_vscr_sat(env);
1799 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1801 int i, j;
1802 int sat = 0;
1804 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1805 uint64_t t = (uint64_t)b->u32[i];
1807 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1808 t += a->u8[4 * i + j];
1810 r->u32[i] = cvtuduw(t, &sat);
1813 if (sat) {
1814 set_vscr_sat(env);
1818 #if defined(HOST_WORDS_BIGENDIAN)
1819 #define UPKHI 1
1820 #define UPKLO 0
1821 #else
1822 #define UPKHI 0
1823 #define UPKLO 1
1824 #endif
1825 #define VUPKPX(suffix, hi) \
1826 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1828 int i; \
1829 ppc_avr_t result; \
1831 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1832 uint16_t e = b->u16[hi ? i : i + 4]; \
1833 uint8_t a = (e >> 15) ? 0xff : 0; \
1834 uint8_t r = (e >> 10) & 0x1f; \
1835 uint8_t g = (e >> 5) & 0x1f; \
1836 uint8_t b = e & 0x1f; \
1838 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1840 *r = result; \
1842 VUPKPX(lpx, UPKLO)
1843 VUPKPX(hpx, UPKHI)
1844 #undef VUPKPX
1846 #define VUPK(suffix, unpacked, packee, hi) \
1847 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1849 int i; \
1850 ppc_avr_t result; \
1852 if (hi) { \
1853 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1854 result.unpacked[i] = b->packee[i]; \
1856 } else { \
1857 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1858 i++) { \
1859 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1862 *r = result; \
1864 VUPK(hsb, s16, s8, UPKHI)
1865 VUPK(hsh, s32, s16, UPKHI)
1866 VUPK(hsw, s64, s32, UPKHI)
1867 VUPK(lsb, s16, s8, UPKLO)
1868 VUPK(lsh, s32, s16, UPKLO)
1869 VUPK(lsw, s64, s32, UPKLO)
1870 #undef VUPK
1871 #undef UPKHI
1872 #undef UPKLO
1874 #define VGENERIC_DO(name, element) \
1875 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1877 int i; \
1879 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1880 r->element[i] = name(b->element[i]); \
1884 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1885 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1887 VGENERIC_DO(clzb, u8)
1888 VGENERIC_DO(clzh, u16)
1890 #undef clzb
1891 #undef clzh
1893 #define ctzb(v) ((v) ? ctz32(v) : 8)
1894 #define ctzh(v) ((v) ? ctz32(v) : 16)
1895 #define ctzw(v) ctz32((v))
1896 #define ctzd(v) ctz64((v))
1898 VGENERIC_DO(ctzb, u8)
1899 VGENERIC_DO(ctzh, u16)
1900 VGENERIC_DO(ctzw, u32)
1901 VGENERIC_DO(ctzd, u64)
1903 #undef ctzb
1904 #undef ctzh
1905 #undef ctzw
1906 #undef ctzd
1908 #define popcntb(v) ctpop8(v)
1909 #define popcnth(v) ctpop16(v)
1910 #define popcntw(v) ctpop32(v)
1911 #define popcntd(v) ctpop64(v)
1913 VGENERIC_DO(popcntb, u8)
1914 VGENERIC_DO(popcnth, u16)
1915 VGENERIC_DO(popcntw, u32)
1916 VGENERIC_DO(popcntd, u64)
1918 #undef popcntb
1919 #undef popcnth
1920 #undef popcntw
1921 #undef popcntd
1923 #undef VGENERIC_DO
1925 #if defined(HOST_WORDS_BIGENDIAN)
1926 #define QW_ONE { .u64 = { 0, 1 } }
1927 #else
1928 #define QW_ONE { .u64 = { 1, 0 } }
1929 #endif
1931 #ifndef CONFIG_INT128
1933 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1935 t->u64[0] = ~a.u64[0];
1936 t->u64[1] = ~a.u64[1];
1939 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1941 if (a.VsrD(0) < b.VsrD(0)) {
1942 return -1;
1943 } else if (a.VsrD(0) > b.VsrD(0)) {
1944 return 1;
1945 } else if (a.VsrD(1) < b.VsrD(1)) {
1946 return -1;
1947 } else if (a.VsrD(1) > b.VsrD(1)) {
1948 return 1;
1949 } else {
1950 return 0;
1954 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1956 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1957 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1958 (~a.VsrD(1) < b.VsrD(1));
1961 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1963 ppc_avr_t not_a;
1964 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1965 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1966 (~a.VsrD(1) < b.VsrD(1));
1967 avr_qw_not(&not_a, a);
1968 return avr_qw_cmpu(not_a, b) < 0;
1971 #endif
1973 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1975 #ifdef CONFIG_INT128
1976 r->u128 = a->u128 + b->u128;
1977 #else
1978 avr_qw_add(r, *a, *b);
1979 #endif
1982 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1984 #ifdef CONFIG_INT128
1985 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1986 #else
1988 if (c->VsrD(1) & 1) {
1989 ppc_avr_t tmp;
1991 tmp.VsrD(0) = 0;
1992 tmp.VsrD(1) = c->VsrD(1) & 1;
1993 avr_qw_add(&tmp, *a, tmp);
1994 avr_qw_add(r, tmp, *b);
1995 } else {
1996 avr_qw_add(r, *a, *b);
1998 #endif
2001 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2003 #ifdef CONFIG_INT128
2004 r->u128 = (~a->u128 < b->u128);
2005 #else
2006 ppc_avr_t not_a;
2008 avr_qw_not(&not_a, *a);
2010 r->VsrD(0) = 0;
2011 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
2012 #endif
2015 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2017 #ifdef CONFIG_INT128
2018 int carry_out = (~a->u128 < b->u128);
2019 if (!carry_out && (c->u128 & 1)) {
2020 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2021 ((a->u128 != 0) || (b->u128 != 0));
2023 r->u128 = carry_out;
2024 #else
2026 int carry_in = c->VsrD(1) & 1;
2027 int carry_out = 0;
2028 ppc_avr_t tmp;
2030 carry_out = avr_qw_addc(&tmp, *a, *b);
2032 if (!carry_out && carry_in) {
2033 ppc_avr_t one = QW_ONE;
2034 carry_out = avr_qw_addc(&tmp, tmp, one);
2036 r->VsrD(0) = 0;
2037 r->VsrD(1) = carry_out;
2038 #endif
2041 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2043 #ifdef CONFIG_INT128
2044 r->u128 = a->u128 - b->u128;
2045 #else
2046 ppc_avr_t tmp;
2047 ppc_avr_t one = QW_ONE;
2049 avr_qw_not(&tmp, *b);
2050 avr_qw_add(&tmp, *a, tmp);
2051 avr_qw_add(r, tmp, one);
2052 #endif
2055 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2057 #ifdef CONFIG_INT128
2058 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2059 #else
2060 ppc_avr_t tmp, sum;
2062 avr_qw_not(&tmp, *b);
2063 avr_qw_add(&sum, *a, tmp);
2065 tmp.VsrD(0) = 0;
2066 tmp.VsrD(1) = c->VsrD(1) & 1;
2067 avr_qw_add(r, sum, tmp);
2068 #endif
2071 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2073 #ifdef CONFIG_INT128
2074 r->u128 = (~a->u128 < ~b->u128) ||
2075 (a->u128 + ~b->u128 == (__uint128_t)-1);
2076 #else
2077 int carry = (avr_qw_cmpu(*a, *b) > 0);
2078 if (!carry) {
2079 ppc_avr_t tmp;
2080 avr_qw_not(&tmp, *b);
2081 avr_qw_add(&tmp, *a, tmp);
2082 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2084 r->VsrD(0) = 0;
2085 r->VsrD(1) = carry;
2086 #endif
2089 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2091 #ifdef CONFIG_INT128
2092 r->u128 =
2093 (~a->u128 < ~b->u128) ||
2094 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2095 #else
2096 int carry_in = c->VsrD(1) & 1;
2097 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2098 if (!carry_out && carry_in) {
2099 ppc_avr_t tmp;
2100 avr_qw_not(&tmp, *b);
2101 avr_qw_add(&tmp, *a, tmp);
2102 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2105 r->VsrD(0) = 0;
2106 r->VsrD(1) = carry_out;
2107 #endif
/* Sign nibbles accepted as "plus" by bcd_get_sgn(). */
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_PLUS_ALT_2  0xE
/* Sign nibbles accepted as "minus" by bcd_get_sgn(). */
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
/* Sign characters of the national format. */
#define NATIONAL_PLUS   0x2B
#define NATIONAL_NEG    0x2D

/* VsrB() byte index that holds BCD nibble n (two nibbles per byte). */
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2121 static int bcd_get_sgn(ppc_avr_t *bcd)
2123 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2124 case BCD_PLUS_PREF_1:
2125 case BCD_PLUS_PREF_2:
2126 case BCD_PLUS_ALT_1:
2127 case BCD_PLUS_ALT_2:
2129 return 1;
2132 case BCD_NEG_PREF:
2133 case BCD_NEG_ALT:
2135 return -1;
2138 default:
2140 return 0;
2145 static int bcd_preferred_sgn(int sgn, int ps)
2147 if (sgn >= 0) {
2148 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2149 } else {
2150 return BCD_NEG_PREF;
2154 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2156 uint8_t result;
2157 if (n & 1) {
2158 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2159 } else {
2160 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2163 if (unlikely(result > 9)) {
2164 *invalid = true;
2166 return result;
2169 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2171 if (n & 1) {
2172 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2173 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2174 } else {
2175 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2176 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2180 static bool bcd_is_valid(ppc_avr_t *bcd)
2182 int i;
2183 int invalid = 0;
2185 if (bcd_get_sgn(bcd) == 0) {
2186 return false;
2189 for (i = 1; i < 32; i++) {
2190 bcd_get_digit(bcd, i, &invalid);
2191 if (unlikely(invalid)) {
2192 return false;
2195 return true;
2198 static int bcd_cmp_zero(ppc_avr_t *bcd)
2200 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2201 return CRF_EQ;
2202 } else {
2203 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2207 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2209 return reg->VsrH(7 - n);
2212 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2214 reg->VsrH(7 - n) = val;
2217 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2219 int i;
2220 int invalid = 0;
2221 for (i = 31; i > 0; i--) {
2222 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2223 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2224 if (unlikely(invalid)) {
2225 return 0; /* doesn't matter */
2226 } else if (dig_a > dig_b) {
2227 return 1;
2228 } else if (dig_a < dig_b) {
2229 return -1;
2233 return 0;
2236 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2237 int *overflow)
2239 int carry = 0;
2240 int i;
2241 int is_zero = 1;
2243 for (i = 1; i <= 31; i++) {
2244 uint8_t digit = bcd_get_digit(a, i, invalid) +
2245 bcd_get_digit(b, i, invalid) + carry;
2246 is_zero &= (digit == 0);
2247 if (digit > 9) {
2248 carry = 1;
2249 digit -= 10;
2250 } else {
2251 carry = 0;
2254 bcd_put_digit(t, digit, i);
2257 *overflow = carry;
2258 return is_zero;
2261 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2262 int *overflow)
2264 int carry = 0;
2265 int i;
2267 for (i = 1; i <= 31; i++) {
2268 uint8_t digit = bcd_get_digit(a, i, invalid) -
2269 bcd_get_digit(b, i, invalid) + carry;
2270 if (digit & 0x80) {
2271 carry = -1;
2272 digit += 10;
2273 } else {
2274 carry = 0;
2277 bcd_put_digit(t, digit, i);
2280 *overflow = carry;
2283 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2286 int sgna = bcd_get_sgn(a);
2287 int sgnb = bcd_get_sgn(b);
2288 int invalid = (sgna == 0) || (sgnb == 0);
2289 int overflow = 0;
2290 int zero = 0;
2291 uint32_t cr = 0;
2292 ppc_avr_t result = { .u64 = { 0, 0 } };
2294 if (!invalid) {
2295 if (sgna == sgnb) {
2296 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2297 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2298 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2299 } else {
2300 int magnitude = bcd_cmp_mag(a, b);
2301 if (magnitude > 0) {
2302 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2303 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2304 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2305 } else if (magnitude < 0) {
2306 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2307 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2308 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2309 } else {
2310 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2311 cr = CRF_EQ;
2316 if (unlikely(invalid)) {
2317 result.VsrD(0) = result.VsrD(1) = -1;
2318 cr = CRF_SO;
2319 } else if (overflow) {
2320 cr |= CRF_SO;
2321 } else if (zero) {
2322 cr |= CRF_EQ;
2325 *r = result;
2327 return cr;
2330 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2332 ppc_avr_t bcopy = *b;
2333 int sgnb = bcd_get_sgn(b);
2334 if (sgnb < 0) {
2335 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2336 } else if (sgnb > 0) {
2337 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2339 /* else invalid ... defer to bcdadd code for proper handling */
2341 return helper_bcdadd(r, a, &bcopy, ps);
2344 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2346 int i;
2347 int cr = 0;
2348 uint16_t national = 0;
2349 uint16_t sgnb = get_national_digit(b, 0);
2350 ppc_avr_t ret = { .u64 = { 0, 0 } };
2351 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2353 for (i = 1; i < 8; i++) {
2354 national = get_national_digit(b, i);
2355 if (unlikely(national < 0x30 || national > 0x39)) {
2356 invalid = 1;
2357 break;
2360 bcd_put_digit(&ret, national & 0xf, i);
2363 if (sgnb == NATIONAL_PLUS) {
2364 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2365 } else {
2366 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2369 cr = bcd_cmp_zero(&ret);
2371 if (unlikely(invalid)) {
2372 cr = CRF_SO;
2375 *r = ret;
2377 return cr;
2380 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2382 int i;
2383 int cr = 0;
2384 int sgnb = bcd_get_sgn(b);
2385 int invalid = (sgnb == 0);
2386 ppc_avr_t ret = { .u64 = { 0, 0 } };
2388 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2390 for (i = 1; i < 8; i++) {
2391 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2393 if (unlikely(invalid)) {
2394 break;
2397 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2399 cr = bcd_cmp_zero(b);
2401 if (ox_flag) {
2402 cr |= CRF_SO;
2405 if (unlikely(invalid)) {
2406 cr = CRF_SO;
2409 *r = ret;
2411 return cr;
2414 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2416 int i;
2417 int cr = 0;
2418 int invalid = 0;
2419 int zone_digit = 0;
2420 int zone_lead = ps ? 0xF : 0x3;
2421 int digit = 0;
2422 ppc_avr_t ret = { .u64 = { 0, 0 } };
2423 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2425 if (unlikely((sgnb < 0xA) && ps)) {
2426 invalid = 1;
2429 for (i = 0; i < 16; i++) {
2430 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2431 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2432 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2433 invalid = 1;
2434 break;
2437 bcd_put_digit(&ret, digit, i + 1);
2440 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2441 (!ps && (sgnb & 0x4))) {
2442 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2443 } else {
2444 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2447 cr = bcd_cmp_zero(&ret);
2449 if (unlikely(invalid)) {
2450 cr = CRF_SO;
2453 *r = ret;
2455 return cr;
2458 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2460 int i;
2461 int cr = 0;
2462 uint8_t digit = 0;
2463 int sgnb = bcd_get_sgn(b);
2464 int zone_lead = (ps) ? 0xF0 : 0x30;
2465 int invalid = (sgnb == 0);
2466 ppc_avr_t ret = { .u64 = { 0, 0 } };
2468 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2470 for (i = 0; i < 16; i++) {
2471 digit = bcd_get_digit(b, i + 1, &invalid);
2473 if (unlikely(invalid)) {
2474 break;
2477 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2480 if (ps) {
2481 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2482 } else {
2483 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2486 cr = bcd_cmp_zero(b);
2488 if (ox_flag) {
2489 cr |= CRF_SO;
2492 if (unlikely(invalid)) {
2493 cr = CRF_SO;
2496 *r = ret;
2498 return cr;
2501 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2503 int i;
2504 int cr = 0;
2505 uint64_t lo_value;
2506 uint64_t hi_value;
2507 ppc_avr_t ret = { .u64 = { 0, 0 } };
2509 if (b->VsrSD(0) < 0) {
2510 lo_value = -b->VsrSD(1);
2511 hi_value = ~b->VsrD(0) + !lo_value;
2512 bcd_put_digit(&ret, 0xD, 0);
2513 } else {
2514 lo_value = b->VsrD(1);
2515 hi_value = b->VsrD(0);
2516 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2519 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
2520 lo_value > 9999999999999999ULL) {
2521 cr = CRF_SO;
2524 for (i = 1; i < 16; hi_value /= 10, i++) {
2525 bcd_put_digit(&ret, hi_value % 10, i);
2528 for (; i < 32; lo_value /= 10, i++) {
2529 bcd_put_digit(&ret, lo_value % 10, i);
2532 cr |= bcd_cmp_zero(&ret);
2534 *r = ret;
2536 return cr;
2539 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2541 uint8_t i;
2542 int cr;
2543 uint64_t carry;
2544 uint64_t unused;
2545 uint64_t lo_value;
2546 uint64_t hi_value = 0;
2547 int sgnb = bcd_get_sgn(b);
2548 int invalid = (sgnb == 0);
2550 lo_value = bcd_get_digit(b, 31, &invalid);
2551 for (i = 30; i > 0; i--) {
2552 mulu64(&lo_value, &carry, lo_value, 10ULL);
2553 mulu64(&hi_value, &unused, hi_value, 10ULL);
2554 lo_value += bcd_get_digit(b, i, &invalid);
2555 hi_value += carry;
2557 if (unlikely(invalid)) {
2558 break;
2562 if (sgnb == -1) {
2563 r->VsrSD(1) = -lo_value;
2564 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2565 } else {
2566 r->VsrSD(1) = lo_value;
2567 r->VsrSD(0) = hi_value;
2570 cr = bcd_cmp_zero(b);
2572 if (unlikely(invalid)) {
2573 cr = CRF_SO;
2576 return cr;
2579 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2581 int i;
2582 int invalid = 0;
2584 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2585 return CRF_SO;
2588 *r = *a;
2589 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2591 for (i = 1; i < 32; i++) {
2592 bcd_get_digit(a, i, &invalid);
2593 bcd_get_digit(b, i, &invalid);
2594 if (unlikely(invalid)) {
2595 return CRF_SO;
2599 return bcd_cmp_zero(r);
2602 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2604 int sgnb = bcd_get_sgn(b);
2606 *r = *b;
2607 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2609 if (bcd_is_valid(b) == false) {
2610 return CRF_SO;
2613 return bcd_cmp_zero(r);
2616 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2618 int cr;
2619 int i = a->VsrSB(7);
2620 bool ox_flag = false;
2621 int sgnb = bcd_get_sgn(b);
2622 ppc_avr_t ret = *b;
2623 ret.VsrD(1) &= ~0xf;
2625 if (bcd_is_valid(b) == false) {
2626 return CRF_SO;
2629 if (unlikely(i > 31)) {
2630 i = 31;
2631 } else if (unlikely(i < -31)) {
2632 i = -31;
2635 if (i > 0) {
2636 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2637 } else {
2638 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2640 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2642 *r = ret;
2644 cr = bcd_cmp_zero(r);
2645 if (ox_flag) {
2646 cr |= CRF_SO;
2649 return cr;
2652 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2654 int cr;
2655 int i;
2656 int invalid = 0;
2657 bool ox_flag = false;
2658 ppc_avr_t ret = *b;
2660 for (i = 0; i < 32; i++) {
2661 bcd_get_digit(b, i, &invalid);
2663 if (unlikely(invalid)) {
2664 return CRF_SO;
2668 i = a->VsrSB(7);
2669 if (i >= 32) {
2670 ox_flag = true;
2671 ret.VsrD(1) = ret.VsrD(0) = 0;
2672 } else if (i <= -32) {
2673 ret.VsrD(1) = ret.VsrD(0) = 0;
2674 } else if (i > 0) {
2675 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2676 } else {
2677 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2679 *r = ret;
2681 cr = bcd_cmp_zero(r);
2682 if (ox_flag) {
2683 cr |= CRF_SO;
2686 return cr;
2689 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2691 int cr;
2692 int unused = 0;
2693 int invalid = 0;
2694 bool ox_flag = false;
2695 int sgnb = bcd_get_sgn(b);
2696 ppc_avr_t ret = *b;
2697 ret.VsrD(1) &= ~0xf;
2699 int i = a->VsrSB(7);
2700 ppc_avr_t bcd_one;
2702 bcd_one.VsrD(0) = 0;
2703 bcd_one.VsrD(1) = 0x10;
2705 if (bcd_is_valid(b) == false) {
2706 return CRF_SO;
2709 if (unlikely(i > 31)) {
2710 i = 31;
2711 } else if (unlikely(i < -31)) {
2712 i = -31;
2715 if (i > 0) {
2716 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2717 } else {
2718 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2720 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2721 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2724 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2726 cr = bcd_cmp_zero(&ret);
2727 if (ox_flag) {
2728 cr |= CRF_SO;
2730 *r = ret;
2732 return cr;
2735 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2737 uint64_t mask;
2738 uint32_t ox_flag = 0;
2739 int i = a->VsrSH(3) + 1;
2740 ppc_avr_t ret = *b;
2742 if (bcd_is_valid(b) == false) {
2743 return CRF_SO;
2746 if (i > 16 && i < 32) {
2747 mask = (uint64_t)-1 >> (128 - i * 4);
2748 if (ret.VsrD(0) & ~mask) {
2749 ox_flag = CRF_SO;
2752 ret.VsrD(0) &= mask;
2753 } else if (i >= 0 && i <= 16) {
2754 mask = (uint64_t)-1 >> (64 - i * 4);
2755 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2756 ox_flag = CRF_SO;
2759 ret.VsrD(1) &= mask;
2760 ret.VsrD(0) = 0;
2762 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2763 *r = ret;
2765 return bcd_cmp_zero(&ret) | ox_flag;
2768 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2770 int i;
2771 uint64_t mask;
2772 uint32_t ox_flag = 0;
2773 int invalid = 0;
2774 ppc_avr_t ret = *b;
2776 for (i = 0; i < 32; i++) {
2777 bcd_get_digit(b, i, &invalid);
2779 if (unlikely(invalid)) {
2780 return CRF_SO;
2784 i = a->VsrSH(3);
2785 if (i > 16 && i < 33) {
2786 mask = (uint64_t)-1 >> (128 - i * 4);
2787 if (ret.VsrD(0) & ~mask) {
2788 ox_flag = CRF_SO;
2791 ret.VsrD(0) &= mask;
2792 } else if (i > 0 && i <= 16) {
2793 mask = (uint64_t)-1 >> (64 - i * 4);
2794 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2795 ox_flag = CRF_SO;
2798 ret.VsrD(1) &= mask;
2799 ret.VsrD(0) = 0;
2800 } else if (i == 0) {
2801 if (ret.VsrD(0) || ret.VsrD(1)) {
2802 ox_flag = CRF_SO;
2804 ret.VsrD(0) = ret.VsrD(1) = 0;
2807 *r = ret;
2808 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2809 return ox_flag | CRF_EQ;
2812 return ox_flag | CRF_GT;
2815 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2817 int i;
2818 VECTOR_FOR_INORDER_I(i, u8) {
2819 r->u8[i] = AES_sbox[a->u8[i]];
2823 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2825 ppc_avr_t result;
2826 int i;
2828 VECTOR_FOR_INORDER_I(i, u32) {
2829 result.VsrW(i) = b->VsrW(i) ^
2830 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2831 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2832 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2833 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2835 *r = result;
2838 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2840 ppc_avr_t result;
2841 int i;
2843 VECTOR_FOR_INORDER_I(i, u8) {
2844 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2846 *r = result;
2849 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2851 /* This differs from what is written in ISA V2.07. The RTL is */
2852 /* incorrect and will be fixed in V2.07B. */
2853 int i;
2854 ppc_avr_t tmp;
2856 VECTOR_FOR_INORDER_I(i, u8) {
2857 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2860 VECTOR_FOR_INORDER_I(i, u32) {
2861 r->VsrW(i) =
2862 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2863 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2864 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2865 AES_imc[tmp.VsrB(4 * i + 3)][3];
2869 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2871 ppc_avr_t result;
2872 int i;
2874 VECTOR_FOR_INORDER_I(i, u8) {
2875 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2877 *r = result;
2880 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2882 int st = (st_six & 0x10) != 0;
2883 int six = st_six & 0xF;
2884 int i;
2886 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2887 if (st == 0) {
2888 if ((six & (0x8 >> i)) == 0) {
2889 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2890 ror32(a->VsrW(i), 18) ^
2891 (a->VsrW(i) >> 3);
2892 } else { /* six.bit[i] == 1 */
2893 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2894 ror32(a->VsrW(i), 19) ^
2895 (a->VsrW(i) >> 10);
2897 } else { /* st == 1 */
2898 if ((six & (0x8 >> i)) == 0) {
2899 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2900 ror32(a->VsrW(i), 13) ^
2901 ror32(a->VsrW(i), 22);
2902 } else { /* six.bit[i] == 1 */
2903 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2904 ror32(a->VsrW(i), 11) ^
2905 ror32(a->VsrW(i), 25);
2911 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2913 int st = (st_six & 0x10) != 0;
2914 int six = st_six & 0xF;
2915 int i;
2917 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2918 if (st == 0) {
2919 if ((six & (0x8 >> (2 * i))) == 0) {
2920 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
2921 ror64(a->VsrD(i), 8) ^
2922 (a->VsrD(i) >> 7);
2923 } else { /* six.bit[2*i] == 1 */
2924 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
2925 ror64(a->VsrD(i), 61) ^
2926 (a->VsrD(i) >> 6);
2928 } else { /* st == 1 */
2929 if ((six & (0x8 >> (2 * i))) == 0) {
2930 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
2931 ror64(a->VsrD(i), 34) ^
2932 ror64(a->VsrD(i), 39);
2933 } else { /* six.bit[2*i] == 1 */
2934 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
2935 ror64(a->VsrD(i), 18) ^
2936 ror64(a->VsrD(i), 41);
2942 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2944 ppc_avr_t result;
2945 int i;
2947 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
2948 int indexA = c->VsrB(i) >> 4;
2949 int indexB = c->VsrB(i) & 0xF;
2951 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
2953 *r = result;
2956 #undef VECTOR_FOR_INORDER_I
2958 /*****************************************************************************/
2959 /* SPE extension helpers */
2960 /* Use a table to make this quicker */
/* hbrev[n] is the 4-bit value n with its bit order reversed. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of a byte: swap the nibbles and mirror each one. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/*
 * Reverse the bit order of a 32-bit word (bit 0 becomes bit 31, etc.).
 *
 * Each reversed byte is widened to uint32_t before it is shifted into
 * place: byte_reverse() returns a uint8_t, which would otherwise be
 * promoted to a signed int, and shifting a value >= 0x80 left by 24 bits
 * overflows a 32-bit int.
 */
static inline uint32_t word_reverse(uint32_t val)
{
    return (uint32_t)byte_reverse(val >> 24) |
           ((uint32_t)byte_reverse(val >> 16) << 8) |
           ((uint32_t)byte_reverse(val >> 8) << 16) |
           ((uint32_t)byte_reverse(val) << 24);
}
2977 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
2978 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2980 uint32_t a, b, d, mask;
2982 mask = UINT32_MAX >> (32 - MASKBITS);
2983 a = arg1 & mask;
2984 b = arg2 & mask;
2985 d = word_reverse(1 + word_reverse(a | ~b));
2986 return (arg1 & ~mask) | (d & b);
/*
 * Count the leading bits of @val that are equal to its most significant
 * bit: when bit 31 is set the value is inverted first, then clz32()
 * counts the leading zeros.
 */
uint32_t helper_cntlsw32(uint32_t val)
{
    const uint32_t magnitude = (val & 0x80000000) ? ~val : val;

    return clz32(magnitude);
}
/* Return the number of leading zero bits of @val, as computed by clz32(). */
uint32_t helper_cntlzw32(uint32_t val)
{
    const uint32_t leading_zeros = clz32(val);

    return leading_zeros;
}
3003 /* 440 specific */
3004 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3005 target_ulong low, uint32_t update_Rc)
3007 target_ulong mask;
3008 int i;
3010 i = 1;
3011 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3012 if ((high & mask) == 0) {
3013 if (update_Rc) {
3014 env->crf[0] = 0x4;
3016 goto done;
3018 i++;
3020 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3021 if ((low & mask) == 0) {
3022 if (update_Rc) {
3023 env->crf[0] = 0x8;
3025 goto done;
3027 i++;
3029 i = 8;
3030 if (update_Rc) {
3031 env->crf[0] = 0x2;
3033 done:
3034 env->xer = (env->xer & ~0x7F) | i;
3035 if (update_Rc) {
3036 env->crf[0] |= xer_so;
3038 return i;