target/ppc: Optimize emulation of lvsl and lvsr instructions
[qemu/ar7.git] target/ppc/int_helper.c
blob 5dcca5362b37a501c883a38a7d7ebb84c7fafebe
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "exec/helper-proto.h"
26 #include "crypto/aes.h"
27 #include "fpu/softfloat.h"
28 #include "qapi/error.h"
29 #include "qemu/guest-random.h"
31 #include "helper_regs.h"
32 /*****************************************************************************/
33 /* Fixed point operations helpers */
35 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
37 if (unlikely(ov)) {
38 env->so = env->ov = 1;
39 } else {
40 env->ov = 0;
44 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
45 uint32_t oe)
47 uint64_t rt = 0;
48 int overflow = 0;
50 uint64_t dividend = (uint64_t)ra << 32;
51 uint64_t divisor = (uint32_t)rb;
53 if (unlikely(divisor == 0)) {
54 overflow = 1;
55 } else {
56 rt = dividend / divisor;
57 overflow = rt > UINT32_MAX;
60 if (unlikely(overflow)) {
61 rt = 0; /* Undefined */
64 if (oe) {
65 helper_update_ov_legacy(env, overflow);
68 return (target_ulong)rt;
71 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
72 uint32_t oe)
74 int64_t rt = 0;
75 int overflow = 0;
77 int64_t dividend = (int64_t)ra << 32;
78 int64_t divisor = (int64_t)((int32_t)rb);
80 if (unlikely((divisor == 0) ||
81 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
82 overflow = 1;
83 } else {
84 rt = dividend / divisor;
85 overflow = rt != (int32_t)rt;
88 if (unlikely(overflow)) {
89 rt = 0; /* Undefined */
92 if (oe) {
93 helper_update_ov_legacy(env, overflow);
96 return (target_ulong)rt;
99 #if defined(TARGET_PPC64)
101 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
103 uint64_t rt = 0;
104 int overflow = 0;
106 overflow = divu128(&rt, &ra, rb);
108 if (unlikely(overflow)) {
109 rt = 0; /* Undefined */
112 if (oe) {
113 helper_update_ov_legacy(env, overflow);
116 return rt;
119 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
121 int64_t rt = 0;
122 int64_t ra = (int64_t)rau;
123 int64_t rb = (int64_t)rbu;
124 int overflow = divs128(&rt, &ra, rb);
126 if (unlikely(overflow)) {
127 rt = 0; /* Undefined */
130 if (oe) {
131 helper_update_ov_legacy(env, overflow);
134 return rt;
137 #endif
140 #if defined(TARGET_PPC64)
141 /* if x = 0xab, returns 0xabababababababab */
142 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
145 * subtract 1 from each byte, AND with the inverse, and check whether the
146 * MSB is set in each byte.
147 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
148 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
150 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
152 /* When you XOR the pattern and there is a match, that byte will be zero */
153 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
155 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
157 return hasvalue(rb, ra) ? CRF_GT : 0;
160 #undef pattern
161 #undef haszero
162 #undef hasvalue
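/*
 * Illustrative sketch, not part of the original file: the same zero-byte
 * trick that pattern()/haszero()/hasvalue() implement above, written out
 * with the 64-bit constants visible.  The function name is hypothetical.
 */
static inline int sketch_byte_present(uint64_t x, uint8_t n)
{
    uint64_t pat01 = 0x0101010101010101ull;          /* pattern(0x01) */
    uint64_t pat80 = 0x8080808080808080ull;          /* pattern(0x80) */
    uint64_t v = x ^ ((uint64_t)n * pat01);          /* matching byte -> 0x00 */

    /* a byte of v is zero iff the borrow below sets that byte's MSB */
    return ((v - pat01) & ~v & pat80) != 0;
}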
165 * Return a random number.
167 uint64_t helper_darn32(void)
169 Error *err = NULL;
170 uint32_t ret;
172 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
173 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
174 error_get_pretty(err));
175 error_free(err);
176 return -1;
179 return ret;
182 uint64_t helper_darn64(void)
184 Error *err = NULL;
185 uint64_t ret;
187 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
188 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
189 error_get_pretty(err));
190 error_free(err);
191 return -1;
194 return ret;
197 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
199 int i;
200 uint64_t ra = 0;
202 for (i = 0; i < 8; i++) {
203 int index = (rs >> (i * 8)) & 0xFF;
204 if (index < 64) {
205 if (rb & PPC_BIT(index)) {
206 ra |= 1 << i;
210 return ra;
213 #endif
215 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
217 target_ulong mask = 0xff;
218 target_ulong ra = 0;
219 int i;
221 for (i = 0; i < sizeof(target_ulong); i++) {
222 if ((rs & mask) == (rb & mask)) {
223 ra |= mask;
225 mask <<= 8;
227 return ra;
230 /* shift right arithmetic helper */
231 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
232 target_ulong shift)
234 int32_t ret;
236 if (likely(!(shift & 0x20))) {
237 if (likely((uint32_t)shift != 0)) {
238 shift &= 0x1f;
239 ret = (int32_t)value >> shift;
240 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
241 env->ca32 = env->ca = 0;
242 } else {
243 env->ca32 = env->ca = 1;
245 } else {
246 ret = (int32_t)value;
247 env->ca32 = env->ca = 0;
249 } else {
250 ret = (int32_t)value >> 31;
251 env->ca32 = env->ca = (ret != 0);
253 return (target_long)ret;
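/*
 * Illustrative note, not part of the original file: CA/CA32 above act as the
 * "inexact" flag of the arithmetic right shift, e.g. sraw of 0xffffffff (-1)
 * by 1 yields -1 with CA = 1 (a one bit was shifted out of a negative value),
 * while sraw of 0x00000002 by 1 yields 1 with CA = 0.
 */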
256 #if defined(TARGET_PPC64)
257 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
258 target_ulong shift)
260 int64_t ret;
262 if (likely(!(shift & 0x40))) {
263 if (likely((uint64_t)shift != 0)) {
264 shift &= 0x3f;
265 ret = (int64_t)value >> shift;
266 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
267 env->ca32 = env->ca = 0;
268 } else {
269 env->ca32 = env->ca = 1;
271 } else {
272 ret = (int64_t)value;
273 env->ca32 = env->ca = 0;
275 } else {
276 ret = (int64_t)value >> 63;
277 env->ca32 = env->ca = (ret != 0);
279 return ret;
281 #endif
283 #if defined(TARGET_PPC64)
284 target_ulong helper_popcntb(target_ulong val)
286 /* Note that we don't fold past bytes */
287 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
288 0x5555555555555555ULL);
289 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
290 0x3333333333333333ULL);
291 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
292 0x0f0f0f0f0f0f0f0fULL);
293 return val;
296 target_ulong helper_popcntw(target_ulong val)
298 /* Note that we don't fold past words. */
299 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
300 0x5555555555555555ULL);
301 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
302 0x3333333333333333ULL);
303 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
304 0x0f0f0f0f0f0f0f0fULL);
305 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
306 0x00ff00ff00ff00ffULL);
307 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
308 0x0000ffff0000ffffULL);
309 return val;
311 #else
312 target_ulong helper_popcntb(target_ulong val)
314 /* Note that we don't fold past bytes */
315 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
316 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
317 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
318 return val;
320 #endif
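/*
 * Illustrative note, not part of the original file: because the folds above
 * stop early, each byte (popcntb) or word (popcntw) keeps its own count
 * rather than contributing to a single total, e.g. on a 64-bit target:
 *
 *     helper_popcntb(0xff01000000000003ULL) == 0x0801000000000002ULL
 */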
322 /*****************************************************************************/
323 /* PowerPC 601 specific instructions (POWER bridge) */
324 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
326 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
328 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
329 (int32_t)arg2 == 0) {
330 env->spr[SPR_MQ] = 0;
331 return INT32_MIN;
332 } else {
333 env->spr[SPR_MQ] = tmp % arg2;
334 return tmp / (int32_t)arg2;
338 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
339 target_ulong arg2)
341 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
343 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
344 (int32_t)arg2 == 0) {
345 env->so = env->ov = 1;
346 env->spr[SPR_MQ] = 0;
347 return INT32_MIN;
348 } else {
349 env->spr[SPR_MQ] = tmp % arg2;
350 tmp /= (int32_t)arg2;
351 if ((int32_t)tmp != tmp) {
352 env->so = env->ov = 1;
353 } else {
354 env->ov = 0;
356 return tmp;
360 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
361 target_ulong arg2)
363 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
364 (int32_t)arg2 == 0) {
365 env->spr[SPR_MQ] = 0;
366 return INT32_MIN;
367 } else {
368 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
369 return (int32_t)arg1 / (int32_t)arg2;
373 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
374 target_ulong arg2)
376 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
377 (int32_t)arg2 == 0) {
378 env->so = env->ov = 1;
379 env->spr[SPR_MQ] = 0;
380 return INT32_MIN;
381 } else {
382 env->ov = 0;
383 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
384 return (int32_t)arg1 / (int32_t)arg2;
388 /*****************************************************************************/
389 /* 602 specific instructions */
390 /* mfrom is the craziest instruction I have ever seen, imho! */
391 /* Real implementation uses a ROM table. Do the same */
393 * Extremely decomposed:
394 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
397 #if !defined(CONFIG_USER_ONLY)
398 target_ulong helper_602_mfrom(target_ulong arg)
400 if (likely(arg < 602)) {
401 #include "mfrom_table.inc.c"
402 return mfrom_ROM_table[arg];
403 } else {
404 return 0;
407 #endif
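/*
 * Illustrative sketch, not part of the original file: one plausible way the
 * ROM table consulted above could be regenerated, following the formula in
 * the comment.  Assumes <math.h>, which int_helper.c itself does not include.
 */
static uint32_t sketch_mfrom_entry(uint32_t arg)
{
    double d = 256.0 * log10(pow(10.0, -(double)arg / 256.0) + 1.0) + 0.5;

    return (uint32_t)d;
}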
409 /*****************************************************************************/
410 /* Altivec extension helpers */
411 #if defined(HOST_WORDS_BIGENDIAN)
412 #define VECTOR_FOR_INORDER_I(index, element) \
413 for (index = 0; index < ARRAY_SIZE(r->element); index++)
414 #else
415 #define VECTOR_FOR_INORDER_I(index, element) \
416 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
417 #endif
419 /* Saturating arithmetic helpers. */
420 #define SATCVT(from, to, from_type, to_type, min, max) \
421 static inline to_type cvt##from##to(from_type x, int *sat) \
423 to_type r; \
425 if (x < (from_type)min) { \
426 r = min; \
427 *sat = 1; \
428 } else if (x > (from_type)max) { \
429 r = max; \
430 *sat = 1; \
431 } else { \
432 r = x; \
434 return r; \
436 #define SATCVTU(from, to, from_type, to_type, min, max) \
437 static inline to_type cvt##from##to(from_type x, int *sat) \
439 to_type r; \
441 if (x > (from_type)max) { \
442 r = max; \
443 *sat = 1; \
444 } else { \
445 r = x; \
447 return r; \
449 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
450 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
451 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
453 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
454 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
455 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
456 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
457 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
458 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
459 #undef SATCVT
460 #undef SATCVTU
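/*
 * Illustrative sketch, not part of the original file: how the converters
 * generated above behave, using cvtsdsw() (int64_t -> int32_t) as an example.
 */
static inline void sketch_satcvt_demo(void)
{
    int sat = 0;
    int32_t hi = cvtsdsw(0x123456789ll, &sat);   /* above INT32_MAX */
    int32_t lo = cvtsdsw(INT64_MIN, &sat);       /* below INT32_MIN */

    /* hi == INT32_MAX, lo == INT32_MIN, and sat is now 1 */
    (void)hi;
    (void)lo;
}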
462 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
464 env->vscr = vscr & ~(1u << VSCR_SAT);
465 /* Which bit we set is completely arbitrary, but clear the rest. */
466 env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
467 env->vscr_sat.u64[1] = 0;
468 set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
471 uint32_t helper_mfvscr(CPUPPCState *env)
473 uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
474 return env->vscr | (sat << VSCR_SAT);
477 static inline void set_vscr_sat(CPUPPCState *env)
479 /* The choice of non-zero value is arbitrary. */
480 env->vscr_sat.u32[0] = 1;
483 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
485 int i;
487 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
488 r->u32[i] = ~a->u32[i] < b->u32[i];
492 /* vprtybw */
493 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
495 int i;
496 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
497 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
498 res ^= res >> 8;
499 r->u32[i] = res & 1;
503 /* vprtybd */
504 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
506 int i;
507 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
508 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
509 res ^= res >> 16;
510 res ^= res >> 8;
511 r->u64[i] = res & 1;
515 /* vprtybq */
516 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
518 uint64_t res = b->u64[0] ^ b->u64[1];
519 res ^= res >> 32;
520 res ^= res >> 16;
521 res ^= res >> 8;
522 r->VsrD(1) = res & 1;
523 r->VsrD(0) = 0;
526 #define VARITH_DO(name, op, element) \
527 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
529 int i; \
531 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
532 r->element[i] = a->element[i] op b->element[i]; \
535 VARITH_DO(muluwm, *, u32)
536 #undef VARITH_DO
537 #undef VARITH
539 #define VARITHFP(suffix, func) \
540 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
541 ppc_avr_t *b) \
543 int i; \
545 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
546 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
549 VARITHFP(addfp, float32_add)
550 VARITHFP(subfp, float32_sub)
551 VARITHFP(minfp, float32_min)
552 VARITHFP(maxfp, float32_max)
553 #undef VARITHFP
555 #define VARITHFPFMA(suffix, type) \
556 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
557 ppc_avr_t *b, ppc_avr_t *c) \
559 int i; \
560 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
561 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
562 type, &env->vec_status); \
565 VARITHFPFMA(maddfp, 0);
566 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
567 #undef VARITHFPFMA
569 #define VARITHSAT_CASE(type, op, cvt, element) \
571 type result = (type)a->element[i] op (type)b->element[i]; \
572 r->element[i] = cvt(result, &sat); \
575 #define VARITHSAT_DO(name, op, optype, cvt, element) \
576 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
577 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
579 int sat = 0; \
580 int i; \
582 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
583 VARITHSAT_CASE(optype, op, cvt, element); \
585 if (sat) { \
586 vscr_sat->u32[0] = 1; \
589 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
590 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
591 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
592 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
593 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
594 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
595 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
596 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
597 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
598 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
599 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
600 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
601 #undef VARITHSAT_CASE
602 #undef VARITHSAT_DO
603 #undef VARITHSAT_SIGNED
604 #undef VARITHSAT_UNSIGNED
606 #define VAVG_DO(name, element, etype) \
607 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
609 int i; \
611 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
612 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
613 r->element[i] = x >> 1; \
617 #define VAVG(type, signed_element, signed_type, unsigned_element, \
618 unsigned_type) \
619 VAVG_DO(avgs##type, signed_element, signed_type) \
620 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
621 VAVG(b, s8, int16_t, u8, uint16_t)
622 VAVG(h, s16, int32_t, u16, uint32_t)
623 VAVG(w, s32, int64_t, u32, uint64_t)
624 #undef VAVG_DO
625 #undef VAVG
627 #define VABSDU_DO(name, element) \
628 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
630 int i; \
632 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
633 r->element[i] = (a->element[i] > b->element[i]) ? \
634 (a->element[i] - b->element[i]) : \
635 (b->element[i] - a->element[i]); \
640 * VABSDU - Vector absolute difference unsigned
641 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
642 * element - element type to access from vector
644 #define VABSDU(type, element) \
645 VABSDU_DO(absdu##type, element)
646 VABSDU(b, u8)
647 VABSDU(h, u16)
648 VABSDU(w, u32)
649 #undef VABSDU_DO
650 #undef VABSDU
652 #define VCF(suffix, cvt, element) \
653 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
654 ppc_avr_t *b, uint32_t uim) \
656 int i; \
658 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
659 float32 t = cvt(b->element[i], &env->vec_status); \
660 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
663 VCF(ux, uint32_to_float32, u32)
664 VCF(sx, int32_to_float32, s32)
665 #undef VCF
667 #define VCMP_DO(suffix, compare, element, record) \
668 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
669 ppc_avr_t *a, ppc_avr_t *b) \
671 uint64_t ones = (uint64_t)-1; \
672 uint64_t all = ones; \
673 uint64_t none = 0; \
674 int i; \
676 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
677 uint64_t result = (a->element[i] compare b->element[i] ? \
678 ones : 0x0); \
679 switch (sizeof(a->element[0])) { \
680 case 8: \
681 r->u64[i] = result; \
682 break; \
683 case 4: \
684 r->u32[i] = result; \
685 break; \
686 case 2: \
687 r->u16[i] = result; \
688 break; \
689 case 1: \
690 r->u8[i] = result; \
691 break; \
693 all &= result; \
694 none |= result; \
696 if (record) { \
697 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
700 #define VCMP(suffix, compare, element) \
701 VCMP_DO(suffix, compare, element, 0) \
702 VCMP_DO(suffix##_dot, compare, element, 1)
703 VCMP(equb, ==, u8)
704 VCMP(equh, ==, u16)
705 VCMP(equw, ==, u32)
706 VCMP(equd, ==, u64)
707 VCMP(gtub, >, u8)
708 VCMP(gtuh, >, u16)
709 VCMP(gtuw, >, u32)
710 VCMP(gtud, >, u64)
711 VCMP(gtsb, >, s8)
712 VCMP(gtsh, >, s16)
713 VCMP(gtsw, >, s32)
714 VCMP(gtsd, >, s64)
715 #undef VCMP_DO
716 #undef VCMP
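/*
 * Illustrative sketch, not part of the original file: the CR6 summary that
 * the record (".") forms above write.  Bit 3 (value 8) means every element
 * satisfied the comparison, bit 1 (value 2) means no element did; a mixed
 * result leaves CR6 == 0.
 */
static inline uint32_t sketch_vcmp_cr6(bool all_matched, bool none_matched)
{
    return ((uint32_t)all_matched << 3) | ((uint32_t)none_matched << 1);
}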
718 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
719 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
720 ppc_avr_t *a, ppc_avr_t *b) \
722 etype ones = (etype)-1; \
723 etype all = ones; \
724 etype result, none = 0; \
725 int i; \
727 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
728 if (cmpzero) { \
729 result = ((a->element[i] == 0) \
730 || (b->element[i] == 0) \
731 || (a->element[i] != b->element[i]) ? \
732 ones : 0x0); \
733 } else { \
734 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
736 r->element[i] = result; \
737 all &= result; \
738 none |= result; \
740 if (record) { \
741 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
746 * VCMPNEZ - Vector compare not equal to zero
747 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
748 * element - element type to access from vector
750 #define VCMPNE(suffix, element, etype, cmpzero) \
751 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
752 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
753 VCMPNE(zb, u8, uint8_t, 1)
754 VCMPNE(zh, u16, uint16_t, 1)
755 VCMPNE(zw, u32, uint32_t, 1)
756 VCMPNE(b, u8, uint8_t, 0)
757 VCMPNE(h, u16, uint16_t, 0)
758 VCMPNE(w, u32, uint32_t, 0)
759 #undef VCMPNE_DO
760 #undef VCMPNE
762 #define VCMPFP_DO(suffix, compare, order, record) \
763 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
764 ppc_avr_t *a, ppc_avr_t *b) \
766 uint32_t ones = (uint32_t)-1; \
767 uint32_t all = ones; \
768 uint32_t none = 0; \
769 int i; \
771 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
772 uint32_t result; \
773 int rel = float32_compare_quiet(a->f32[i], b->f32[i], \
774 &env->vec_status); \
775 if (rel == float_relation_unordered) { \
776 result = 0; \
777 } else if (rel compare order) { \
778 result = ones; \
779 } else { \
780 result = 0; \
782 r->u32[i] = result; \
783 all &= result; \
784 none |= result; \
786 if (record) { \
787 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
790 #define VCMPFP(suffix, compare, order) \
791 VCMPFP_DO(suffix, compare, order, 0) \
792 VCMPFP_DO(suffix##_dot, compare, order, 1)
793 VCMPFP(eqfp, ==, float_relation_equal)
794 VCMPFP(gefp, !=, float_relation_less)
795 VCMPFP(gtfp, ==, float_relation_greater)
796 #undef VCMPFP_DO
797 #undef VCMPFP
799 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
800 ppc_avr_t *a, ppc_avr_t *b, int record)
802 int i;
803 int all_in = 0;
805 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
806 int le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
807 &env->vec_status);
808 if (le_rel == float_relation_unordered) {
809 r->u32[i] = 0xc0000000;
810 all_in = 1;
811 } else {
812 float32 bneg = float32_chs(b->f32[i]);
813 int ge_rel = float32_compare_quiet(a->f32[i], bneg,
814 &env->vec_status);
815 int le = le_rel != float_relation_greater;
816 int ge = ge_rel != float_relation_less;
818 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
819 all_in |= (!le | !ge);
822 if (record) {
823 env->crf[6] = (all_in == 0) << 1;
827 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
829 vcmpbfp_internal(env, r, a, b, 0);
832 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
833 ppc_avr_t *b)
835 vcmpbfp_internal(env, r, a, b, 1);
838 #define VCT(suffix, satcvt, element) \
839 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
840 ppc_avr_t *b, uint32_t uim) \
842 int i; \
843 int sat = 0; \
844 float_status s = env->vec_status; \
846 set_float_rounding_mode(float_round_to_zero, &s); \
847 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
848 if (float32_is_any_nan(b->f32[i])) { \
849 r->element[i] = 0; \
850 } else { \
851 float64 t = float32_to_float64(b->f32[i], &s); \
852 int64_t j; \
854 t = float64_scalbn(t, uim, &s); \
855 j = float64_to_int64(t, &s); \
856 r->element[i] = satcvt(j, &sat); \
859 if (sat) { \
860 set_vscr_sat(env); \
863 VCT(uxs, cvtsduw, u32)
864 VCT(sxs, cvtsdsw, s32)
865 #undef VCT
867 target_ulong helper_vclzlsbb(ppc_avr_t *r)
869 target_ulong count = 0;
870 int i;
871 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
872 if (r->VsrB(i) & 0x01) {
873 break;
875 count++;
877 return count;
880 target_ulong helper_vctzlsbb(ppc_avr_t *r)
882 target_ulong count = 0;
883 int i;
884 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
885 if (r->VsrB(i) & 0x01) {
886 break;
888 count++;
890 return count;
893 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
894 ppc_avr_t *b, ppc_avr_t *c)
896 int sat = 0;
897 int i;
899 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
900 int32_t prod = a->s16[i] * b->s16[i];
901 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
903 r->s16[i] = cvtswsh(t, &sat);
906 if (sat) {
907 set_vscr_sat(env);
911 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
912 ppc_avr_t *b, ppc_avr_t *c)
914 int sat = 0;
915 int i;
917 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
918 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
919 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
920 r->s16[i] = cvtswsh(t, &sat);
923 if (sat) {
924 set_vscr_sat(env);
928 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
930 int i;
932 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
933 int32_t prod = a->s16[i] * b->s16[i];
934 r->s16[i] = (int16_t) (prod + c->s16[i]);
938 #define VMRG_DO(name, element, access, ofs) \
939 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
941 ppc_avr_t result; \
942 int i, half = ARRAY_SIZE(r->element) / 2; \
944 for (i = 0; i < half; i++) { \
945 result.access(i * 2 + 0) = a->access(i + ofs); \
946 result.access(i * 2 + 1) = b->access(i + ofs); \
948 *r = result; \
951 #define VMRG(suffix, element, access) \
952 VMRG_DO(mrgl##suffix, element, access, half) \
953 VMRG_DO(mrgh##suffix, element, access, 0)
954 VMRG(b, u8, VsrB)
955 VMRG(h, u16, VsrH)
956 VMRG(w, u32, VsrW)
957 #undef VMRG_DO
958 #undef VMRG
960 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
961 ppc_avr_t *b, ppc_avr_t *c)
963 int32_t prod[16];
964 int i;
966 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
967 prod[i] = (int32_t)a->s8[i] * b->u8[i];
970 VECTOR_FOR_INORDER_I(i, s32) {
971 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
972 prod[4 * i + 2] + prod[4 * i + 3];
976 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
977 ppc_avr_t *b, ppc_avr_t *c)
979 int32_t prod[8];
980 int i;
982 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
983 prod[i] = a->s16[i] * b->s16[i];
986 VECTOR_FOR_INORDER_I(i, s32) {
987 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
991 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
992 ppc_avr_t *b, ppc_avr_t *c)
994 int32_t prod[8];
995 int i;
996 int sat = 0;
998 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
999 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1002 VECTOR_FOR_INORDER_I(i, s32) {
1003 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1005 r->u32[i] = cvtsdsw(t, &sat);
1008 if (sat) {
1009 set_vscr_sat(env);
1013 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1014 ppc_avr_t *b, ppc_avr_t *c)
1016 uint16_t prod[16];
1017 int i;
1019 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1020 prod[i] = a->u8[i] * b->u8[i];
1023 VECTOR_FOR_INORDER_I(i, u32) {
1024 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1025 prod[4 * i + 2] + prod[4 * i + 3];
1029 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1030 ppc_avr_t *b, ppc_avr_t *c)
1032 uint32_t prod[8];
1033 int i;
1035 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1036 prod[i] = a->u16[i] * b->u16[i];
1039 VECTOR_FOR_INORDER_I(i, u32) {
1040 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1044 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1045 ppc_avr_t *b, ppc_avr_t *c)
1047 uint32_t prod[8];
1048 int i;
1049 int sat = 0;
1051 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1052 prod[i] = a->u16[i] * b->u16[i];
1055 VECTOR_FOR_INORDER_I(i, s32) {
1056 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1058 r->u32[i] = cvtuduw(t, &sat);
1061 if (sat) {
1062 set_vscr_sat(env);
1066 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1067 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1069 int i; \
1071 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1072 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1073 (cast)b->mul_access(i); \
1077 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1078 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1080 int i; \
1082 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1083 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1084 (cast)b->mul_access(i + 1); \
1088 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1089 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
1090 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
1091 VMUL(sb, s8, VsrSB, VsrSH, int16_t)
1092 VMUL(sh, s16, VsrSH, VsrSW, int32_t)
1093 VMUL(sw, s32, VsrSW, VsrSD, int64_t)
1094 VMUL(ub, u8, VsrB, VsrH, uint16_t)
1095 VMUL(uh, u16, VsrH, VsrW, uint32_t)
1096 VMUL(uw, u32, VsrW, VsrD, uint64_t)
1097 #undef VMUL_DO_EVN
1098 #undef VMUL_DO_ODD
1099 #undef VMUL
1101 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1102 ppc_avr_t *c)
1104 ppc_avr_t result;
1105 int i;
1107 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1108 int s = c->VsrB(i) & 0x1f;
1109 int index = s & 0xf;
1111 if (s & 0x10) {
1112 result.VsrB(i) = b->VsrB(index);
1113 } else {
1114 result.VsrB(i) = a->VsrB(index);
1117 *r = result;
1120 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1121 ppc_avr_t *c)
1123 ppc_avr_t result;
1124 int i;
1126 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1127 int s = c->VsrB(i) & 0x1f;
1128 int index = 15 - (s & 0xf);
1130 if (s & 0x10) {
1131 result.VsrB(i) = a->VsrB(index);
1132 } else {
1133 result.VsrB(i) = b->VsrB(index);
1136 *r = result;
1139 #if defined(HOST_WORDS_BIGENDIAN)
1140 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1141 #define VBPERMD_INDEX(i) (i)
1142 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1143 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1144 #else
1145 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1146 #define VBPERMD_INDEX(i) (1 - i)
1147 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1148 #define EXTRACT_BIT(avr, i, index) \
1149 (extract64((avr)->u64[1 - i], 63 - index, 1))
1150 #endif
1152 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1154 int i, j;
1155 ppc_avr_t result = { .u64 = { 0, 0 } };
1156 VECTOR_FOR_INORDER_I(i, u64) {
1157 for (j = 0; j < 8; j++) {
1158 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1159 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1160 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1164 *r = result;
1167 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1169 int i;
1170 uint64_t perm = 0;
1172 VECTOR_FOR_INORDER_I(i, u8) {
1173 int index = VBPERMQ_INDEX(b, i);
1175 if (index < 128) {
1176 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1177 if (a->u64[VBPERMQ_DW(index)] & mask) {
1178 perm |= (0x8000 >> i);
1183 r->VsrD(0) = perm;
1184 r->VsrD(1) = 0;
1187 #undef VBPERMQ_INDEX
1188 #undef VBPERMQ_DW
1190 static const uint64_t VGBBD_MASKS[256] = {
1191 0x0000000000000000ull, /* 00 */
1192 0x0000000000000080ull, /* 01 */
1193 0x0000000000008000ull, /* 02 */
1194 0x0000000000008080ull, /* 03 */
1195 0x0000000000800000ull, /* 04 */
1196 0x0000000000800080ull, /* 05 */
1197 0x0000000000808000ull, /* 06 */
1198 0x0000000000808080ull, /* 07 */
1199 0x0000000080000000ull, /* 08 */
1200 0x0000000080000080ull, /* 09 */
1201 0x0000000080008000ull, /* 0A */
1202 0x0000000080008080ull, /* 0B */
1203 0x0000000080800000ull, /* 0C */
1204 0x0000000080800080ull, /* 0D */
1205 0x0000000080808000ull, /* 0E */
1206 0x0000000080808080ull, /* 0F */
1207 0x0000008000000000ull, /* 10 */
1208 0x0000008000000080ull, /* 11 */
1209 0x0000008000008000ull, /* 12 */
1210 0x0000008000008080ull, /* 13 */
1211 0x0000008000800000ull, /* 14 */
1212 0x0000008000800080ull, /* 15 */
1213 0x0000008000808000ull, /* 16 */
1214 0x0000008000808080ull, /* 17 */
1215 0x0000008080000000ull, /* 18 */
1216 0x0000008080000080ull, /* 19 */
1217 0x0000008080008000ull, /* 1A */
1218 0x0000008080008080ull, /* 1B */
1219 0x0000008080800000ull, /* 1C */
1220 0x0000008080800080ull, /* 1D */
1221 0x0000008080808000ull, /* 1E */
1222 0x0000008080808080ull, /* 1F */
1223 0x0000800000000000ull, /* 20 */
1224 0x0000800000000080ull, /* 21 */
1225 0x0000800000008000ull, /* 22 */
1226 0x0000800000008080ull, /* 23 */
1227 0x0000800000800000ull, /* 24 */
1228 0x0000800000800080ull, /* 25 */
1229 0x0000800000808000ull, /* 26 */
1230 0x0000800000808080ull, /* 27 */
1231 0x0000800080000000ull, /* 28 */
1232 0x0000800080000080ull, /* 29 */
1233 0x0000800080008000ull, /* 2A */
1234 0x0000800080008080ull, /* 2B */
1235 0x0000800080800000ull, /* 2C */
1236 0x0000800080800080ull, /* 2D */
1237 0x0000800080808000ull, /* 2E */
1238 0x0000800080808080ull, /* 2F */
1239 0x0000808000000000ull, /* 30 */
1240 0x0000808000000080ull, /* 31 */
1241 0x0000808000008000ull, /* 32 */
1242 0x0000808000008080ull, /* 33 */
1243 0x0000808000800000ull, /* 34 */
1244 0x0000808000800080ull, /* 35 */
1245 0x0000808000808000ull, /* 36 */
1246 0x0000808000808080ull, /* 37 */
1247 0x0000808080000000ull, /* 38 */
1248 0x0000808080000080ull, /* 39 */
1249 0x0000808080008000ull, /* 3A */
1250 0x0000808080008080ull, /* 3B */
1251 0x0000808080800000ull, /* 3C */
1252 0x0000808080800080ull, /* 3D */
1253 0x0000808080808000ull, /* 3E */
1254 0x0000808080808080ull, /* 3F */
1255 0x0080000000000000ull, /* 40 */
1256 0x0080000000000080ull, /* 41 */
1257 0x0080000000008000ull, /* 42 */
1258 0x0080000000008080ull, /* 43 */
1259 0x0080000000800000ull, /* 44 */
1260 0x0080000000800080ull, /* 45 */
1261 0x0080000000808000ull, /* 46 */
1262 0x0080000000808080ull, /* 47 */
1263 0x0080000080000000ull, /* 48 */
1264 0x0080000080000080ull, /* 49 */
1265 0x0080000080008000ull, /* 4A */
1266 0x0080000080008080ull, /* 4B */
1267 0x0080000080800000ull, /* 4C */
1268 0x0080000080800080ull, /* 4D */
1269 0x0080000080808000ull, /* 4E */
1270 0x0080000080808080ull, /* 4F */
1271 0x0080008000000000ull, /* 50 */
1272 0x0080008000000080ull, /* 51 */
1273 0x0080008000008000ull, /* 52 */
1274 0x0080008000008080ull, /* 53 */
1275 0x0080008000800000ull, /* 54 */
1276 0x0080008000800080ull, /* 55 */
1277 0x0080008000808000ull, /* 56 */
1278 0x0080008000808080ull, /* 57 */
1279 0x0080008080000000ull, /* 58 */
1280 0x0080008080000080ull, /* 59 */
1281 0x0080008080008000ull, /* 5A */
1282 0x0080008080008080ull, /* 5B */
1283 0x0080008080800000ull, /* 5C */
1284 0x0080008080800080ull, /* 5D */
1285 0x0080008080808000ull, /* 5E */
1286 0x0080008080808080ull, /* 5F */
1287 0x0080800000000000ull, /* 60 */
1288 0x0080800000000080ull, /* 61 */
1289 0x0080800000008000ull, /* 62 */
1290 0x0080800000008080ull, /* 63 */
1291 0x0080800000800000ull, /* 64 */
1292 0x0080800000800080ull, /* 65 */
1293 0x0080800000808000ull, /* 66 */
1294 0x0080800000808080ull, /* 67 */
1295 0x0080800080000000ull, /* 68 */
1296 0x0080800080000080ull, /* 69 */
1297 0x0080800080008000ull, /* 6A */
1298 0x0080800080008080ull, /* 6B */
1299 0x0080800080800000ull, /* 6C */
1300 0x0080800080800080ull, /* 6D */
1301 0x0080800080808000ull, /* 6E */
1302 0x0080800080808080ull, /* 6F */
1303 0x0080808000000000ull, /* 70 */
1304 0x0080808000000080ull, /* 71 */
1305 0x0080808000008000ull, /* 72 */
1306 0x0080808000008080ull, /* 73 */
1307 0x0080808000800000ull, /* 74 */
1308 0x0080808000800080ull, /* 75 */
1309 0x0080808000808000ull, /* 76 */
1310 0x0080808000808080ull, /* 77 */
1311 0x0080808080000000ull, /* 78 */
1312 0x0080808080000080ull, /* 79 */
1313 0x0080808080008000ull, /* 7A */
1314 0x0080808080008080ull, /* 7B */
1315 0x0080808080800000ull, /* 7C */
1316 0x0080808080800080ull, /* 7D */
1317 0x0080808080808000ull, /* 7E */
1318 0x0080808080808080ull, /* 7F */
1319 0x8000000000000000ull, /* 80 */
1320 0x8000000000000080ull, /* 81 */
1321 0x8000000000008000ull, /* 82 */
1322 0x8000000000008080ull, /* 83 */
1323 0x8000000000800000ull, /* 84 */
1324 0x8000000000800080ull, /* 85 */
1325 0x8000000000808000ull, /* 86 */
1326 0x8000000000808080ull, /* 87 */
1327 0x8000000080000000ull, /* 88 */
1328 0x8000000080000080ull, /* 89 */
1329 0x8000000080008000ull, /* 8A */
1330 0x8000000080008080ull, /* 8B */
1331 0x8000000080800000ull, /* 8C */
1332 0x8000000080800080ull, /* 8D */
1333 0x8000000080808000ull, /* 8E */
1334 0x8000000080808080ull, /* 8F */
1335 0x8000008000000000ull, /* 90 */
1336 0x8000008000000080ull, /* 91 */
1337 0x8000008000008000ull, /* 92 */
1338 0x8000008000008080ull, /* 93 */
1339 0x8000008000800000ull, /* 94 */
1340 0x8000008000800080ull, /* 95 */
1341 0x8000008000808000ull, /* 96 */
1342 0x8000008000808080ull, /* 97 */
1343 0x8000008080000000ull, /* 98 */
1344 0x8000008080000080ull, /* 99 */
1345 0x8000008080008000ull, /* 9A */
1346 0x8000008080008080ull, /* 9B */
1347 0x8000008080800000ull, /* 9C */
1348 0x8000008080800080ull, /* 9D */
1349 0x8000008080808000ull, /* 9E */
1350 0x8000008080808080ull, /* 9F */
1351 0x8000800000000000ull, /* A0 */
1352 0x8000800000000080ull, /* A1 */
1353 0x8000800000008000ull, /* A2 */
1354 0x8000800000008080ull, /* A3 */
1355 0x8000800000800000ull, /* A4 */
1356 0x8000800000800080ull, /* A5 */
1357 0x8000800000808000ull, /* A6 */
1358 0x8000800000808080ull, /* A7 */
1359 0x8000800080000000ull, /* A8 */
1360 0x8000800080000080ull, /* A9 */
1361 0x8000800080008000ull, /* AA */
1362 0x8000800080008080ull, /* AB */
1363 0x8000800080800000ull, /* AC */
1364 0x8000800080800080ull, /* AD */
1365 0x8000800080808000ull, /* AE */
1366 0x8000800080808080ull, /* AF */
1367 0x8000808000000000ull, /* B0 */
1368 0x8000808000000080ull, /* B1 */
1369 0x8000808000008000ull, /* B2 */
1370 0x8000808000008080ull, /* B3 */
1371 0x8000808000800000ull, /* B4 */
1372 0x8000808000800080ull, /* B5 */
1373 0x8000808000808000ull, /* B6 */
1374 0x8000808000808080ull, /* B7 */
1375 0x8000808080000000ull, /* B8 */
1376 0x8000808080000080ull, /* B9 */
1377 0x8000808080008000ull, /* BA */
1378 0x8000808080008080ull, /* BB */
1379 0x8000808080800000ull, /* BC */
1380 0x8000808080800080ull, /* BD */
1381 0x8000808080808000ull, /* BE */
1382 0x8000808080808080ull, /* BF */
1383 0x8080000000000000ull, /* C0 */
1384 0x8080000000000080ull, /* C1 */
1385 0x8080000000008000ull, /* C2 */
1386 0x8080000000008080ull, /* C3 */
1387 0x8080000000800000ull, /* C4 */
1388 0x8080000000800080ull, /* C5 */
1389 0x8080000000808000ull, /* C6 */
1390 0x8080000000808080ull, /* C7 */
1391 0x8080000080000000ull, /* C8 */
1392 0x8080000080000080ull, /* C9 */
1393 0x8080000080008000ull, /* CA */
1394 0x8080000080008080ull, /* CB */
1395 0x8080000080800000ull, /* CC */
1396 0x8080000080800080ull, /* CD */
1397 0x8080000080808000ull, /* CE */
1398 0x8080000080808080ull, /* CF */
1399 0x8080008000000000ull, /* D0 */
1400 0x8080008000000080ull, /* D1 */
1401 0x8080008000008000ull, /* D2 */
1402 0x8080008000008080ull, /* D3 */
1403 0x8080008000800000ull, /* D4 */
1404 0x8080008000800080ull, /* D5 */
1405 0x8080008000808000ull, /* D6 */
1406 0x8080008000808080ull, /* D7 */
1407 0x8080008080000000ull, /* D8 */
1408 0x8080008080000080ull, /* D9 */
1409 0x8080008080008000ull, /* DA */
1410 0x8080008080008080ull, /* DB */
1411 0x8080008080800000ull, /* DC */
1412 0x8080008080800080ull, /* DD */
1413 0x8080008080808000ull, /* DE */
1414 0x8080008080808080ull, /* DF */
1415 0x8080800000000000ull, /* E0 */
1416 0x8080800000000080ull, /* E1 */
1417 0x8080800000008000ull, /* E2 */
1418 0x8080800000008080ull, /* E3 */
1419 0x8080800000800000ull, /* E4 */
1420 0x8080800000800080ull, /* E5 */
1421 0x8080800000808000ull, /* E6 */
1422 0x8080800000808080ull, /* E7 */
1423 0x8080800080000000ull, /* E8 */
1424 0x8080800080000080ull, /* E9 */
1425 0x8080800080008000ull, /* EA */
1426 0x8080800080008080ull, /* EB */
1427 0x8080800080800000ull, /* EC */
1428 0x8080800080800080ull, /* ED */
1429 0x8080800080808000ull, /* EE */
1430 0x8080800080808080ull, /* EF */
1431 0x8080808000000000ull, /* F0 */
1432 0x8080808000000080ull, /* F1 */
1433 0x8080808000008000ull, /* F2 */
1434 0x8080808000008080ull, /* F3 */
1435 0x8080808000800000ull, /* F4 */
1436 0x8080808000800080ull, /* F5 */
1437 0x8080808000808000ull, /* F6 */
1438 0x8080808000808080ull, /* F7 */
1439 0x8080808080000000ull, /* F8 */
1440 0x8080808080000080ull, /* F9 */
1441 0x8080808080008000ull, /* FA */
1442 0x8080808080008080ull, /* FB */
1443 0x8080808080800000ull, /* FC */
1444 0x8080808080800080ull, /* FD */
1445 0x8080808080808000ull, /* FE */
1446 0x8080808080808080ull, /* FF */
1449 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1451 int i;
1452 uint64_t t[2] = { 0, 0 };
1454 VECTOR_FOR_INORDER_I(i, u8) {
1455 #if defined(HOST_WORDS_BIGENDIAN)
1456 t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1457 #else
1458 t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (7 - (i & 7));
1459 #endif
1462 r->u64[0] = t[0];
1463 r->u64[1] = t[1];
1466 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1467 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1469 int i, j; \
1470 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1472 VECTOR_FOR_INORDER_I(i, srcfld) { \
1473 prod[i] = 0; \
1474 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1475 if (a->srcfld[i] & (1ull << j)) { \
1476 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1481 VECTOR_FOR_INORDER_I(i, trgfld) { \
1482 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1486 PMSUM(vpmsumb, u8, u16, uint16_t)
1487 PMSUM(vpmsumh, u16, u32, uint32_t)
1488 PMSUM(vpmsumw, u32, u64, uint64_t)
1490 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1493 #ifdef CONFIG_INT128
1494 int i, j;
1495 __uint128_t prod[2];
1497 VECTOR_FOR_INORDER_I(i, u64) {
1498 prod[i] = 0;
1499 for (j = 0; j < 64; j++) {
1500 if (a->u64[i] & (1ull << j)) {
1501 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1506 r->u128 = prod[0] ^ prod[1];
1508 #else
1509 int i, j;
1510 ppc_avr_t prod[2];
1512 VECTOR_FOR_INORDER_I(i, u64) {
1513 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1514 for (j = 0; j < 64; j++) {
1515 if (a->u64[i] & (1ull << j)) {
1516 ppc_avr_t bshift;
1517 if (j == 0) {
1518 bshift.VsrD(0) = 0;
1519 bshift.VsrD(1) = b->u64[i];
1520 } else {
1521 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1522 bshift.VsrD(1) = b->u64[i] << j;
1524 prod[i].VsrD(1) ^= bshift.VsrD(1);
1525 prod[i].VsrD(0) ^= bshift.VsrD(0);
1530 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1531 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1532 #endif
1536 #if defined(HOST_WORDS_BIGENDIAN)
1537 #define PKBIG 1
1538 #else
1539 #define PKBIG 0
1540 #endif
1541 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1543 int i, j;
1544 ppc_avr_t result;
1545 #if defined(HOST_WORDS_BIGENDIAN)
1546 const ppc_avr_t *x[2] = { a, b };
1547 #else
1548 const ppc_avr_t *x[2] = { b, a };
1549 #endif
1551 VECTOR_FOR_INORDER_I(i, u64) {
1552 VECTOR_FOR_INORDER_I(j, u32) {
1553 uint32_t e = x[i]->u32[j];
1555 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1556 ((e >> 6) & 0x3e0) |
1557 ((e >> 3) & 0x1f));
1560 *r = result;
1563 #define VPK(suffix, from, to, cvt, dosat) \
1564 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1565 ppc_avr_t *a, ppc_avr_t *b) \
1567 int i; \
1568 int sat = 0; \
1569 ppc_avr_t result; \
1570 ppc_avr_t *a0 = PKBIG ? a : b; \
1571 ppc_avr_t *a1 = PKBIG ? b : a; \
1573 VECTOR_FOR_INORDER_I(i, from) { \
1574 result.to[i] = cvt(a0->from[i], &sat); \
1575 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1577 *r = result; \
1578 if (dosat && sat) { \
1579 set_vscr_sat(env); \
1582 #define I(x, y) (x)
1583 VPK(shss, s16, s8, cvtshsb, 1)
1584 VPK(shus, s16, u8, cvtshub, 1)
1585 VPK(swss, s32, s16, cvtswsh, 1)
1586 VPK(swus, s32, u16, cvtswuh, 1)
1587 VPK(sdss, s64, s32, cvtsdsw, 1)
1588 VPK(sdus, s64, u32, cvtsduw, 1)
1589 VPK(uhus, u16, u8, cvtuhub, 1)
1590 VPK(uwus, u32, u16, cvtuwuh, 1)
1591 VPK(udus, u64, u32, cvtuduw, 1)
1592 VPK(uhum, u16, u8, I, 0)
1593 VPK(uwum, u32, u16, I, 0)
1594 VPK(udum, u64, u32, I, 0)
1595 #undef I
1596 #undef VPK
1597 #undef PKBIG
1599 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1601 int i;
1603 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1604 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1608 #define VRFI(suffix, rounding) \
1609 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1610 ppc_avr_t *b) \
1612 int i; \
1613 float_status s = env->vec_status; \
1615 set_float_rounding_mode(rounding, &s); \
1616 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1617 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1620 VRFI(n, float_round_nearest_even)
1621 VRFI(m, float_round_down)
1622 VRFI(p, float_round_up)
1623 VRFI(z, float_round_to_zero)
1624 #undef VRFI
1626 #define VROTATE(suffix, element, mask) \
1627 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1629 int i; \
1631 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1632 unsigned int shift = b->element[i] & mask; \
1633 r->element[i] = (a->element[i] << shift) | \
1634 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1637 VROTATE(b, u8, 0x7)
1638 VROTATE(h, u16, 0xF)
1639 VROTATE(w, u32, 0x1F)
1640 VROTATE(d, u64, 0x3F)
1641 #undef VROTATE
1643 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1645 int i;
1647 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1648 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1650 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1654 #define VRLMI(name, size, element, insert) \
1655 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1657 int i; \
1658 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1659 uint##size##_t src1 = a->element[i]; \
1660 uint##size##_t src2 = b->element[i]; \
1661 uint##size##_t src3 = r->element[i]; \
1662 uint##size##_t begin, end, shift, mask, rot_val; \
1664 shift = extract##size(src2, 0, 6); \
1665 end = extract##size(src2, 8, 6); \
1666 begin = extract##size(src2, 16, 6); \
1667 rot_val = rol##size(src1, shift); \
1668 mask = mask_u##size(begin, end); \
1669 if (insert) { \
1670 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1671 } else { \
1672 r->element[i] = (rot_val & mask); \
1677 VRLMI(vrldmi, 64, u64, 1);
1678 VRLMI(vrlwmi, 32, u32, 1);
1679 VRLMI(vrldnm, 64, u64, 0);
1680 VRLMI(vrlwnm, 32, u32, 0);
1682 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1683 ppc_avr_t *c)
1685 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1686 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1689 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1691 int i;
1693 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1694 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1698 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1700 int i;
1702 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1703 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1707 #if defined(HOST_WORDS_BIGENDIAN)
1708 #define VEXTU_X_DO(name, size, left) \
1709 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1711 int index; \
1712 if (left) { \
1713 index = (a & 0xf) * 8; \
1714 } else { \
1715 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1717 return int128_getlo(int128_rshift(b->s128, index)) & \
1718 MAKE_64BIT_MASK(0, size); \
1720 #else
1721 #define VEXTU_X_DO(name, size, left) \
1722 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1724 int index; \
1725 if (left) { \
1726 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1727 } else { \
1728 index = (a & 0xf) * 8; \
1730 return int128_getlo(int128_rshift(b->s128, index)) & \
1731 MAKE_64BIT_MASK(0, size); \
1733 #endif
1735 VEXTU_X_DO(vextublx, 8, 1)
1736 VEXTU_X_DO(vextuhlx, 16, 1)
1737 VEXTU_X_DO(vextuwlx, 32, 1)
1738 VEXTU_X_DO(vextubrx, 8, 0)
1739 VEXTU_X_DO(vextuhrx, 16, 0)
1740 VEXTU_X_DO(vextuwrx, 32, 0)
1741 #undef VEXTU_X_DO
1744 * The specification says that the results are undefined if the shift
1745 * counts are not all identical.  We check that they are, in order to
1746 * conform to what real hardware appears to do.
1748 #define VSHIFT(suffix, leftp) \
1749 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1751 int shift = b->VsrB(15) & 0x7; \
1752 int doit = 1; \
1753 int i; \
1755 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1756 doit = doit && ((b->u8[i] & 0x7) == shift); \
1758 if (doit) { \
1759 if (shift == 0) { \
1760 *r = *a; \
1761 } else if (leftp) { \
1762 uint64_t carry = a->VsrD(1) >> (64 - shift); \
1764 r->VsrD(0) = (a->VsrD(0) << shift) | carry; \
1765 r->VsrD(1) = a->VsrD(1) << shift; \
1766 } else { \
1767 uint64_t carry = a->VsrD(0) << (64 - shift); \
1769 r->VsrD(1) = (a->VsrD(1) >> shift) | carry; \
1770 r->VsrD(0) = a->VsrD(0) >> shift; \
1774 VSHIFT(l, 1)
1775 VSHIFT(r, 0)
1776 #undef VSHIFT
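/*
 * Illustrative sketch, not part of the original file: how the left-shift arm
 * above stitches a 128-bit shift out of two 64-bit halves for 1 <= shift <= 7
 * (shift == 0 is special-cased before that point, which also avoids the
 * undefined 64-bit shift below).
 */
static inline void sketch_shl128(uint64_t *hi, uint64_t *lo, int shift)
{
    uint64_t carry = *lo >> (64 - shift);   /* bits crossing the 64-bit seam */

    *hi = (*hi << shift) | carry;
    *lo <<= shift;
}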
1778 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1780 int i;
1781 unsigned int shift, bytes, size;
1783 size = ARRAY_SIZE(r->u8);
1784 for (i = 0; i < size; i++) {
1785 shift = b->VsrB(i) & 0x7; /* extract shift value */
1786 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1787 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1788 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1792 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1794 int i;
1795 unsigned int shift, bytes;
1798 * Use reverse order, as the destination and source registers can be the
1799 * same.  The vector is modified in place (saving a temporary), so reverse
1800 * order guarantees that already-computed results are not fed back.
1802 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1803 shift = b->VsrB(i) & 0x7; /* extract shift value */
1804 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1805 /* extract adjacent bytes */
1806 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1810 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1812 int sh = shift & 0xf;
1813 int i;
1814 ppc_avr_t result;
1816 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1817 int index = sh + i;
1818 if (index > 0xf) {
1819 result.VsrB(i) = b->VsrB(index - 0x10);
1820 } else {
1821 result.VsrB(i) = a->VsrB(index);
1824 *r = result;
1827 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1829 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1831 #if defined(HOST_WORDS_BIGENDIAN)
1832 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1833 memset(&r->u8[16 - sh], 0, sh);
1834 #else
1835 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1836 memset(&r->u8[0], 0, sh);
1837 #endif
1840 #if defined(HOST_WORDS_BIGENDIAN)
1841 #define VINSERT(suffix, element) \
1842 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1844 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
1845 sizeof(r->element[0])); \
1847 #else
1848 #define VINSERT(suffix, element) \
1849 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1851 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1852 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1854 #endif
1855 VINSERT(b, u8)
1856 VINSERT(h, u16)
1857 VINSERT(w, u32)
1858 VINSERT(d, u64)
1859 #undef VINSERT
1860 #if defined(HOST_WORDS_BIGENDIAN)
1861 #define VEXTRACT(suffix, element) \
1862 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1864 uint32_t es = sizeof(r->element[0]); \
1865 memmove(&r->u8[8 - es], &b->u8[index], es); \
1866 memset(&r->u8[8], 0, 8); \
1867 memset(&r->u8[0], 0, 8 - es); \
1869 #else
1870 #define VEXTRACT(suffix, element) \
1871 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1873 uint32_t es = sizeof(r->element[0]); \
1874 uint32_t s = (16 - index) - es; \
1875 memmove(&r->u8[8], &b->u8[s], es); \
1876 memset(&r->u8[0], 0, 8); \
1877 memset(&r->u8[8 + es], 0, 8 - es); \
1879 #endif
1880 VEXTRACT(ub, u8)
1881 VEXTRACT(uh, u16)
1882 VEXTRACT(uw, u32)
1883 VEXTRACT(d, u64)
1884 #undef VEXTRACT
1886 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1887 ppc_vsr_t *xb, uint32_t index)
1889 ppc_vsr_t t = { };
1890 size_t es = sizeof(uint32_t);
1891 uint32_t ext_index;
1892 int i;
1894 ext_index = index;
1895 for (i = 0; i < es; i++, ext_index++) {
1896 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1899 *xt = t;
1902 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1903 ppc_vsr_t *xb, uint32_t index)
1905 ppc_vsr_t t = *xt;
1906 size_t es = sizeof(uint32_t);
1907 int ins_index, i = 0;
1909 ins_index = index;
1910 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1911 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1914 *xt = t;
1917 #define VEXT_SIGNED(name, element, cast) \
1918 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1920 int i; \
1921 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1922 r->element[i] = (cast)b->element[i]; \
1925 VEXT_SIGNED(vextsb2w, s32, int8_t)
1926 VEXT_SIGNED(vextsb2d, s64, int8_t)
1927 VEXT_SIGNED(vextsh2w, s32, int16_t)
1928 VEXT_SIGNED(vextsh2d, s64, int16_t)
1929 VEXT_SIGNED(vextsw2d, s64, int32_t)
1930 #undef VEXT_SIGNED
1932 #define VNEG(name, element) \
1933 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1935 int i; \
1936 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1937 r->element[i] = -b->element[i]; \
1940 VNEG(vnegw, s32)
1941 VNEG(vnegd, s64)
1942 #undef VNEG
1944 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1946 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1948 #if defined(HOST_WORDS_BIGENDIAN)
1949 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1950 memset(&r->u8[0], 0, sh);
1951 #else
1952 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1953 memset(&r->u8[16 - sh], 0, sh);
1954 #endif
1957 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1959 int i;
1961 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1962 r->u32[i] = a->u32[i] >= b->u32[i];
1966 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1968 int64_t t;
1969 int i, upper;
1970 ppc_avr_t result;
1971 int sat = 0;
1973 upper = ARRAY_SIZE(r->s32) - 1;
1974 t = (int64_t)b->VsrSW(upper);
1975 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1976 t += a->VsrSW(i);
1977 result.VsrSW(i) = 0;
1979 result.VsrSW(upper) = cvtsdsw(t, &sat);
1980 *r = result;
1982 if (sat) {
1983 set_vscr_sat(env);
1987 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1989 int i, j, upper;
1990 ppc_avr_t result;
1991 int sat = 0;
1993 upper = 1;
1994 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1995 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1997 result.VsrD(i) = 0;
1998 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1999 t += a->VsrSW(2 * i + j);
2001 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
2004 *r = result;
2005 if (sat) {
2006 set_vscr_sat(env);
2010 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2012 int i, j;
2013 int sat = 0;
2015 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2016 int64_t t = (int64_t)b->s32[i];
2018 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2019 t += a->s8[4 * i + j];
2021 r->s32[i] = cvtsdsw(t, &sat);
2024 if (sat) {
2025 set_vscr_sat(env);
2029 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2031 int sat = 0;
2032 int i;
2034 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2035 int64_t t = (int64_t)b->s32[i];
2037 t += a->s16[2 * i] + a->s16[2 * i + 1];
2038 r->s32[i] = cvtsdsw(t, &sat);
2041 if (sat) {
2042 set_vscr_sat(env);
2046 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2048 int i, j;
2049 int sat = 0;
2051 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2052 uint64_t t = (uint64_t)b->u32[i];
2054 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2055 t += a->u8[4 * i + j];
2057 r->u32[i] = cvtuduw(t, &sat);
2060 if (sat) {
2061 set_vscr_sat(env);
2065 #if defined(HOST_WORDS_BIGENDIAN)
2066 #define UPKHI 1
2067 #define UPKLO 0
2068 #else
2069 #define UPKHI 0
2070 #define UPKLO 1
2071 #endif
2072 #define VUPKPX(suffix, hi) \
2073 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2075 int i; \
2076 ppc_avr_t result; \
2078 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2079 uint16_t e = b->u16[hi ? i : i + 4]; \
2080 uint8_t a = (e >> 15) ? 0xff : 0; \
2081 uint8_t r = (e >> 10) & 0x1f; \
2082 uint8_t g = (e >> 5) & 0x1f; \
2083 uint8_t b = e & 0x1f; \
2085 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2087 *r = result; \
2089 VUPKPX(lpx, UPKLO)
2090 VUPKPX(hpx, UPKHI)
2091 #undef VUPKPX
2093 #define VUPK(suffix, unpacked, packee, hi) \
2094 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2096 int i; \
2097 ppc_avr_t result; \
2099 if (hi) { \
2100 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2101 result.unpacked[i] = b->packee[i]; \
2103 } else { \
2104 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2105 i++) { \
2106 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2109 *r = result; \
2111 VUPK(hsb, s16, s8, UPKHI)
2112 VUPK(hsh, s32, s16, UPKHI)
2113 VUPK(hsw, s64, s32, UPKHI)
2114 VUPK(lsb, s16, s8, UPKLO)
2115 VUPK(lsh, s32, s16, UPKLO)
2116 VUPK(lsw, s64, s32, UPKLO)
2117 #undef VUPK
2118 #undef UPKHI
2119 #undef UPKLO
2121 #define VGENERIC_DO(name, element) \
2122 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2124 int i; \
2126 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2127 r->element[i] = name(b->element[i]); \
2131 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2132 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2133 #define clzw(v) clz32((v))
2134 #define clzd(v) clz64((v))
2136 VGENERIC_DO(clzb, u8)
2137 VGENERIC_DO(clzh, u16)
2138 VGENERIC_DO(clzw, u32)
2139 VGENERIC_DO(clzd, u64)
2141 #undef clzb
2142 #undef clzh
2143 #undef clzw
2144 #undef clzd
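/*
 * Illustrative sketch, not part of the original file: the idea behind clzb()
 * and clzh() above.  The narrow value is shifted to the top of a 32-bit word
 * so clz32() (from "qemu/host-utils.h") counts leading zeros of the element
 * only; zero is special-cased to the element width.
 */
static inline int sketch_clz8(uint8_t v)
{
    return v ? clz32((uint32_t)v << 24) : 8;
}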
2146 #define ctzb(v) ((v) ? ctz32(v) : 8)
2147 #define ctzh(v) ((v) ? ctz32(v) : 16)
2148 #define ctzw(v) ctz32((v))
2149 #define ctzd(v) ctz64((v))
2151 VGENERIC_DO(ctzb, u8)
2152 VGENERIC_DO(ctzh, u16)
2153 VGENERIC_DO(ctzw, u32)
2154 VGENERIC_DO(ctzd, u64)
2156 #undef ctzb
2157 #undef ctzh
2158 #undef ctzw
2159 #undef ctzd
2161 #define popcntb(v) ctpop8(v)
2162 #define popcnth(v) ctpop16(v)
2163 #define popcntw(v) ctpop32(v)
2164 #define popcntd(v) ctpop64(v)
2166 VGENERIC_DO(popcntb, u8)
2167 VGENERIC_DO(popcnth, u16)
2168 VGENERIC_DO(popcntw, u32)
2169 VGENERIC_DO(popcntd, u64)
2171 #undef popcntb
2172 #undef popcnth
2173 #undef popcntw
2174 #undef popcntd
2176 #undef VGENERIC_DO
2178 #if defined(HOST_WORDS_BIGENDIAN)
2179 #define QW_ONE { .u64 = { 0, 1 } }
2180 #else
2181 #define QW_ONE { .u64 = { 1, 0 } }
2182 #endif
2184 #ifndef CONFIG_INT128
2186 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2188 t->u64[0] = ~a.u64[0];
2189 t->u64[1] = ~a.u64[1];
2192 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2194 if (a.VsrD(0) < b.VsrD(0)) {
2195 return -1;
2196 } else if (a.VsrD(0) > b.VsrD(0)) {
2197 return 1;
2198 } else if (a.VsrD(1) < b.VsrD(1)) {
2199 return -1;
2200 } else if (a.VsrD(1) > b.VsrD(1)) {
2201 return 1;
2202 } else {
2203 return 0;
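/*
 * 128-bit add built from two 64-bit adds: the low doubleword addition
 * carries out exactly when b.lo > ~a.lo (i.e. a.lo + b.lo overflows),
 * and that carry is folded into the high doubleword sum.
 */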
2207 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2209 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2210 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2211 (~a.VsrD(1) < b.VsrD(1));
2214 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2216 ppc_avr_t not_a;
2217 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2218 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2219 (~a.VsrD(1) < b.VsrD(1));
2220 avr_qw_not(&not_a, a);
2221 return avr_qw_cmpu(not_a, b) < 0;
2224 #endif
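/*
 * Quadword (128-bit) arithmetic: use the compiler's __int128 type when
 * CONFIG_INT128 is available, otherwise compose the operation from the
 * avr_qw_* helpers above.
 */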
2226 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2228 #ifdef CONFIG_INT128
2229 r->u128 = a->u128 + b->u128;
2230 #else
2231 avr_qw_add(r, *a, *b);
2232 #endif
2235 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2237 #ifdef CONFIG_INT128
2238 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2239 #else
2241 if (c->VsrD(1) & 1) {
2242 ppc_avr_t tmp;
2244 tmp.VsrD(0) = 0;
2245 tmp.VsrD(1) = c->VsrD(1) & 1;
2246 avr_qw_add(&tmp, *a, tmp);
2247 avr_qw_add(r, tmp, *b);
2248 } else {
2249 avr_qw_add(r, *a, *b);
2251 #endif
2254 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2256 #ifdef CONFIG_INT128
2257 r->u128 = (~a->u128 < b->u128);
2258 #else
2259 ppc_avr_t not_a;
2261 avr_qw_not(&not_a, *a);
2263 r->VsrD(0) = 0;
2264 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
2265 #endif
2268 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2270 #ifdef CONFIG_INT128
2271 int carry_out = (~a->u128 < b->u128);
2272 if (!carry_out && (c->u128 & 1)) {
2273 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2274 ((a->u128 != 0) || (b->u128 != 0));
2276 r->u128 = carry_out;
2277 #else
2279 int carry_in = c->VsrD(1) & 1;
2280 int carry_out = 0;
2281 ppc_avr_t tmp;
2283 carry_out = avr_qw_addc(&tmp, *a, *b);
2285 if (!carry_out && carry_in) {
2286 ppc_avr_t one = QW_ONE;
2287 carry_out = avr_qw_addc(&tmp, tmp, one);
2289 r->VsrD(0) = 0;
2290 r->VsrD(1) = carry_out;
2291 #endif
2294 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2296 #ifdef CONFIG_INT128
2297 r->u128 = a->u128 - b->u128;
2298 #else
2299 ppc_avr_t tmp;
2300 ppc_avr_t one = QW_ONE;
2302 avr_qw_not(&tmp, *b);
2303 avr_qw_add(&tmp, *a, tmp);
2304 avr_qw_add(r, tmp, one);
2305 #endif
2308 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2310 #ifdef CONFIG_INT128
2311 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2312 #else
2313 ppc_avr_t tmp, sum;
2315 avr_qw_not(&tmp, *b);
2316 avr_qw_add(&sum, *a, tmp);
2318 tmp.VsrD(0) = 0;
2319 tmp.VsrD(1) = c->VsrD(1) & 1;
2320 avr_qw_add(r, sum, tmp);
2321 #endif
2324 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2326 #ifdef CONFIG_INT128
2327 r->u128 = (~a->u128 < ~b->u128) ||
2328 (a->u128 + ~b->u128 == (__uint128_t)-1);
2329 #else
2330 int carry = (avr_qw_cmpu(*a, *b) > 0);
2331 if (!carry) {
2332 ppc_avr_t tmp;
2333 avr_qw_not(&tmp, *b);
2334 avr_qw_add(&tmp, *a, tmp);
2335 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2337 r->VsrD(0) = 0;
2338 r->VsrD(1) = carry;
2339 #endif
2342 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2344 #ifdef CONFIG_INT128
2345 r->u128 =
2346 (~a->u128 < ~b->u128) ||
2347 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2348 #else
2349 int carry_in = c->VsrD(1) & 1;
2350 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2351 if (!carry_out && carry_in) {
2352 ppc_avr_t tmp;
2353 avr_qw_not(&tmp, *b);
2354 avr_qw_add(&tmp, *a, tmp);
2355 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2358 r->VsrD(0) = 0;
2359 r->VsrD(1) = carry_out;
2360 #endif
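/*
 * Packed BCD layout used by the bcd* helpers below: one nibble per
 * decimal digit, with the sign code in digit position 0 and the value
 * in positions 1 (least significant) to 31 (most significant).
 * BCD_DIG_BYTE maps a digit index to the byte that holds it for the
 * host byte order.
 */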
2363 #define BCD_PLUS_PREF_1 0xC
2364 #define BCD_PLUS_PREF_2 0xF
2365 #define BCD_PLUS_ALT_1 0xA
2366 #define BCD_NEG_PREF 0xD
2367 #define BCD_NEG_ALT 0xB
2368 #define BCD_PLUS_ALT_2 0xE
2369 #define NATIONAL_PLUS 0x2B
2370 #define NATIONAL_NEG 0x2D
2372 #if defined(HOST_WORDS_BIGENDIAN)
2373 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2374 #else
2375 #define BCD_DIG_BYTE(n) ((n) / 2)
2376 #endif
2378 static int bcd_get_sgn(ppc_avr_t *bcd)
2380 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2381 case BCD_PLUS_PREF_1:
2382 case BCD_PLUS_PREF_2:
2383 case BCD_PLUS_ALT_1:
2384 case BCD_PLUS_ALT_2:
2386 return 1;
2389 case BCD_NEG_PREF:
2390 case BCD_NEG_ALT:
2392 return -1;
2395 default:
2397 return 0;
2402 static int bcd_preferred_sgn(int sgn, int ps)
2404 if (sgn >= 0) {
2405 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2406 } else {
2407 return BCD_NEG_PREF;
2411 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2413 uint8_t result;
2414 if (n & 1) {
2415 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2416 } else {
2417 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2420 if (unlikely(result > 9)) {
2421 *invalid = true;
2423 return result;
2426 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2428 if (n & 1) {
2429 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2430 bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4);
2431 } else {
2432 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2433 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2437 static bool bcd_is_valid(ppc_avr_t *bcd)
2439 int i;
2440 int invalid = 0;
2442 if (bcd_get_sgn(bcd) == 0) {
2443 return false;
2446 for (i = 1; i < 32; i++) {
2447 bcd_get_digit(bcd, i, &invalid);
2448 if (unlikely(invalid)) {
2449 return false;
2452 return true;
2455 static int bcd_cmp_zero(ppc_avr_t *bcd)
2457 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2458 return CRF_EQ;
2459 } else {
2460 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2464 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2466 return reg->VsrH(7 - n);
2469 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2471 reg->VsrH(7 - n) = val;
2474 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2476 int i;
2477 int invalid = 0;
2478 for (i = 31; i > 0; i--) {
2479 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2480 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2481 if (unlikely(invalid)) {
2482 return 0; /* doesn't matter */
2483 } else if (dig_a > dig_b) {
2484 return 1;
2485 } else if (dig_a < dig_b) {
2486 return -1;
2490 return 0;
2493 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2494 int *overflow)
2496 int carry = 0;
2497 int i;
2498 for (i = 1; i <= 31; i++) {
2499 uint8_t digit = bcd_get_digit(a, i, invalid) +
2500 bcd_get_digit(b, i, invalid) + carry;
2501 if (digit > 9) {
2502 carry = 1;
2503 digit -= 10;
2504 } else {
2505 carry = 0;
2508 bcd_put_digit(t, digit, i);
2511 *overflow = carry;
2514 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2515 int *overflow)
2517 int carry = 0;
2518 int i;
2520 for (i = 1; i <= 31; i++) {
2521 uint8_t digit = bcd_get_digit(a, i, invalid) -
2522 bcd_get_digit(b, i, invalid) + carry;
2523 if (digit & 0x80) {
2524 carry = -1;
2525 digit += 10;
2526 } else {
2527 carry = 0;
2530 bcd_put_digit(t, digit, i);
2533 *overflow = carry;
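/*
 * bcdadd: signed-magnitude decimal add. Equal signs add the magnitudes
 * and keep the common sign; opposite signs subtract the smaller
 * magnitude from the larger and take the sign of the larger operand.
 * CR encodes the sign of the result (or zero), with SO set on invalid
 * input or decimal overflow.
 */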
2536 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2539 int sgna = bcd_get_sgn(a);
2540 int sgnb = bcd_get_sgn(b);
2541 int invalid = (sgna == 0) || (sgnb == 0);
2542 int overflow = 0;
2543 uint32_t cr = 0;
2544 ppc_avr_t result = { .u64 = { 0, 0 } };
2546 if (!invalid) {
2547 if (sgna == sgnb) {
2548 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2549 bcd_add_mag(&result, a, b, &invalid, &overflow);
2550 cr = bcd_cmp_zero(&result);
2551 } else {
2552 int magnitude = bcd_cmp_mag(a, b);
2553 if (magnitude > 0) {
2554 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2555 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2556 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2557 } else if (magnitude < 0) {
2558 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2559 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2560 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2561 } else {
2562 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps);
2563 cr = CRF_EQ;
2568 if (unlikely(invalid)) {
2569 result.VsrD(0) = result.VsrD(1) = -1;
2570 cr = CRF_SO;
2571 } else if (overflow) {
2572 cr |= CRF_SO;
2575 *r = result;
2577 return cr;
2580 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2582 ppc_avr_t bcopy = *b;
2583 int sgnb = bcd_get_sgn(b);
2584 if (sgnb < 0) {
2585 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2586 } else if (sgnb > 0) {
2587 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2589 /* else invalid ... defer to bcdadd code for proper handling */
2591 return helper_bcdadd(r, a, &bcopy, ps);
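/*
 * bcdcfn/bcdctn: convert between packed BCD and "national" decimal
 * format, i.e. seven 16-bit character digits ('0'..'9') with a sign
 * character (0x2B '+' or 0x2D '-') in the least significant halfword.
 */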
2594 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2596 int i;
2597 int cr = 0;
2598 uint16_t national = 0;
2599 uint16_t sgnb = get_national_digit(b, 0);
2600 ppc_avr_t ret = { .u64 = { 0, 0 } };
2601 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2603 for (i = 1; i < 8; i++) {
2604 national = get_national_digit(b, i);
2605 if (unlikely(national < 0x30 || national > 0x39)) {
2606 invalid = 1;
2607 break;
2610 bcd_put_digit(&ret, national & 0xf, i);
2613 if (sgnb == NATIONAL_PLUS) {
2614 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2615 } else {
2616 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2619 cr = bcd_cmp_zero(&ret);
2621 if (unlikely(invalid)) {
2622 cr = CRF_SO;
2625 *r = ret;
2627 return cr;
2630 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2632 int i;
2633 int cr = 0;
2634 int sgnb = bcd_get_sgn(b);
2635 int invalid = (sgnb == 0);
2636 ppc_avr_t ret = { .u64 = { 0, 0 } };
2638 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2640 for (i = 1; i < 8; i++) {
2641 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2643 if (unlikely(invalid)) {
2644 break;
2647 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2649 cr = bcd_cmp_zero(b);
2651 if (ox_flag) {
2652 cr |= CRF_SO;
2655 if (unlikely(invalid)) {
2656 cr = CRF_SO;
2659 *r = ret;
2661 return cr;
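/*
 * bcdcfz/bcdctz: convert between packed BCD and zoned decimal format:
 * sixteen bytes, each holding one digit in its low nibble and a zone
 * nibble in its high nibble (0x3 when ps is clear, 0xF when ps is set),
 * with the sign encoded in the zone of the least significant byte.
 */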
2664 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2666 int i;
2667 int cr = 0;
2668 int invalid = 0;
2669 int zone_digit = 0;
2670 int zone_lead = ps ? 0xF : 0x3;
2671 int digit = 0;
2672 ppc_avr_t ret = { .u64 = { 0, 0 } };
2673 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;
2675 if (unlikely((sgnb < 0xA) && ps)) {
2676 invalid = 1;
2679 for (i = 0; i < 16; i++) {
2680 zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
2681 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
2682 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2683 invalid = 1;
2684 break;
2687 bcd_put_digit(&ret, digit, i + 1);
2690 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2691 (!ps && (sgnb & 0x4))) {
2692 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2693 } else {
2694 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2697 cr = bcd_cmp_zero(&ret);
2699 if (unlikely(invalid)) {
2700 cr = CRF_SO;
2703 *r = ret;
2705 return cr;
2708 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2710 int i;
2711 int cr = 0;
2712 uint8_t digit = 0;
2713 int sgnb = bcd_get_sgn(b);
2714 int zone_lead = (ps) ? 0xF0 : 0x30;
2715 int invalid = (sgnb == 0);
2716 ppc_avr_t ret = { .u64 = { 0, 0 } };
2718 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2720 for (i = 0; i < 16; i++) {
2721 digit = bcd_get_digit(b, i + 1, &invalid);
2723 if (unlikely(invalid)) {
2724 break;
2727 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
2730 if (ps) {
2731 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2732 } else {
2733 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2736 cr = bcd_cmp_zero(b);
2738 if (ox_flag) {
2739 cr |= CRF_SO;
2742 if (unlikely(invalid)) {
2743 cr = CRF_SO;
2746 *r = ret;
2748 return cr;
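/*
 * bcdcfsq: convert a signed 128-bit binary integer to packed BCD. The
 * magnitude is divided by 10^15 so the low 15 and the remaining high
 * digits can each be extracted with 64-bit arithmetic; CRF_SO is set
 * if the value does not fit in 31 decimal digits.
 */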
2751 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2753 int i;
2754 int cr = 0;
2755 uint64_t lo_value;
2756 uint64_t hi_value;
2757 ppc_avr_t ret = { .u64 = { 0, 0 } };
2759 if (b->VsrSD(0) < 0) {
2760 lo_value = -b->VsrSD(1);
2761 hi_value = ~b->VsrD(0) + !lo_value;
2762 bcd_put_digit(&ret, 0xD, 0);
2763 } else {
2764 lo_value = b->VsrD(1);
2765 hi_value = b->VsrD(0);
2766 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2769 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
2770 lo_value > 9999999999999999ULL) {
2771 cr = CRF_SO;
2774 for (i = 1; i < 16; hi_value /= 10, i++) {
2775 bcd_put_digit(&ret, hi_value % 10, i);
2778 for (; i < 32; lo_value /= 10, i++) {
2779 bcd_put_digit(&ret, lo_value % 10, i);
2782 cr |= bcd_cmp_zero(&ret);
2784 *r = ret;
2786 return cr;
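/*
 * bcdctsq: convert packed BCD to a signed 128-bit binary integer by
 * accumulating digits from the most significant (index 31) downwards
 * with 128-bit multiply-by-10 steps, then negating the result when the
 * sign nibble is negative. CRF_SO is set for an invalid source.
 */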
2789 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2791 uint8_t i;
2792 int cr;
2793 uint64_t carry;
2794 uint64_t unused;
2795 uint64_t lo_value;
2796 uint64_t hi_value = 0;
2797 int sgnb = bcd_get_sgn(b);
2798 int invalid = (sgnb == 0);
2800 lo_value = bcd_get_digit(b, 31, &invalid);
2801 for (i = 30; i > 0; i--) {
2802 mulu64(&lo_value, &carry, lo_value, 10ULL);
2803 mulu64(&hi_value, &unused, hi_value, 10ULL);
2804 lo_value += bcd_get_digit(b, i, &invalid);
2805 hi_value += carry;
2807 if (unlikely(invalid)) {
2808 break;
2812 if (sgnb == -1) {
2813 r->VsrSD(1) = -lo_value;
2814 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2815 } else {
2816 r->VsrSD(1) = lo_value;
2817 r->VsrSD(0) = hi_value;
2820 cr = bcd_cmp_zero(b);
2822 if (unlikely(invalid)) {
2823 cr = CRF_SO;
2826 return cr;
2829 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2831 int i;
2832 int invalid = 0;
2834 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2835 return CRF_SO;
2838 *r = *a;
2839 bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0);
2841 for (i = 1; i < 32; i++) {
2842 bcd_get_digit(a, i, &invalid);
2843 bcd_get_digit(b, i, &invalid);
2844 if (unlikely(invalid)) {
2845 return CRF_SO;
2849 return bcd_cmp_zero(r);
2852 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2854 int sgnb = bcd_get_sgn(b);
2856 *r = *b;
2857 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2859 if (bcd_is_valid(b) == false) {
2860 return CRF_SO;
2863 return bcd_cmp_zero(r);
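/*
 * bcds: decimal shift. The signed count in byte element 7 of a moves
 * the 31-digit magnitude left (positive) or right (negative) by that
 * many digits, i.e. four bits per digit; the sign nibble is re-inserted
 * afterwards and SO is raised when a left shift drops nonzero digits.
 */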
2866 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2868 int cr;
2869 #if defined(HOST_WORDS_BIGENDIAN)
2870 int i = a->s8[7];
2871 #else
2872 int i = a->s8[8];
2873 #endif
2874 bool ox_flag = false;
2875 int sgnb = bcd_get_sgn(b);
2876 ppc_avr_t ret = *b;
2877 ret.VsrD(1) &= ~0xf;
2879 if (bcd_is_valid(b) == false) {
2880 return CRF_SO;
2883 if (unlikely(i > 31)) {
2884 i = 31;
2885 } else if (unlikely(i < -31)) {
2886 i = -31;
2889 if (i > 0) {
2890 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2891 } else {
2892 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2894 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2896 *r = ret;
2898 cr = bcd_cmp_zero(r);
2899 if (ox_flag) {
2900 cr |= CRF_SO;
2903 return cr;
2906 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2908 int cr;
2909 int i;
2910 int invalid = 0;
2911 bool ox_flag = false;
2912 ppc_avr_t ret = *b;
2914 for (i = 0; i < 32; i++) {
2915 bcd_get_digit(b, i, &invalid);
2917 if (unlikely(invalid)) {
2918 return CRF_SO;
2922 #if defined(HOST_WORDS_BIGENDIAN)
2923 i = a->s8[7];
2924 #else
2925 i = a->s8[8];
2926 #endif
2927 if (i >= 32) {
2928 ox_flag = true;
2929 ret.VsrD(1) = ret.VsrD(0) = 0;
2930 } else if (i <= -32) {
2931 ret.VsrD(1) = ret.VsrD(0) = 0;
2932 } else if (i > 0) {
2933 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2934 } else {
2935 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2937 *r = ret;
2939 cr = bcd_cmp_zero(r);
2940 if (ox_flag) {
2941 cr |= CRF_SO;
2944 return cr;
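/*
 * bcdsr: shift and round. As bcds, but a right shift rounds the result
 * up by one when the most significant discarded digit is 5 or greater.
 */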
2947 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2949 int cr;
2950 int unused = 0;
2951 int invalid = 0;
2952 bool ox_flag = false;
2953 int sgnb = bcd_get_sgn(b);
2954 ppc_avr_t ret = *b;
2955 ret.VsrD(1) &= ~0xf;
2957 #if defined(HOST_WORDS_BIGENDIAN)
2958 int i = a->s8[7];
2959 ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } };
2960 #else
2961 int i = a->s8[8];
2962 ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } };
2963 #endif
2965 if (bcd_is_valid(b) == false) {
2966 return CRF_SO;
2969 if (unlikely(i > 31)) {
2970 i = 31;
2971 } else if (unlikely(i < -31)) {
2972 i = -31;
2975 if (i > 0) {
2976 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2977 } else {
2978 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2980 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2981 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2984 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2986 cr = bcd_cmp_zero(&ret);
2987 if (ox_flag) {
2988 cr |= CRF_SO;
2990 *r = ret;
2992 return cr;
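/*
 * bcdtrunc: truncate the BCD value to the number of digits given by a
 * signed halfword of a, clearing the more significant digits; SO is
 * raised if any nonzero digit is discarded.
 */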
2995 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2997 uint64_t mask;
2998 uint32_t ox_flag = 0;
2999 #if defined(HOST_WORDS_BIGENDIAN)
3000 int i = a->s16[3] + 1;
3001 #else
3002 int i = a->s16[4] + 1;
3003 #endif
3004 ppc_avr_t ret = *b;
3006 if (bcd_is_valid(b) == false) {
3007 return CRF_SO;
3010 if (i > 16 && i < 32) {
3011 mask = (uint64_t)-1 >> (128 - i * 4);
3012 if (ret.VsrD(0) & ~mask) {
3013 ox_flag = CRF_SO;
3016 ret.VsrD(0) &= mask;
3017 } else if (i >= 0 && i <= 16) {
3018 mask = (uint64_t)-1 >> (64 - i * 4);
3019 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
3020 ox_flag = CRF_SO;
3023 ret.VsrD(1) &= mask;
3024 ret.VsrD(0) = 0;
3026 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
3027 *r = ret;
3029 return bcd_cmp_zero(&ret) | ox_flag;
3032 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3034 int i;
3035 uint64_t mask;
3036 uint32_t ox_flag = 0;
3037 int invalid = 0;
3038 ppc_avr_t ret = *b;
3040 for (i = 0; i < 32; i++) {
3041 bcd_get_digit(b, i, &invalid);
3043 if (unlikely(invalid)) {
3044 return CRF_SO;
3048 #if defined(HOST_WORDS_BIGENDIAN)
3049 i = a->s16[3];
3050 #else
3051 i = a->s16[4];
3052 #endif
3053 if (i > 16 && i < 33) {
3054 mask = (uint64_t)-1 >> (128 - i * 4);
3055 if (ret.VsrD(0) & ~mask) {
3056 ox_flag = CRF_SO;
3059 ret.VsrD(0) &= mask;
3060 } else if (i > 0 && i <= 16) {
3061 mask = (uint64_t)-1 >> (64 - i * 4);
3062 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
3063 ox_flag = CRF_SO;
3066 ret.VsrD(1) &= mask;
3067 ret.VsrD(0) = 0;
3068 } else if (i == 0) {
3069 if (ret.VsrD(0) || ret.VsrD(1)) {
3070 ox_flag = CRF_SO;
3072 ret.VsrD(0) = ret.VsrD(1) = 0;
3075 *r = ret;
3076 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
3077 return ox_flag | CRF_EQ;
3080 return ox_flag | CRF_GT;
3083 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
3085 int i;
3086 VECTOR_FOR_INORDER_I(i, u8) {
3087 r->u8[i] = AES_sbox[a->u8[i]];
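/*
 * vcipher: one middle round of AES encryption. AES_shifts implements
 * ShiftRows, the combined AES_Te0..AES_Te3 tables perform SubBytes and
 * MixColumns, and XORing b adds the round key.
 */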
3091 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3093 ppc_avr_t result;
3094 int i;
3096 VECTOR_FOR_INORDER_I(i, u32) {
3097 result.VsrW(i) = b->VsrW(i) ^
3098 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
3099 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
3100 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
3101 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
3103 *r = result;
3106 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3108 ppc_avr_t result;
3109 int i;
3111 VECTOR_FOR_INORDER_I(i, u8) {
3112 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
3114 *r = result;
3117 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3119 /* This differs from what is written in ISA V2.07. The RTL is */
3120 /* incorrect and will be fixed in V2.07B. */
3121 int i;
3122 ppc_avr_t tmp;
3124 VECTOR_FOR_INORDER_I(i, u8) {
3125 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
3128 VECTOR_FOR_INORDER_I(i, u32) {
3129 r->VsrW(i) =
3130 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
3131 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
3132 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
3133 AES_imc[tmp.VsrB(4 * i + 3)][3];
3137 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3139 ppc_avr_t result;
3140 int i;
3142 VECTOR_FOR_INORDER_I(i, u8) {
3143 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
3145 *r = result;
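/*
 * vshasigmaw: SHA-256 sigma functions. st selects between the message
 * schedule sigmas (st == 0: sigma0/sigma1) and the compression Sigmas
 * (st == 1: Sigma0/Sigma1); each bit of six picks the 0 or 1 variant
 * for the corresponding word element.
 */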
3148 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3150 int st = (st_six & 0x10) != 0;
3151 int six = st_six & 0xF;
3152 int i;
3154 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
3155 if (st == 0) {
3156 if ((six & (0x8 >> i)) == 0) {
3157 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
3158 ror32(a->VsrW(i), 18) ^
3159 (a->VsrW(i) >> 3);
3160 } else { /* six.bit[i] == 1 */
3161 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
3162 ror32(a->VsrW(i), 19) ^
3163 (a->VsrW(i) >> 10);
3165 } else { /* st == 1 */
3166 if ((six & (0x8 >> i)) == 0) {
3167 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
3168 ror32(a->VsrW(i), 13) ^
3169 ror32(a->VsrW(i), 22);
3170 } else { /* six.bit[i] == 1 */
3171 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
3172 ror32(a->VsrW(i), 11) ^
3173 ror32(a->VsrW(i), 25);
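/*
 * vshasigmad: the SHA-512 counterpart of vshasigmaw, operating on
 * doubleword elements with the SHA-512 rotate/shift amounts; the
 * variant select uses every other bit of six since there are only two
 * elements.
 */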
3179 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3181 int st = (st_six & 0x10) != 0;
3182 int six = st_six & 0xF;
3183 int i;
3185 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
3186 if (st == 0) {
3187 if ((six & (0x8 >> (2 * i))) == 0) {
3188 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
3189 ror64(a->VsrD(i), 8) ^
3190 (a->VsrD(i) >> 7);
3191 } else { /* six.bit[2*i] == 1 */
3192 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
3193 ror64(a->VsrD(i), 61) ^
3194 (a->VsrD(i) >> 6);
3196 } else { /* st == 1 */
3197 if ((six & (0x8 >> (2 * i))) == 0) {
3198 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3199 ror64(a->VsrD(i), 34) ^
3200 ror64(a->VsrD(i), 39);
3201 } else { /* six.bit[2*i] == 1 */
3202 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3203 ror64(a->VsrD(i), 18) ^
3204 ror64(a->VsrD(i), 41);
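/*
 * vpermxor: each result byte is the XOR of one byte of a and one byte
 * of b, indexed by the high and low nibbles of the corresponding byte
 * of c.
 */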
3210 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3212 ppc_avr_t result;
3213 int i;
3215 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3216 int indexA = c->VsrB(i) >> 4;
3217 int indexB = c->VsrB(i) & 0xF;
3219 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
3221 *r = result;
3224 #undef VECTOR_FOR_INORDER_I
3226 /*****************************************************************************/
3227 /* SPE extension helpers */
3228 /* Use a nibble lookup table to make the bit reversal quicker */
3229 static const uint8_t hbrev[16] = {
3230 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3231 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3234 static inline uint8_t byte_reverse(uint8_t val)
3236 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3239 static inline uint32_t word_reverse(uint32_t val)
3241 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3242 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3245 #define MASKBITS 16 /* Arbitrary value; the real width is implementation dependent and still to be fixed */
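/*
 * brinc: SPE bit-reversed increment, typically used for bit-reversed
 * (FFT-style) buffer addressing. The masked low bits of arg1 are
 * bit-reversed, incremented and reversed back; bits outside the arg2
 * mask are forced to one first so the carry propagates past them.
 */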
3246 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3248 uint32_t a, b, d, mask;
3250 mask = UINT32_MAX >> (32 - MASKBITS);
3251 a = arg1 & mask;
3252 b = arg2 & mask;
3253 d = word_reverse(1 + word_reverse(a | ~b));
3254 return (arg1 & ~mask) | (d & b);
3257 uint32_t helper_cntlsw32(uint32_t val)
3259 if (val & 0x80000000) {
3260 return clz32(~val);
3261 } else {
3262 return clz32(val);
3266 uint32_t helper_cntlzw32(uint32_t val)
3268 return clz32(val);
3271 /* 440 specific */
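/*
 * dlmzb: scan the 8-byte string formed by high:low for a zero byte and
 * return its 1-based position (8 if there is none), writing the count
 * to the low bits of XER and, when update_Rc is set, encoding in CR0
 * whether the zero was found in the high word, the low word, or not at
 * all.
 */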
3272 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3273 target_ulong low, uint32_t update_Rc)
3275 target_ulong mask;
3276 int i;
3278 i = 1;
3279 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3280 if ((high & mask) == 0) {
3281 if (update_Rc) {
3282 env->crf[0] = 0x4;
3284 goto done;
3286 i++;
3288 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3289 if ((low & mask) == 0) {
3290 if (update_Rc) {
3291 env->crf[0] = 0x8;
3293 goto done;
3295 i++;
3297 i = 8;
3298 if (update_Rc) {
3299 env->crf[0] = 0x2;
3301 done:
3302 env->xer = (env->xer & ~0x7F) | i;
3303 if (update_Rc) {
3304 env->crf[0] |= xer_so;
3306 return i;