target/ppc: Optimize emulation of vclzd instruction
[qemu/ar7.git] / target/ppc/int_helper.c
blob b82765db3360681fffb7334ab3fde0e7c2168e82
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "exec/helper-proto.h"
26 #include "crypto/aes.h"
27 #include "fpu/softfloat.h"
28 #include "qapi/error.h"
29 #include "qemu/guest-random.h"
31 #include "helper_regs.h"
32 /*****************************************************************************/
33 /* Fixed point operations helpers */
35 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
37 if (unlikely(ov)) {
38 env->so = env->ov = 1;
39 } else {
40 env->ov = 0;
44 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
45 uint32_t oe)
47 uint64_t rt = 0;
48 int overflow = 0;
50 uint64_t dividend = (uint64_t)ra << 32;
51 uint64_t divisor = (uint32_t)rb;
53 if (unlikely(divisor == 0)) {
54 overflow = 1;
55 } else {
56 rt = dividend / divisor;
57 overflow = rt > UINT32_MAX;
60 if (unlikely(overflow)) {
61 rt = 0; /* Undefined */
64 if (oe) {
65 helper_update_ov_legacy(env, overflow);
68 return (target_ulong)rt;
71 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
72 uint32_t oe)
74 int64_t rt = 0;
75 int overflow = 0;
77 int64_t dividend = (int64_t)ra << 32;
78 int64_t divisor = (int64_t)((int32_t)rb);
80 if (unlikely((divisor == 0) ||
81 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
82 overflow = 1;
83 } else {
84 rt = dividend / divisor;
85 overflow = rt != (int32_t)rt;
88 if (unlikely(overflow)) {
89 rt = 0; /* Undefined */
92 if (oe) {
93 helper_update_ov_legacy(env, overflow);
96 return (target_ulong)rt;
99 #if defined(TARGET_PPC64)
101 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
103 uint64_t rt = 0;
104 int overflow = 0;
106 overflow = divu128(&rt, &ra, rb);
108 if (unlikely(overflow)) {
109 rt = 0; /* Undefined */
112 if (oe) {
113 helper_update_ov_legacy(env, overflow);
116 return rt;
119 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
121 int64_t rt = 0;
122 int64_t ra = (int64_t)rau;
123 int64_t rb = (int64_t)rbu;
124 int overflow = divs128(&rt, &ra, rb);
126 if (unlikely(overflow)) {
127 rt = 0; /* Undefined */
130 if (oe) {
131 helper_update_ov_legacy(env, overflow);
134 return rt;
137 #endif
140 #if defined(TARGET_PPC64)
141 /* if x = 0xab, returns 0xabababababababab */
142 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
145  * subtract 1 from each byte, AND with the inverse, then check if the MSB
146  * is set in each byte.
147 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
148 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
150 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
152 /* When you XOR the pattern and there is a match, that byte will be zero */
153 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
155 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
157 return hasvalue(rb, ra) ? CRF_GT : 0;
160 #undef pattern
161 #undef haszero
162 #undef hasvalue
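/*
 * Illustrative sketch, not part of the upstream file: a standalone,
 * host-only demonstration of the pattern()/haszero()/hasvalue() byte-match
 * trick that helper_cmpeqb() relies on above.  It assumes a 64-bit
 * target_ulong purely for the example; the EX_* names are local to this
 * sketch.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define EX_PATTERN(x)     (((x) & 0xffULL) * (~0ULL / 0xff))
#define EX_HASZERO(v)     (((v) - EX_PATTERN(0x01)) & ~(v) & EX_PATTERN(0x80))
#define EX_HASVALUE(x, n) EX_HASZERO((x) ^ EX_PATTERN(n))

int main(void)
{
    uint64_t rb = 0x1122334455667788ULL;

    /* Byte 0x55 is present: the matching byte leaves a 0x80 marker. */
    printf("%#018" PRIx64 "\n", EX_HASVALUE(rb, 0x55)); /* 0x0000000080000000 */
    /* Byte 0x99 is absent: no marker survives, so the result is 0. */
    printf("%#018" PRIx64 "\n", EX_HASVALUE(rb, 0x99)); /* 0 */
    return 0;
}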
165 * Return a random number.
167 uint64_t helper_darn32(void)
169 Error *err = NULL;
170 uint32_t ret;
172 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
173 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
174 error_get_pretty(err));
175 error_free(err);
176 return -1;
179 return ret;
182 uint64_t helper_darn64(void)
184 Error *err = NULL;
185 uint64_t ret;
187 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
188 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
189 error_get_pretty(err));
190 error_free(err);
191 return -1;
194 return ret;
197 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
199 int i;
200 uint64_t ra = 0;
202 for (i = 0; i < 8; i++) {
203 int index = (rs >> (i * 8)) & 0xFF;
204 if (index < 64) {
205 if (rb & PPC_BIT(index)) {
206 ra |= 1 << i;
210 return ra;
213 #endif
215 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
217 target_ulong mask = 0xff;
218 target_ulong ra = 0;
219 int i;
221 for (i = 0; i < sizeof(target_ulong); i++) {
222 if ((rs & mask) == (rb & mask)) {
223 ra |= mask;
225 mask <<= 8;
227 return ra;
230 /* shift right arithmetic helper */
231 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
232 target_ulong shift)
234 int32_t ret;
236 if (likely(!(shift & 0x20))) {
237 if (likely((uint32_t)shift != 0)) {
238 shift &= 0x1f;
239 ret = (int32_t)value >> shift;
240 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
241 env->ca32 = env->ca = 0;
242 } else {
243 env->ca32 = env->ca = 1;
245 } else {
246 ret = (int32_t)value;
247 env->ca32 = env->ca = 0;
249 } else {
250 ret = (int32_t)value >> 31;
251 env->ca32 = env->ca = (ret != 0);
253 return (target_long)ret;
256 #if defined(TARGET_PPC64)
257 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
258 target_ulong shift)
260 int64_t ret;
262 if (likely(!(shift & 0x40))) {
263 if (likely((uint64_t)shift != 0)) {
264 shift &= 0x3f;
265 ret = (int64_t)value >> shift;
266 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
267 env->ca32 = env->ca = 0;
268 } else {
269 env->ca32 = env->ca = 1;
271 } else {
272 ret = (int64_t)value;
273 env->ca32 = env->ca = 0;
275 } else {
276 ret = (int64_t)value >> 63;
277 env->ca32 = env->ca = (ret != 0);
279 return ret;
281 #endif
283 #if defined(TARGET_PPC64)
284 target_ulong helper_popcntb(target_ulong val)
286 /* Note that we don't fold past bytes */
287 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
288 0x5555555555555555ULL);
289 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
290 0x3333333333333333ULL);
291 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
292 0x0f0f0f0f0f0f0f0fULL);
293 return val;
296 target_ulong helper_popcntw(target_ulong val)
298 /* Note that we don't fold past words. */
299 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
300 0x5555555555555555ULL);
301 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
302 0x3333333333333333ULL);
303 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
304 0x0f0f0f0f0f0f0f0fULL);
305 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
306 0x00ff00ff00ff00ffULL);
307 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
308 0x0000ffff0000ffffULL);
309 return val;
311 #else
312 target_ulong helper_popcntb(target_ulong val)
314 /* Note that we don't fold past bytes */
315 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
316 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
317 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
318 return val;
320 #endif
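/*
 * Illustrative sketch, not part of the upstream file: what the "we don't
 * fold past bytes" note in helper_popcntb() means in practice.  Each byte
 * lane of the result holds the population count of the corresponding input
 * byte instead of one grand total.  Standalone 64-bit rewrite purely for
 * demonstration.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t popcntb_demo(uint64_t val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) & 0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 0x0f0f0f0f0f0f0f0fULL);
    return val; /* stop folding here: one count per byte lane */
}

int main(void)
{
    /* 0xff -> 8 set bits, 0x0f -> 4, 0x01 -> 1, 0x00 -> 0 per lane. */
    printf("%#018" PRIx64 "\n", popcntb_demo(0xff0f010000000000ULL));
    /* prints 0x0804010000000000 */
    return 0;
}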
322 /*****************************************************************************/
323 /* PowerPC 601 specific instructions (POWER bridge) */
324 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
326 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
328 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
329 (int32_t)arg2 == 0) {
330 env->spr[SPR_MQ] = 0;
331 return INT32_MIN;
332 } else {
333 env->spr[SPR_MQ] = tmp % arg2;
334 return tmp / (int32_t)arg2;
338 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
339 target_ulong arg2)
341 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
343 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
344 (int32_t)arg2 == 0) {
345 env->so = env->ov = 1;
346 env->spr[SPR_MQ] = 0;
347 return INT32_MIN;
348 } else {
349 env->spr[SPR_MQ] = tmp % arg2;
350 tmp /= (int32_t)arg2;
351 if ((int32_t)tmp != tmp) {
352 env->so = env->ov = 1;
353 } else {
354 env->ov = 0;
356 return tmp;
360 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
361 target_ulong arg2)
363 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
364 (int32_t)arg2 == 0) {
365 env->spr[SPR_MQ] = 0;
366 return INT32_MIN;
367 } else {
368 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
369 return (int32_t)arg1 / (int32_t)arg2;
373 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
374 target_ulong arg2)
376 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
377 (int32_t)arg2 == 0) {
378 env->so = env->ov = 1;
379 env->spr[SPR_MQ] = 0;
380 return INT32_MIN;
381 } else {
382 env->ov = 0;
383 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
384 return (int32_t)arg1 / (int32_t)arg2;
388 /*****************************************************************************/
389 /* 602 specific instructions */
390 /* mfrom is the craziest instruction ever seen, imho! */
391 /* Real implementation uses a ROM table. Do the same */
393  * Computes: return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
397 #if !defined(CONFIG_USER_ONLY)
398 target_ulong helper_602_mfrom(target_ulong arg)
400 if (likely(arg < 602)) {
401 #include "mfrom_table.inc.c"
402 return mfrom_ROM_table[arg];
403 } else {
404 return 0;
407 #endif
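/*
 * Illustrative sketch, not part of the upstream file: how an entry of
 * mfrom_ROM_table[] could be derived from the formula quoted above.  The
 * real table is shipped pre-generated in "mfrom_table.inc.c"; this is only
 * a plausible reconstruction of such a generator, assuming the "+ 0.5"
 * expresses rounding to the nearest integer.  Build with: cc gen.c -lm
 */
#include <math.h>
#include <stdio.h>

int main(void)
{
    for (int arg = 0; arg < 602; arg++) {
        unsigned entry =
            (unsigned)(256.0 * log10(pow(10.0, -arg / 256.0) + 1.0) + 0.5);
        printf("%4u,%c", entry, (arg % 8 == 7) ? '\n' : ' ');
    }
    printf("\n");
    return 0;
}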
409 /*****************************************************************************/
410 /* Altivec extension helpers */
411 #if defined(HOST_WORDS_BIGENDIAN)
412 #define VECTOR_FOR_INORDER_I(index, element) \
413 for (index = 0; index < ARRAY_SIZE(r->element); index++)
414 #else
415 #define VECTOR_FOR_INORDER_I(index, element) \
416 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
417 #endif
419 /* Saturating arithmetic helpers. */
420 #define SATCVT(from, to, from_type, to_type, min, max) \
421 static inline to_type cvt##from##to(from_type x, int *sat) \
423 to_type r; \
425 if (x < (from_type)min) { \
426 r = min; \
427 *sat = 1; \
428 } else if (x > (from_type)max) { \
429 r = max; \
430 *sat = 1; \
431 } else { \
432 r = x; \
434 return r; \
436 #define SATCVTU(from, to, from_type, to_type, min, max) \
437 static inline to_type cvt##from##to(from_type x, int *sat) \
439 to_type r; \
441 if (x > (from_type)max) { \
442 r = max; \
443 *sat = 1; \
444 } else { \
445 r = x; \
447 return r; \
449 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
450 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
451 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
453 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
454 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
455 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
456 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
457 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
458 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
459 #undef SATCVT
460 #undef SATCVTU
462 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
464 env->vscr = vscr & ~(1u << VSCR_SAT);
465 /* Which bit we set is completely arbitrary, but clear the rest. */
466 env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
467 env->vscr_sat.u64[1] = 0;
468 set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
471 uint32_t helper_mfvscr(CPUPPCState *env)
473 uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
474 return env->vscr | (sat << VSCR_SAT);
477 static inline void set_vscr_sat(CPUPPCState *env)
479 /* The choice of non-zero value is arbitrary. */
480 env->vscr_sat.u32[0] = 1;
483 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
485 int i;
487 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
488 r->u32[i] = ~a->u32[i] < b->u32[i];
492 /* vprtybw */
493 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
495 int i;
496 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
497 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
498 res ^= res >> 8;
499 r->u32[i] = res & 1;
503 /* vprtybd */
504 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
506 int i;
507 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
508 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
509 res ^= res >> 16;
510 res ^= res >> 8;
511 r->u64[i] = res & 1;
515 /* vprtybq */
516 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
518 uint64_t res = b->u64[0] ^ b->u64[1];
519 res ^= res >> 32;
520 res ^= res >> 16;
521 res ^= res >> 8;
522 r->VsrD(1) = res & 1;
523 r->VsrD(0) = 0;
526 #define VARITH_DO(name, op, element) \
527 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
529 int i; \
531 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
532 r->element[i] = a->element[i] op b->element[i]; \
535 VARITH_DO(muluwm, *, u32)
536 #undef VARITH_DO
537 #undef VARITH
539 #define VARITHFP(suffix, func) \
540 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
541 ppc_avr_t *b) \
543 int i; \
545 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
546 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
549 VARITHFP(addfp, float32_add)
550 VARITHFP(subfp, float32_sub)
551 VARITHFP(minfp, float32_min)
552 VARITHFP(maxfp, float32_max)
553 #undef VARITHFP
555 #define VARITHFPFMA(suffix, type) \
556 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
557 ppc_avr_t *b, ppc_avr_t *c) \
559 int i; \
560 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
561 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
562 type, &env->vec_status); \
565 VARITHFPFMA(maddfp, 0);
566 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
567 #undef VARITHFPFMA
569 #define VARITHSAT_CASE(type, op, cvt, element) \
571 type result = (type)a->element[i] op (type)b->element[i]; \
572 r->element[i] = cvt(result, &sat); \
575 #define VARITHSAT_DO(name, op, optype, cvt, element) \
576 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
577 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
579 int sat = 0; \
580 int i; \
582 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
583 VARITHSAT_CASE(optype, op, cvt, element); \
585 if (sat) { \
586 vscr_sat->u32[0] = 1; \
589 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
590 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
591 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
592 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
593 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
594 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
595 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
596 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
597 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
598 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
599 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
600 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
601 #undef VARITHSAT_CASE
602 #undef VARITHSAT_DO
603 #undef VARITHSAT_SIGNED
604 #undef VARITHSAT_UNSIGNED
606 #define VAVG_DO(name, element, etype) \
607 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
609 int i; \
611 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
612 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
613 r->element[i] = x >> 1; \
617 #define VAVG(type, signed_element, signed_type, unsigned_element, \
618 unsigned_type) \
619 VAVG_DO(avgs##type, signed_element, signed_type) \
620 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
621 VAVG(b, s8, int16_t, u8, uint16_t)
622 VAVG(h, s16, int32_t, u16, uint32_t)
623 VAVG(w, s32, int64_t, u32, uint64_t)
624 #undef VAVG_DO
625 #undef VAVG
627 #define VABSDU_DO(name, element) \
628 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
630 int i; \
632 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
633 r->element[i] = (a->element[i] > b->element[i]) ? \
634 (a->element[i] - b->element[i]) : \
635 (b->element[i] - a->element[i]); \
640 * VABSDU - Vector absolute difference unsigned
641 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
642 * element - element type to access from vector
644 #define VABSDU(type, element) \
645 VABSDU_DO(absdu##type, element)
646 VABSDU(b, u8)
647 VABSDU(h, u16)
648 VABSDU(w, u32)
649 #undef VABSDU_DO
650 #undef VABSDU
652 #define VCF(suffix, cvt, element) \
653 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
654 ppc_avr_t *b, uint32_t uim) \
656 int i; \
658 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
659 float32 t = cvt(b->element[i], &env->vec_status); \
660 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
663 VCF(ux, uint32_to_float32, u32)
664 VCF(sx, int32_to_float32, s32)
665 #undef VCF
667 #define VCMP_DO(suffix, compare, element, record) \
668 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
669 ppc_avr_t *a, ppc_avr_t *b) \
671 uint64_t ones = (uint64_t)-1; \
672 uint64_t all = ones; \
673 uint64_t none = 0; \
674 int i; \
676 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
677 uint64_t result = (a->element[i] compare b->element[i] ? \
678 ones : 0x0); \
679 switch (sizeof(a->element[0])) { \
680 case 8: \
681 r->u64[i] = result; \
682 break; \
683 case 4: \
684 r->u32[i] = result; \
685 break; \
686 case 2: \
687 r->u16[i] = result; \
688 break; \
689 case 1: \
690 r->u8[i] = result; \
691 break; \
693 all &= result; \
694 none |= result; \
696 if (record) { \
697 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
700 #define VCMP(suffix, compare, element) \
701 VCMP_DO(suffix, compare, element, 0) \
702 VCMP_DO(suffix##_dot, compare, element, 1)
703 VCMP(equb, ==, u8)
704 VCMP(equh, ==, u16)
705 VCMP(equw, ==, u32)
706 VCMP(equd, ==, u64)
707 VCMP(gtub, >, u8)
708 VCMP(gtuh, >, u16)
709 VCMP(gtuw, >, u32)
710 VCMP(gtud, >, u64)
711 VCMP(gtsb, >, s8)
712 VCMP(gtsh, >, s16)
713 VCMP(gtsw, >, s32)
714 VCMP(gtsd, >, s64)
715 #undef VCMP_DO
716 #undef VCMP
718 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
719 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
720 ppc_avr_t *a, ppc_avr_t *b) \
722 etype ones = (etype)-1; \
723 etype all = ones; \
724 etype result, none = 0; \
725 int i; \
727 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
728 if (cmpzero) { \
729 result = ((a->element[i] == 0) \
730 || (b->element[i] == 0) \
731 || (a->element[i] != b->element[i]) ? \
732 ones : 0x0); \
733 } else { \
734 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
736 r->element[i] = result; \
737 all &= result; \
738 none |= result; \
740 if (record) { \
741 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
746 * VCMPNEZ - Vector compare not equal to zero
747 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
748 * element - element type to access from vector
750 #define VCMPNE(suffix, element, etype, cmpzero) \
751 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
752 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
753 VCMPNE(zb, u8, uint8_t, 1)
754 VCMPNE(zh, u16, uint16_t, 1)
755 VCMPNE(zw, u32, uint32_t, 1)
756 VCMPNE(b, u8, uint8_t, 0)
757 VCMPNE(h, u16, uint16_t, 0)
758 VCMPNE(w, u32, uint32_t, 0)
759 #undef VCMPNE_DO
760 #undef VCMPNE
762 #define VCMPFP_DO(suffix, compare, order, record) \
763 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
764 ppc_avr_t *a, ppc_avr_t *b) \
766 uint32_t ones = (uint32_t)-1; \
767 uint32_t all = ones; \
768 uint32_t none = 0; \
769 int i; \
771 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
772 uint32_t result; \
773 int rel = float32_compare_quiet(a->f32[i], b->f32[i], \
774 &env->vec_status); \
775 if (rel == float_relation_unordered) { \
776 result = 0; \
777 } else if (rel compare order) { \
778 result = ones; \
779 } else { \
780 result = 0; \
782 r->u32[i] = result; \
783 all &= result; \
784 none |= result; \
786 if (record) { \
787 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
790 #define VCMPFP(suffix, compare, order) \
791 VCMPFP_DO(suffix, compare, order, 0) \
792 VCMPFP_DO(suffix##_dot, compare, order, 1)
793 VCMPFP(eqfp, ==, float_relation_equal)
794 VCMPFP(gefp, !=, float_relation_less)
795 VCMPFP(gtfp, ==, float_relation_greater)
796 #undef VCMPFP_DO
797 #undef VCMPFP
799 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
800 ppc_avr_t *a, ppc_avr_t *b, int record)
802 int i;
803 int all_in = 0;
805 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
806 int le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
807 &env->vec_status);
808 if (le_rel == float_relation_unordered) {
809 r->u32[i] = 0xc0000000;
810 all_in = 1;
811 } else {
812 float32 bneg = float32_chs(b->f32[i]);
813 int ge_rel = float32_compare_quiet(a->f32[i], bneg,
814 &env->vec_status);
815 int le = le_rel != float_relation_greater;
816 int ge = ge_rel != float_relation_less;
818 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
819 all_in |= (!le | !ge);
822 if (record) {
823 env->crf[6] = (all_in == 0) << 1;
827 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
829 vcmpbfp_internal(env, r, a, b, 0);
832 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
833 ppc_avr_t *b)
835 vcmpbfp_internal(env, r, a, b, 1);
838 #define VCT(suffix, satcvt, element) \
839 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
840 ppc_avr_t *b, uint32_t uim) \
842 int i; \
843 int sat = 0; \
844 float_status s = env->vec_status; \
846 set_float_rounding_mode(float_round_to_zero, &s); \
847 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
848 if (float32_is_any_nan(b->f32[i])) { \
849 r->element[i] = 0; \
850 } else { \
851 float64 t = float32_to_float64(b->f32[i], &s); \
852 int64_t j; \
854 t = float64_scalbn(t, uim, &s); \
855 j = float64_to_int64(t, &s); \
856 r->element[i] = satcvt(j, &sat); \
859 if (sat) { \
860 set_vscr_sat(env); \
863 VCT(uxs, cvtsduw, u32)
864 VCT(sxs, cvtsdsw, s32)
865 #undef VCT
867 target_ulong helper_vclzlsbb(ppc_avr_t *r)
869 target_ulong count = 0;
870 int i;
871 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
872 if (r->VsrB(i) & 0x01) {
873 break;
875 count++;
877 return count;
880 target_ulong helper_vctzlsbb(ppc_avr_t *r)
882 target_ulong count = 0;
883 int i;
884 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
885 if (r->VsrB(i) & 0x01) {
886 break;
888 count++;
890 return count;
893 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
894 ppc_avr_t *b, ppc_avr_t *c)
896 int sat = 0;
897 int i;
899 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
900 int32_t prod = a->s16[i] * b->s16[i];
901 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
903 r->s16[i] = cvtswsh(t, &sat);
906 if (sat) {
907 set_vscr_sat(env);
911 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
912 ppc_avr_t *b, ppc_avr_t *c)
914 int sat = 0;
915 int i;
917 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
918 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
919 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
920 r->s16[i] = cvtswsh(t, &sat);
923 if (sat) {
924 set_vscr_sat(env);
928 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
930 int i;
932 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
933 int32_t prod = a->s16[i] * b->s16[i];
934 r->s16[i] = (int16_t) (prod + c->s16[i]);
938 #define VMRG_DO(name, element, access, ofs) \
939 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
941 ppc_avr_t result; \
942 int i, half = ARRAY_SIZE(r->element) / 2; \
944 for (i = 0; i < half; i++) { \
945 result.access(i * 2 + 0) = a->access(i + ofs); \
946 result.access(i * 2 + 1) = b->access(i + ofs); \
948 *r = result; \
951 #define VMRG(suffix, element, access) \
952 VMRG_DO(mrgl##suffix, element, access, half) \
953 VMRG_DO(mrgh##suffix, element, access, 0)
954 VMRG(b, u8, VsrB)
955 VMRG(h, u16, VsrH)
956 VMRG(w, u32, VsrW)
957 #undef VMRG_DO
958 #undef VMRG
960 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
961 ppc_avr_t *b, ppc_avr_t *c)
963 int32_t prod[16];
964 int i;
966 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
967 prod[i] = (int32_t)a->s8[i] * b->u8[i];
970 VECTOR_FOR_INORDER_I(i, s32) {
971 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
972 prod[4 * i + 2] + prod[4 * i + 3];
976 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
977 ppc_avr_t *b, ppc_avr_t *c)
979 int32_t prod[8];
980 int i;
982 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
983 prod[i] = a->s16[i] * b->s16[i];
986 VECTOR_FOR_INORDER_I(i, s32) {
987 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
991 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
992 ppc_avr_t *b, ppc_avr_t *c)
994 int32_t prod[8];
995 int i;
996 int sat = 0;
998 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
999 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1002 VECTOR_FOR_INORDER_I(i, s32) {
1003 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1005 r->u32[i] = cvtsdsw(t, &sat);
1008 if (sat) {
1009 set_vscr_sat(env);
1013 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1014 ppc_avr_t *b, ppc_avr_t *c)
1016 uint16_t prod[16];
1017 int i;
1019 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1020 prod[i] = a->u8[i] * b->u8[i];
1023 VECTOR_FOR_INORDER_I(i, u32) {
1024 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1025 prod[4 * i + 2] + prod[4 * i + 3];
1029 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1030 ppc_avr_t *b, ppc_avr_t *c)
1032 uint32_t prod[8];
1033 int i;
1035 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1036 prod[i] = a->u16[i] * b->u16[i];
1039 VECTOR_FOR_INORDER_I(i, u32) {
1040 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1044 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1045 ppc_avr_t *b, ppc_avr_t *c)
1047 uint32_t prod[8];
1048 int i;
1049 int sat = 0;
1051 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1052 prod[i] = a->u16[i] * b->u16[i];
1055 VECTOR_FOR_INORDER_I(i, s32) {
1056 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1058 r->u32[i] = cvtuduw(t, &sat);
1061 if (sat) {
1062 set_vscr_sat(env);
1066 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1067 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1069 int i; \
1071 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1072 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1073 (cast)b->mul_access(i); \
1077 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1078 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1080 int i; \
1082 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1083 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1084 (cast)b->mul_access(i + 1); \
1088 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1089 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
1090 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
1091 VMUL(sb, s8, VsrSB, VsrSH, int16_t)
1092 VMUL(sh, s16, VsrSH, VsrSW, int32_t)
1093 VMUL(sw, s32, VsrSW, VsrSD, int64_t)
1094 VMUL(ub, u8, VsrB, VsrH, uint16_t)
1095 VMUL(uh, u16, VsrH, VsrW, uint32_t)
1096 VMUL(uw, u32, VsrW, VsrD, uint64_t)
1097 #undef VMUL_DO_EVN
1098 #undef VMUL_DO_ODD
1099 #undef VMUL
1101 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1102 ppc_avr_t *c)
1104 ppc_avr_t result;
1105 int i;
1107 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1108 int s = c->VsrB(i) & 0x1f;
1109 int index = s & 0xf;
1111 if (s & 0x10) {
1112 result.VsrB(i) = b->VsrB(index);
1113 } else {
1114 result.VsrB(i) = a->VsrB(index);
1117 *r = result;
1120 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1121 ppc_avr_t *c)
1123 ppc_avr_t result;
1124 int i;
1126 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1127 int s = c->VsrB(i) & 0x1f;
1128 int index = 15 - (s & 0xf);
1130 if (s & 0x10) {
1131 result.VsrB(i) = a->VsrB(index);
1132 } else {
1133 result.VsrB(i) = b->VsrB(index);
1136 *r = result;
1139 #if defined(HOST_WORDS_BIGENDIAN)
1140 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1141 #define VBPERMD_INDEX(i) (i)
1142 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1143 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1144 #else
1145 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1146 #define VBPERMD_INDEX(i) (1 - i)
1147 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1148 #define EXTRACT_BIT(avr, i, index) \
1149 (extract64((avr)->u64[1 - i], 63 - index, 1))
1150 #endif
1152 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1154 int i, j;
1155 ppc_avr_t result = { .u64 = { 0, 0 } };
1156 VECTOR_FOR_INORDER_I(i, u64) {
1157 for (j = 0; j < 8; j++) {
1158 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1159 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1160 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1164 *r = result;
1167 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1169 int i;
1170 uint64_t perm = 0;
1172 VECTOR_FOR_INORDER_I(i, u8) {
1173 int index = VBPERMQ_INDEX(b, i);
1175 if (index < 128) {
1176 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1177 if (a->u64[VBPERMQ_DW(index)] & mask) {
1178 perm |= (0x8000 >> i);
1183 r->VsrD(0) = perm;
1184 r->VsrD(1) = 0;
1187 #undef VBPERMQ_INDEX
1188 #undef VBPERMQ_DW
1190 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1191 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1193 int i, j; \
1194 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1196 VECTOR_FOR_INORDER_I(i, srcfld) { \
1197 prod[i] = 0; \
1198 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1199 if (a->srcfld[i] & (1ull << j)) { \
1200 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1205 VECTOR_FOR_INORDER_I(i, trgfld) { \
1206 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1210 PMSUM(vpmsumb, u8, u16, uint16_t)
1211 PMSUM(vpmsumh, u16, u32, uint32_t)
1212 PMSUM(vpmsumw, u32, u64, uint64_t)
1214 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1217 #ifdef CONFIG_INT128
1218 int i, j;
1219 __uint128_t prod[2];
1221 VECTOR_FOR_INORDER_I(i, u64) {
1222 prod[i] = 0;
1223 for (j = 0; j < 64; j++) {
1224 if (a->u64[i] & (1ull << j)) {
1225 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1230 r->u128 = prod[0] ^ prod[1];
1232 #else
1233 int i, j;
1234 ppc_avr_t prod[2];
1236 VECTOR_FOR_INORDER_I(i, u64) {
1237 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1238 for (j = 0; j < 64; j++) {
1239 if (a->u64[i] & (1ull << j)) {
1240 ppc_avr_t bshift;
1241 if (j == 0) {
1242 bshift.VsrD(0) = 0;
1243 bshift.VsrD(1) = b->u64[i];
1244 } else {
1245 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1246 bshift.VsrD(1) = b->u64[i] << j;
1248 prod[i].VsrD(1) ^= bshift.VsrD(1);
1249 prod[i].VsrD(0) ^= bshift.VsrD(0);
1254 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1255 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1256 #endif
1260 #if defined(HOST_WORDS_BIGENDIAN)
1261 #define PKBIG 1
1262 #else
1263 #define PKBIG 0
1264 #endif
1265 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1267 int i, j;
1268 ppc_avr_t result;
1269 #if defined(HOST_WORDS_BIGENDIAN)
1270 const ppc_avr_t *x[2] = { a, b };
1271 #else
1272 const ppc_avr_t *x[2] = { b, a };
1273 #endif
1275 VECTOR_FOR_INORDER_I(i, u64) {
1276 VECTOR_FOR_INORDER_I(j, u32) {
1277 uint32_t e = x[i]->u32[j];
1279 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1280 ((e >> 6) & 0x3e0) |
1281 ((e >> 3) & 0x1f));
1284 *r = result;
1287 #define VPK(suffix, from, to, cvt, dosat) \
1288 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1289 ppc_avr_t *a, ppc_avr_t *b) \
1291 int i; \
1292 int sat = 0; \
1293 ppc_avr_t result; \
1294 ppc_avr_t *a0 = PKBIG ? a : b; \
1295 ppc_avr_t *a1 = PKBIG ? b : a; \
1297 VECTOR_FOR_INORDER_I(i, from) { \
1298 result.to[i] = cvt(a0->from[i], &sat); \
1299 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1301 *r = result; \
1302 if (dosat && sat) { \
1303 set_vscr_sat(env); \
1306 #define I(x, y) (x)
1307 VPK(shss, s16, s8, cvtshsb, 1)
1308 VPK(shus, s16, u8, cvtshub, 1)
1309 VPK(swss, s32, s16, cvtswsh, 1)
1310 VPK(swus, s32, u16, cvtswuh, 1)
1311 VPK(sdss, s64, s32, cvtsdsw, 1)
1312 VPK(sdus, s64, u32, cvtsduw, 1)
1313 VPK(uhus, u16, u8, cvtuhub, 1)
1314 VPK(uwus, u32, u16, cvtuwuh, 1)
1315 VPK(udus, u64, u32, cvtuduw, 1)
1316 VPK(uhum, u16, u8, I, 0)
1317 VPK(uwum, u32, u16, I, 0)
1318 VPK(udum, u64, u32, I, 0)
1319 #undef I
1320 #undef VPK
1321 #undef PKBIG
1323 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1325 int i;
1327 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1328 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1332 #define VRFI(suffix, rounding) \
1333 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1334 ppc_avr_t *b) \
1336 int i; \
1337 float_status s = env->vec_status; \
1339 set_float_rounding_mode(rounding, &s); \
1340 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1341 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1344 VRFI(n, float_round_nearest_even)
1345 VRFI(m, float_round_down)
1346 VRFI(p, float_round_up)
1347 VRFI(z, float_round_to_zero)
1348 #undef VRFI
1350 #define VROTATE(suffix, element, mask) \
1351 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1353 int i; \
1355 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1356 unsigned int shift = b->element[i] & mask; \
1357 r->element[i] = (a->element[i] << shift) | \
1358 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1361 VROTATE(b, u8, 0x7)
1362 VROTATE(h, u16, 0xF)
1363 VROTATE(w, u32, 0x1F)
1364 VROTATE(d, u64, 0x3F)
1365 #undef VROTATE
1367 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1369 int i;
1371 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1372 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1374 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1378 #define VRLMI(name, size, element, insert) \
1379 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1381 int i; \
1382 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1383 uint##size##_t src1 = a->element[i]; \
1384 uint##size##_t src2 = b->element[i]; \
1385 uint##size##_t src3 = r->element[i]; \
1386 uint##size##_t begin, end, shift, mask, rot_val; \
1388 shift = extract##size(src2, 0, 6); \
1389 end = extract##size(src2, 8, 6); \
1390 begin = extract##size(src2, 16, 6); \
1391 rot_val = rol##size(src1, shift); \
1392 mask = mask_u##size(begin, end); \
1393 if (insert) { \
1394 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1395 } else { \
1396 r->element[i] = (rot_val & mask); \
1401 VRLMI(vrldmi, 64, u64, 1);
1402 VRLMI(vrlwmi, 32, u32, 1);
1403 VRLMI(vrldnm, 64, u64, 0);
1404 VRLMI(vrlwnm, 32, u32, 0);
1406 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1407 ppc_avr_t *c)
1409 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1410 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1413 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1415 int i;
1417 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1418 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1422 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1424 int i;
1426 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1427 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1431 #if defined(HOST_WORDS_BIGENDIAN)
1432 #define VEXTU_X_DO(name, size, left) \
1433 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1435 int index; \
1436 if (left) { \
1437 index = (a & 0xf) * 8; \
1438 } else { \
1439 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1441 return int128_getlo(int128_rshift(b->s128, index)) & \
1442 MAKE_64BIT_MASK(0, size); \
1444 #else
1445 #define VEXTU_X_DO(name, size, left) \
1446 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1448 int index; \
1449 if (left) { \
1450 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1451 } else { \
1452 index = (a & 0xf) * 8; \
1454 return int128_getlo(int128_rshift(b->s128, index)) & \
1455 MAKE_64BIT_MASK(0, size); \
1457 #endif
1459 VEXTU_X_DO(vextublx, 8, 1)
1460 VEXTU_X_DO(vextuhlx, 16, 1)
1461 VEXTU_X_DO(vextuwlx, 32, 1)
1462 VEXTU_X_DO(vextubrx, 8, 0)
1463 VEXTU_X_DO(vextuhrx, 16, 0)
1464 VEXTU_X_DO(vextuwrx, 32, 0)
1465 #undef VEXTU_X_DO
1467 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1469 int i;
1470 unsigned int shift, bytes, size;
1472 size = ARRAY_SIZE(r->u8);
1473 for (i = 0; i < size; i++) {
1474 shift = b->VsrB(i) & 0x7; /* extract shift value */
1475 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1476 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1477 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1481 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1483 int i;
1484 unsigned int shift, bytes;
1487      * Use reverse order, as the destination and source registers can be
1488      * the same. The register is modified in place, saving a temporary;
1489      * reverse order guarantees that a computed result is not fed back.
1491 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1492 shift = b->VsrB(i) & 0x7; /* extract shift value */
1493 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1494 /* extract adjacent bytes */
1495 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
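/*
 * Illustrative sketch, not part of the upstream file: why helper_vsrv()
 * above walks the bytes from the highest index down.  With a simplified
 * 4-byte vector and the destination aliasing the source, ascending order
 * reads bytes it has already overwritten, while descending order never
 * does (element i only reads source indices i - 1 and i).
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void vsrv4(uint8_t *r, const uint8_t *a, const uint8_t *b, int descending)
{
    int start = descending ? 3 : 0;
    int stop = descending ? -1 : 4;
    int step = descending ? -1 : 1;

    for (int i = start; i != stop; i += step) {
        unsigned shift = b[i] & 0x7;
        unsigned bytes = ((i ? a[i - 1] : 0) << 8) + a[i];
        r[i] = (bytes >> shift) & 0xFF;
    }
}

int main(void)
{
    uint8_t v[4] = { 0x12, 0x34, 0x56, 0x78 };
    uint8_t sh[4] = { 4, 4, 4, 4 };
    uint8_t ref[4], inplace[4];

    vsrv4(ref, v, sh, 1);               /* separate destination: reference */

    memcpy(inplace, v, 4);
    vsrv4(inplace, inplace, sh, 1);     /* descending, in place */
    printf("descending matches: %d\n", memcmp(ref, inplace, 4) == 0); /* 1 */

    memcpy(inplace, v, 4);
    vsrv4(inplace, inplace, sh, 0);     /* ascending, in place: fed back */
    printf("ascending matches:  %d\n", memcmp(ref, inplace, 4) == 0); /* 0 */
    return 0;
}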
1499 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1501 int sh = shift & 0xf;
1502 int i;
1503 ppc_avr_t result;
1505 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1506 int index = sh + i;
1507 if (index > 0xf) {
1508 result.VsrB(i) = b->VsrB(index - 0x10);
1509 } else {
1510 result.VsrB(i) = a->VsrB(index);
1513 *r = result;
1516 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1518 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1520 #if defined(HOST_WORDS_BIGENDIAN)
1521 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1522 memset(&r->u8[16 - sh], 0, sh);
1523 #else
1524 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1525 memset(&r->u8[0], 0, sh);
1526 #endif
1529 #if defined(HOST_WORDS_BIGENDIAN)
1530 #define VINSERT(suffix, element) \
1531 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1533 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
1534 sizeof(r->element[0])); \
1536 #else
1537 #define VINSERT(suffix, element) \
1538 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1540 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1541 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1543 #endif
1544 VINSERT(b, u8)
1545 VINSERT(h, u16)
1546 VINSERT(w, u32)
1547 VINSERT(d, u64)
1548 #undef VINSERT
1549 #if defined(HOST_WORDS_BIGENDIAN)
1550 #define VEXTRACT(suffix, element) \
1551 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1553 uint32_t es = sizeof(r->element[0]); \
1554 memmove(&r->u8[8 - es], &b->u8[index], es); \
1555 memset(&r->u8[8], 0, 8); \
1556 memset(&r->u8[0], 0, 8 - es); \
1558 #else
1559 #define VEXTRACT(suffix, element) \
1560 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1562 uint32_t es = sizeof(r->element[0]); \
1563 uint32_t s = (16 - index) - es; \
1564 memmove(&r->u8[8], &b->u8[s], es); \
1565 memset(&r->u8[0], 0, 8); \
1566 memset(&r->u8[8 + es], 0, 8 - es); \
1568 #endif
1569 VEXTRACT(ub, u8)
1570 VEXTRACT(uh, u16)
1571 VEXTRACT(uw, u32)
1572 VEXTRACT(d, u64)
1573 #undef VEXTRACT
1575 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1576 ppc_vsr_t *xb, uint32_t index)
1578 ppc_vsr_t t = { };
1579 size_t es = sizeof(uint32_t);
1580 uint32_t ext_index;
1581 int i;
1583 ext_index = index;
1584 for (i = 0; i < es; i++, ext_index++) {
1585 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1588 *xt = t;
1591 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1592 ppc_vsr_t *xb, uint32_t index)
1594 ppc_vsr_t t = *xt;
1595 size_t es = sizeof(uint32_t);
1596 int ins_index, i = 0;
1598 ins_index = index;
1599 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1600 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1603 *xt = t;
1606 #define VEXT_SIGNED(name, element, cast) \
1607 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1609 int i; \
1610 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1611 r->element[i] = (cast)b->element[i]; \
1614 VEXT_SIGNED(vextsb2w, s32, int8_t)
1615 VEXT_SIGNED(vextsb2d, s64, int8_t)
1616 VEXT_SIGNED(vextsh2w, s32, int16_t)
1617 VEXT_SIGNED(vextsh2d, s64, int16_t)
1618 VEXT_SIGNED(vextsw2d, s64, int32_t)
1619 #undef VEXT_SIGNED
1621 #define VNEG(name, element) \
1622 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1624 int i; \
1625 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1626 r->element[i] = -b->element[i]; \
1629 VNEG(vnegw, s32)
1630 VNEG(vnegd, s64)
1631 #undef VNEG
1633 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1635 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1637 #if defined(HOST_WORDS_BIGENDIAN)
1638 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1639 memset(&r->u8[0], 0, sh);
1640 #else
1641 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1642 memset(&r->u8[16 - sh], 0, sh);
1643 #endif
1646 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1648 int i;
1650 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1651 r->u32[i] = a->u32[i] >= b->u32[i];
1655 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1657 int64_t t;
1658 int i, upper;
1659 ppc_avr_t result;
1660 int sat = 0;
1662 upper = ARRAY_SIZE(r->s32) - 1;
1663 t = (int64_t)b->VsrSW(upper);
1664 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1665 t += a->VsrSW(i);
1666 result.VsrSW(i) = 0;
1668 result.VsrSW(upper) = cvtsdsw(t, &sat);
1669 *r = result;
1671 if (sat) {
1672 set_vscr_sat(env);
1676 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1678 int i, j, upper;
1679 ppc_avr_t result;
1680 int sat = 0;
1682 upper = 1;
1683 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1684 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1686 result.VsrD(i) = 0;
1687 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1688 t += a->VsrSW(2 * i + j);
1690 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1693 *r = result;
1694 if (sat) {
1695 set_vscr_sat(env);
1699 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1701 int i, j;
1702 int sat = 0;
1704 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1705 int64_t t = (int64_t)b->s32[i];
1707 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1708 t += a->s8[4 * i + j];
1710 r->s32[i] = cvtsdsw(t, &sat);
1713 if (sat) {
1714 set_vscr_sat(env);
1718 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1720 int sat = 0;
1721 int i;
1723 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1724 int64_t t = (int64_t)b->s32[i];
1726 t += a->s16[2 * i] + a->s16[2 * i + 1];
1727 r->s32[i] = cvtsdsw(t, &sat);
1730 if (sat) {
1731 set_vscr_sat(env);
1735 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1737 int i, j;
1738 int sat = 0;
1740 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1741 uint64_t t = (uint64_t)b->u32[i];
1743 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1744 t += a->u8[4 * i + j];
1746 r->u32[i] = cvtuduw(t, &sat);
1749 if (sat) {
1750 set_vscr_sat(env);
1754 #if defined(HOST_WORDS_BIGENDIAN)
1755 #define UPKHI 1
1756 #define UPKLO 0
1757 #else
1758 #define UPKHI 0
1759 #define UPKLO 1
1760 #endif
1761 #define VUPKPX(suffix, hi) \
1762 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1764 int i; \
1765 ppc_avr_t result; \
1767 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1768 uint16_t e = b->u16[hi ? i : i + 4]; \
1769 uint8_t a = (e >> 15) ? 0xff : 0; \
1770 uint8_t r = (e >> 10) & 0x1f; \
1771 uint8_t g = (e >> 5) & 0x1f; \
1772 uint8_t b = e & 0x1f; \
1774 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1776 *r = result; \
1778 VUPKPX(lpx, UPKLO)
1779 VUPKPX(hpx, UPKHI)
1780 #undef VUPKPX
1782 #define VUPK(suffix, unpacked, packee, hi) \
1783 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1785 int i; \
1786 ppc_avr_t result; \
1788 if (hi) { \
1789 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1790 result.unpacked[i] = b->packee[i]; \
1792 } else { \
1793 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1794 i++) { \
1795 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1798 *r = result; \
1800 VUPK(hsb, s16, s8, UPKHI)
1801 VUPK(hsh, s32, s16, UPKHI)
1802 VUPK(hsw, s64, s32, UPKHI)
1803 VUPK(lsb, s16, s8, UPKLO)
1804 VUPK(lsh, s32, s16, UPKLO)
1805 VUPK(lsw, s64, s32, UPKLO)
1806 #undef VUPK
1807 #undef UPKHI
1808 #undef UPKLO
1810 #define VGENERIC_DO(name, element) \
1811 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1813 int i; \
1815 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1816 r->element[i] = name(b->element[i]); \
1820 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1821 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1822 #define clzw(v) clz32((v))
1824 VGENERIC_DO(clzb, u8)
1825 VGENERIC_DO(clzh, u16)
1826 VGENERIC_DO(clzw, u32)
1828 #undef clzb
1829 #undef clzh
1830 #undef clzw
1832 #define ctzb(v) ((v) ? ctz32(v) : 8)
1833 #define ctzh(v) ((v) ? ctz32(v) : 16)
1834 #define ctzw(v) ctz32((v))
1835 #define ctzd(v) ctz64((v))
1837 VGENERIC_DO(ctzb, u8)
1838 VGENERIC_DO(ctzh, u16)
1839 VGENERIC_DO(ctzw, u32)
1840 VGENERIC_DO(ctzd, u64)
1842 #undef ctzb
1843 #undef ctzh
1844 #undef ctzw
1845 #undef ctzd
1847 #define popcntb(v) ctpop8(v)
1848 #define popcnth(v) ctpop16(v)
1849 #define popcntw(v) ctpop32(v)
1850 #define popcntd(v) ctpop64(v)
1852 VGENERIC_DO(popcntb, u8)
1853 VGENERIC_DO(popcnth, u16)
1854 VGENERIC_DO(popcntw, u32)
1855 VGENERIC_DO(popcntd, u64)
1857 #undef popcntb
1858 #undef popcnth
1859 #undef popcntw
1860 #undef popcntd
1862 #undef VGENERIC_DO
1864 #if defined(HOST_WORDS_BIGENDIAN)
1865 #define QW_ONE { .u64 = { 0, 1 } }
1866 #else
1867 #define QW_ONE { .u64 = { 1, 0 } }
1868 #endif
1870 #ifndef CONFIG_INT128
1872 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1874 t->u64[0] = ~a.u64[0];
1875 t->u64[1] = ~a.u64[1];
1878 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1880 if (a.VsrD(0) < b.VsrD(0)) {
1881 return -1;
1882 } else if (a.VsrD(0) > b.VsrD(0)) {
1883 return 1;
1884 } else if (a.VsrD(1) < b.VsrD(1)) {
1885 return -1;
1886 } else if (a.VsrD(1) > b.VsrD(1)) {
1887 return 1;
1888 } else {
1889 return 0;
1893 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1895 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1896 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1897 (~a.VsrD(1) < b.VsrD(1));
1900 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1902 ppc_avr_t not_a;
1903 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1904 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1905 (~a.VsrD(1) < b.VsrD(1));
1906 avr_qw_not(&not_a, a);
1907 return avr_qw_cmpu(not_a, b) < 0;
1910 #endif
1912 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1914 #ifdef CONFIG_INT128
1915 r->u128 = a->u128 + b->u128;
1916 #else
1917 avr_qw_add(r, *a, *b);
1918 #endif
1921 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1923 #ifdef CONFIG_INT128
1924 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1925 #else
1927 if (c->VsrD(1) & 1) {
1928 ppc_avr_t tmp;
1930 tmp.VsrD(0) = 0;
1931 tmp.VsrD(1) = c->VsrD(1) & 1;
1932 avr_qw_add(&tmp, *a, tmp);
1933 avr_qw_add(r, tmp, *b);
1934 } else {
1935 avr_qw_add(r, *a, *b);
1937 #endif
1940 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1942 #ifdef CONFIG_INT128
1943 r->u128 = (~a->u128 < b->u128);
1944 #else
1945 ppc_avr_t not_a;
1947 avr_qw_not(&not_a, *a);
1949 r->VsrD(0) = 0;
1950 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
1951 #endif
1954 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1956 #ifdef CONFIG_INT128
1957 int carry_out = (~a->u128 < b->u128);
1958 if (!carry_out && (c->u128 & 1)) {
1959 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
1960 ((a->u128 != 0) || (b->u128 != 0));
1962 r->u128 = carry_out;
1963 #else
1965 int carry_in = c->VsrD(1) & 1;
1966 int carry_out = 0;
1967 ppc_avr_t tmp;
1969 carry_out = avr_qw_addc(&tmp, *a, *b);
1971 if (!carry_out && carry_in) {
1972 ppc_avr_t one = QW_ONE;
1973 carry_out = avr_qw_addc(&tmp, tmp, one);
1975 r->VsrD(0) = 0;
1976 r->VsrD(1) = carry_out;
1977 #endif
1980 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1982 #ifdef CONFIG_INT128
1983 r->u128 = a->u128 - b->u128;
1984 #else
1985 ppc_avr_t tmp;
1986 ppc_avr_t one = QW_ONE;
1988 avr_qw_not(&tmp, *b);
1989 avr_qw_add(&tmp, *a, tmp);
1990 avr_qw_add(r, tmp, one);
1991 #endif
1994 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1996 #ifdef CONFIG_INT128
1997 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
1998 #else
1999 ppc_avr_t tmp, sum;
2001 avr_qw_not(&tmp, *b);
2002 avr_qw_add(&sum, *a, tmp);
2004 tmp.VsrD(0) = 0;
2005 tmp.VsrD(1) = c->VsrD(1) & 1;
2006 avr_qw_add(r, sum, tmp);
2007 #endif
2010 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2012 #ifdef CONFIG_INT128
2013 r->u128 = (~a->u128 < ~b->u128) ||
2014 (a->u128 + ~b->u128 == (__uint128_t)-1);
2015 #else
2016 int carry = (avr_qw_cmpu(*a, *b) > 0);
2017 if (!carry) {
2018 ppc_avr_t tmp;
2019 avr_qw_not(&tmp, *b);
2020 avr_qw_add(&tmp, *a, tmp);
2021 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2023 r->VsrD(0) = 0;
2024 r->VsrD(1) = carry;
2025 #endif
2028 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2030 #ifdef CONFIG_INT128
2031 r->u128 =
2032 (~a->u128 < ~b->u128) ||
2033 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2034 #else
2035 int carry_in = c->VsrD(1) & 1;
2036 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2037 if (!carry_out && carry_in) {
2038 ppc_avr_t tmp;
2039 avr_qw_not(&tmp, *b);
2040 avr_qw_add(&tmp, *a, tmp);
2041 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2044 r->VsrD(0) = 0;
2045 r->VsrD(1) = carry_out;
2046 #endif
2049 #define BCD_PLUS_PREF_1 0xC
2050 #define BCD_PLUS_PREF_2 0xF
2051 #define BCD_PLUS_ALT_1 0xA
2052 #define BCD_NEG_PREF 0xD
2053 #define BCD_NEG_ALT 0xB
2054 #define BCD_PLUS_ALT_2 0xE
2055 #define NATIONAL_PLUS 0x2B
2056 #define NATIONAL_NEG 0x2D
2058 #if defined(HOST_WORDS_BIGENDIAN)
2059 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2060 #else
2061 #define BCD_DIG_BYTE(n) ((n) / 2)
2062 #endif
2064 static int bcd_get_sgn(ppc_avr_t *bcd)
2066 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2067 case BCD_PLUS_PREF_1:
2068 case BCD_PLUS_PREF_2:
2069 case BCD_PLUS_ALT_1:
2070 case BCD_PLUS_ALT_2:
2072 return 1;
2075 case BCD_NEG_PREF:
2076 case BCD_NEG_ALT:
2078 return -1;
2081 default:
2083 return 0;
2088 static int bcd_preferred_sgn(int sgn, int ps)
2090 if (sgn >= 0) {
2091 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2092 } else {
2093 return BCD_NEG_PREF;
2097 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2099 uint8_t result;
2100 if (n & 1) {
2101 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2102 } else {
2103 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2106 if (unlikely(result > 9)) {
2107 *invalid = true;
2109 return result;
2112 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2114 if (n & 1) {
2115 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2116 bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4);
2117 } else {
2118 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2119 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2123 static bool bcd_is_valid(ppc_avr_t *bcd)
2125 int i;
2126 int invalid = 0;
2128 if (bcd_get_sgn(bcd) == 0) {
2129 return false;
2132 for (i = 1; i < 32; i++) {
2133 bcd_get_digit(bcd, i, &invalid);
2134 if (unlikely(invalid)) {
2135 return false;
2138 return true;
2141 static int bcd_cmp_zero(ppc_avr_t *bcd)
2143 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2144 return CRF_EQ;
2145 } else {
2146 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2150 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2152 return reg->VsrH(7 - n);
2155 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2157 reg->VsrH(7 - n) = val;
2160 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2162 int i;
2163 int invalid = 0;
2164 for (i = 31; i > 0; i--) {
2165 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2166 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2167 if (unlikely(invalid)) {
2168 return 0; /* doesn't matter */
2169 } else if (dig_a > dig_b) {
2170 return 1;
2171 } else if (dig_a < dig_b) {
2172 return -1;
2176 return 0;
2179 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2180 int *overflow)
2182 int carry = 0;
2183 int i;
2184 for (i = 1; i <= 31; i++) {
2185 uint8_t digit = bcd_get_digit(a, i, invalid) +
2186 bcd_get_digit(b, i, invalid) + carry;
2187 if (digit > 9) {
2188 carry = 1;
2189 digit -= 10;
2190 } else {
2191 carry = 0;
2194 bcd_put_digit(t, digit, i);
2197 *overflow = carry;
2200 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2201 int *overflow)
2203 int carry = 0;
2204 int i;
2206 for (i = 1; i <= 31; i++) {
2207 uint8_t digit = bcd_get_digit(a, i, invalid) -
2208 bcd_get_digit(b, i, invalid) + carry;
2209 if (digit & 0x80) {
2210 carry = -1;
2211 digit += 10;
2212 } else {
2213 carry = 0;
2216 bcd_put_digit(t, digit, i);
2219 *overflow = carry;
2222 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2225 int sgna = bcd_get_sgn(a);
2226 int sgnb = bcd_get_sgn(b);
2227 int invalid = (sgna == 0) || (sgnb == 0);
2228 int overflow = 0;
2229 uint32_t cr = 0;
2230 ppc_avr_t result = { .u64 = { 0, 0 } };
2232 if (!invalid) {
2233 if (sgna == sgnb) {
2234 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2235 bcd_add_mag(&result, a, b, &invalid, &overflow);
2236 cr = bcd_cmp_zero(&result);
2237 } else {
2238 int magnitude = bcd_cmp_mag(a, b);
2239 if (magnitude > 0) {
2240 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2241 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2242 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2243 } else if (magnitude < 0) {
2244 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2245 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2246 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2247 } else {
2248 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps);
2249 cr = CRF_EQ;
2254 if (unlikely(invalid)) {
2255 result.VsrD(0) = result.VsrD(1) = -1;
2256 cr = CRF_SO;
2257 } else if (overflow) {
2258 cr |= CRF_SO;
2261 *r = result;
2263 return cr;
2266 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2268 ppc_avr_t bcopy = *b;
2269 int sgnb = bcd_get_sgn(b);
2270 if (sgnb < 0) {
2271 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2272 } else if (sgnb > 0) {
2273 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2275 /* else invalid ... defer to bcdadd code for proper handling */
2277 return helper_bcdadd(r, a, &bcopy, ps);
2280 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2282 int i;
2283 int cr = 0;
2284 uint16_t national = 0;
2285 uint16_t sgnb = get_national_digit(b, 0);
2286 ppc_avr_t ret = { .u64 = { 0, 0 } };
2287 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2289 for (i = 1; i < 8; i++) {
2290 national = get_national_digit(b, i);
2291 if (unlikely(national < 0x30 || national > 0x39)) {
2292 invalid = 1;
2293 break;
2296 bcd_put_digit(&ret, national & 0xf, i);
2299 if (sgnb == NATIONAL_PLUS) {
2300 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2301 } else {
2302 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2305 cr = bcd_cmp_zero(&ret);
2307 if (unlikely(invalid)) {
2308 cr = CRF_SO;
2311 *r = ret;
2313 return cr;
2316 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2318 int i;
2319 int cr = 0;
2320 int sgnb = bcd_get_sgn(b);
2321 int invalid = (sgnb == 0);
2322 ppc_avr_t ret = { .u64 = { 0, 0 } };
2324 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2326 for (i = 1; i < 8; i++) {
2327 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2329 if (unlikely(invalid)) {
2330 break;
2333 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2335 cr = bcd_cmp_zero(b);
2337 if (ox_flag) {
2338 cr |= CRF_SO;
2341 if (unlikely(invalid)) {
2342 cr = CRF_SO;
2345 *r = ret;
2347 return cr;
2350 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2352 int i;
2353 int cr = 0;
2354 int invalid = 0;
2355 int zone_digit = 0;
2356 int zone_lead = ps ? 0xF : 0x3;
2357 int digit = 0;
2358 ppc_avr_t ret = { .u64 = { 0, 0 } };
2359 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;
2361 if (unlikely((sgnb < 0xA) && ps)) {
2362 invalid = 1;
2365 for (i = 0; i < 16; i++) {
2366 zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
2367 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
2368 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2369 invalid = 1;
2370 break;
2373 bcd_put_digit(&ret, digit, i + 1);
2376 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2377 (!ps && (sgnb & 0x4))) {
2378 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2379 } else {
2380 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2383 cr = bcd_cmp_zero(&ret);
2385 if (unlikely(invalid)) {
2386 cr = CRF_SO;
2389 *r = ret;
2391 return cr;
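/*
 * bcdctz.: Decimal Convert To Zoned.  The sixteen low-order BCD digits are
 * expanded to zoned bytes; any significant digit beyond the sixteenth flags
 * overflow (SO).
 */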
2394 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2396 int i;
2397 int cr = 0;
2398 uint8_t digit = 0;
2399 int sgnb = bcd_get_sgn(b);
2400 int zone_lead = (ps) ? 0xF0 : 0x30;
2401 int invalid = (sgnb == 0);
2402 ppc_avr_t ret = { .u64 = { 0, 0 } };
2404 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2406 for (i = 0; i < 16; i++) {
2407 digit = bcd_get_digit(b, i + 1, &invalid);
2409 if (unlikely(invalid)) {
2410 break;
2413 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
2416 if (ps) {
2417 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2418 } else {
2419 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2422 cr = bcd_cmp_zero(b);
2424 if (ox_flag) {
2425 cr |= CRF_SO;
2428 if (unlikely(invalid)) {
2429 cr = CRF_SO;
2432 *r = ret;
2434 return cr;
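/*
 * bcdcfsq.: Decimal Convert From Signed Quadword.  divu128() splits the
 * 128-bit magnitude: the remainder modulo 10^15 supplies digits 1..15 and
 * the quotient supplies digits 16..31; a quotient needing more than sixteen
 * digits cannot be represented and flags SO.
 */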
2437 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2439 int i;
2440 int cr = 0;
2441 uint64_t lo_value;
2442 uint64_t hi_value;
2443 ppc_avr_t ret = { .u64 = { 0, 0 } };
2445 if (b->VsrSD(0) < 0) {
2446 lo_value = -b->VsrSD(1);
2447 hi_value = ~b->VsrD(0) + !lo_value;
2448 bcd_put_digit(&ret, 0xD, 0);
2449 } else {
2450 lo_value = b->VsrD(1);
2451 hi_value = b->VsrD(0);
2452 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2455 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
2456 lo_value > 9999999999999999ULL) {
2457 cr = CRF_SO;
2460 for (i = 1; i < 16; hi_value /= 10, i++) {
2461 bcd_put_digit(&ret, hi_value % 10, i);
2464 for (; i < 32; lo_value /= 10, i++) {
2465 bcd_put_digit(&ret, lo_value % 10, i);
2468 cr |= bcd_cmp_zero(&ret);
2470 *r = ret;
2472 return cr;
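/*
 * bcdctsq.: Decimal Convert To Signed Quadword.  The digits are accumulated
 * into a 128-bit binary value by repeated multiply-by-ten, with mulu64()
 * propagating the carry into the high doubleword; a negative sign negates
 * the final 128-bit result.
 */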
2475 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2477 uint8_t i;
2478 int cr;
2479 uint64_t carry;
2480 uint64_t unused;
2481 uint64_t lo_value;
2482 uint64_t hi_value = 0;
2483 int sgnb = bcd_get_sgn(b);
2484 int invalid = (sgnb == 0);
2486 lo_value = bcd_get_digit(b, 31, &invalid);
2487 for (i = 30; i > 0; i--) {
2488 mulu64(&lo_value, &carry, lo_value, 10ULL);
2489 mulu64(&hi_value, &unused, hi_value, 10ULL);
2490 lo_value += bcd_get_digit(b, i, &invalid);
2491 hi_value += carry;
2493 if (unlikely(invalid)) {
2494 break;
2498 if (sgnb == -1) {
2499 r->VsrSD(1) = -lo_value;
2500 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2501 } else {
2502 r->VsrSD(1) = lo_value;
2503 r->VsrSD(0) = hi_value;
2506 cr = bcd_cmp_zero(b);
2508 if (unlikely(invalid)) {
2509 cr = CRF_SO;
2512 return cr;
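/*
 * bcdcpsgn.: Decimal Copy Sign.  The result takes its digits from the first
 * operand and its sign nibble from the second.
 */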
2515 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2517 int i;
2518 int invalid = 0;
2520 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2521 return CRF_SO;
2524 *r = *a;
2525 bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0);
2527 for (i = 1; i < 32; i++) {
2528 bcd_get_digit(a, i, &invalid);
2529 bcd_get_digit(b, i, &invalid);
2530 if (unlikely(invalid)) {
2531 return CRF_SO;
2535 return bcd_cmp_zero(r);
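/*
 * bcdsetsgn.: Decimal Set Sign.  Rewrite the sign nibble with the preferred
 * sign code selected by ps, leaving the digits unchanged.
 */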
2538 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2540 int sgnb = bcd_get_sgn(b);
2542 *r = *b;
2543 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2545 if (bcd_is_valid(b) == false) {
2546 return CRF_SO;
2549 return bcd_cmp_zero(r);
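/*
 * bcds.: Decimal Shift.  The signed shift count in byte 7 of operand a is
 * clamped to +/-31 digits; positive counts shift the magnitude towards the
 * most significant digit, and non-zero digits shifted out of the high end
 * set SO.
 */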
2552 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2554 int cr;
2555 #if defined(HOST_WORDS_BIGENDIAN)
2556 int i = a->s8[7];
2557 #else
2558 int i = a->s8[8];
2559 #endif
2560 bool ox_flag = false;
2561 int sgnb = bcd_get_sgn(b);
2562 ppc_avr_t ret = *b;
2563 ret.VsrD(1) &= ~0xf;
2565 if (bcd_is_valid(b) == false) {
2566 return CRF_SO;
2569 if (unlikely(i > 31)) {
2570 i = 31;
2571 } else if (unlikely(i < -31)) {
2572 i = -31;
2575 if (i > 0) {
2576 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2577 } else {
2578 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2580 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2582 *r = ret;
2584 cr = bcd_cmp_zero(r);
2585 if (ox_flag) {
2586 cr |= CRF_SO;
2589 return cr;
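/*
 * bcdus.: Decimal Unsigned Shift.  Like bcds. but with no sign nibble, so
 * all 32 nibbles are digits; the count is not clamped, and a shift of 32 or
 * more clears the result.
 */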
2592 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2594 int cr;
2595 int i;
2596 int invalid = 0;
2597 bool ox_flag = false;
2598 ppc_avr_t ret = *b;
2600 for (i = 0; i < 32; i++) {
2601 bcd_get_digit(b, i, &invalid);
2603 if (unlikely(invalid)) {
2604 return CRF_SO;
2608 #if defined(HOST_WORDS_BIGENDIAN)
2609 i = a->s8[7];
2610 #else
2611 i = a->s8[8];
2612 #endif
2613 if (i >= 32) {
2614 ox_flag = true;
2615 ret.VsrD(1) = ret.VsrD(0) = 0;
2616 } else if (i <= -32) {
2617 ret.VsrD(1) = ret.VsrD(0) = 0;
2618 } else if (i > 0) {
2619 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2620 } else {
2621 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2623 *r = ret;
2625 cr = bcd_cmp_zero(r);
2626 if (ox_flag) {
2627 cr |= CRF_SO;
2630 return cr;
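/*
 * bcdsr.: Decimal Shift and Round.  As bcds., except that a right shift
 * rounds the result up by one when the most significant digit shifted out
 * is 5 or greater.
 */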
2633 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2635 int cr;
2636 int unused = 0;
2637 int invalid = 0;
2638 bool ox_flag = false;
2639 int sgnb = bcd_get_sgn(b);
2640 ppc_avr_t ret = *b;
2641 ret.VsrD(1) &= ~0xf;
2643 #if defined(HOST_WORDS_BIGENDIAN)
2644 int i = a->s8[7];
2645 ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } };
2646 #else
2647 int i = a->s8[8];
2648 ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } };
2649 #endif
2651 if (bcd_is_valid(b) == false) {
2652 return CRF_SO;
2655 if (unlikely(i > 31)) {
2656 i = 31;
2657 } else if (unlikely(i < -31)) {
2658 i = -31;
2661 if (i > 0) {
2662 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2663 } else {
2664 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2666 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2667 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2670 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2672 cr = bcd_cmp_zero(&ret);
2673 if (ox_flag) {
2674 cr |= CRF_SO;
2676 *r = ret;
2678 return cr;
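/*
 * bcdtrunc.: Decimal Truncate.  Keep only the low-order number of digits
 * given by halfword 3 of operand a, preserving the sign nibble; dropping a
 * non-zero digit sets SO.
 */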
2681 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2683 uint64_t mask;
2684 uint32_t ox_flag = 0;
2685 #if defined(HOST_WORDS_BIGENDIAN)
2686 int i = a->s16[3] + 1;
2687 #else
2688 int i = a->s16[4] + 1;
2689 #endif
2690 ppc_avr_t ret = *b;
2692 if (bcd_is_valid(b) == false) {
2693 return CRF_SO;
2696 if (i > 16 && i < 32) {
2697 mask = (uint64_t)-1 >> (128 - i * 4);
2698 if (ret.VsrD(0) & ~mask) {
2699 ox_flag = CRF_SO;
2702 ret.VsrD(0) &= mask;
2703 } else if (i >= 0 && i <= 16) {
2704 mask = (uint64_t)-1 >> (64 - i * 4);
2705 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2706 ox_flag = CRF_SO;
2709 ret.VsrD(1) &= mask;
2710 ret.VsrD(0) = 0;
2712 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2713 *r = ret;
2715 return bcd_cmp_zero(&ret) | ox_flag;
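/*
 * bcdutrunc.: Decimal Unsigned Truncate.  Same as bcdtrunc. but with no
 * sign nibble, so up to 32 digits can be kept.
 */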
2718 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2720 int i;
2721 uint64_t mask;
2722 uint32_t ox_flag = 0;
2723 int invalid = 0;
2724 ppc_avr_t ret = *b;
2726 for (i = 0; i < 32; i++) {
2727 bcd_get_digit(b, i, &invalid);
2729 if (unlikely(invalid)) {
2730 return CRF_SO;
2734 #if defined(HOST_WORDS_BIGENDIAN)
2735 i = a->s16[3];
2736 #else
2737 i = a->s16[4];
2738 #endif
2739 if (i > 16 && i < 33) {
2740 mask = (uint64_t)-1 >> (128 - i * 4);
2741 if (ret.VsrD(0) & ~mask) {
2742 ox_flag = CRF_SO;
2745 ret.VsrD(0) &= mask;
2746 } else if (i > 0 && i <= 16) {
2747 mask = (uint64_t)-1 >> (64 - i * 4);
2748 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2749 ox_flag = CRF_SO;
2752 ret.VsrD(1) &= mask;
2753 ret.VsrD(0) = 0;
2754 } else if (i == 0) {
2755 if (ret.VsrD(0) || ret.VsrD(1)) {
2756 ox_flag = CRF_SO;
2758 ret.VsrD(0) = ret.VsrD(1) = 0;
2761 *r = ret;
2762 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2763 return ox_flag | CRF_EQ;
2766 return ox_flag | CRF_GT;
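/* vsbox: apply the AES S-box (SubBytes) to every byte of the operand. */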
2769 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2771 int i;
2772 VECTOR_FOR_INORDER_I(i, u8) {
2773 r->u8[i] = AES_sbox[a->u8[i]];
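/*
 * vcipher: one full AES encryption round.  Indexing through AES_shifts
 * performs ShiftRows, the AES_Te* tables combine SubBytes with MixColumns,
 * and the second operand supplies the round key that is XORed in.
 */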
2777 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2779 ppc_avr_t result;
2780 int i;
2782 VECTOR_FOR_INORDER_I(i, u32) {
2783 result.VsrW(i) = b->VsrW(i) ^
2784 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2785 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2786 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2787 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2789 *r = result;
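/*
 * vcipherlast: the final AES encryption round - SubBytes and ShiftRows plus
 * the round-key XOR, with no MixColumns step.
 */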
2792 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2794 ppc_avr_t result;
2795 int i;
2797 VECTOR_FOR_INORDER_I(i, u8) {
2798 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2800 *r = result;
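/*
 * vncipher: one full AES decryption round - inverse ShiftRows and SubBytes
 * (AES_ishifts/AES_isbox), the round-key XOR, then inverse MixColumns via
 * the AES_imc table.
 */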
2803 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2805 /* This differs from what is written in ISA V2.07. */
2806 /* The RTL is incorrect and will be fixed in V2.07B. */
2807 int i;
2808 ppc_avr_t tmp;
2810 VECTOR_FOR_INORDER_I(i, u8) {
2811 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2814 VECTOR_FOR_INORDER_I(i, u32) {
2815 r->VsrW(i) =
2816 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2817 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2818 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2819 AES_imc[tmp.VsrB(4 * i + 3)][3];
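/*
 * vncipherlast: the final AES decryption round - inverse ShiftRows and
 * SubBytes plus the round-key XOR, with no inverse MixColumns step.
 */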
2823 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2825 ppc_avr_t result;
2826 int i;
2828 VECTOR_FOR_INORDER_I(i, u8) {
2829 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2831 *r = result;
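/*
 * vshasigmaw: SHA-256 sigma functions.  Bit 4 of st_six selects the
 * upper-case Sigma variants (st = 1) over the lower-case sigma variants
 * (st = 0), and each of the low four bits picks sigma0 or sigma1 for the
 * corresponding word.
 */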
2834 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2836 int st = (st_six & 0x10) != 0;
2837 int six = st_six & 0xF;
2838 int i;
2840 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2841 if (st == 0) {
2842 if ((six & (0x8 >> i)) == 0) {
2843 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2844 ror32(a->VsrW(i), 18) ^
2845 (a->VsrW(i) >> 3);
2846 } else { /* six.bit[i] == 1 */
2847 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2848 ror32(a->VsrW(i), 19) ^
2849 (a->VsrW(i) >> 10);
2851 } else { /* st == 1 */
2852 if ((six & (0x8 >> i)) == 0) {
2853 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2854 ror32(a->VsrW(i), 13) ^
2855 ror32(a->VsrW(i), 22);
2856 } else { /* six.bit[i] == 1 */
2857 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2858 ror32(a->VsrW(i), 11) ^
2859 ror32(a->VsrW(i), 25);
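/*
 * vshasigmad: SHA-512 sigma functions.  st selects Sigma versus sigma as
 * above; doubleword i uses bit 2*i of the four-bit six field, counting from
 * its most significant bit.
 */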
2865 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2867 int st = (st_six & 0x10) != 0;
2868 int six = st_six & 0xF;
2869 int i;
2871 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2872 if (st == 0) {
2873 if ((six & (0x8 >> (2 * i))) == 0) {
2874 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
2875 ror64(a->VsrD(i), 8) ^
2876 (a->VsrD(i) >> 7);
2877 } else { /* six.bit[2*i] == 1 */
2878 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
2879 ror64(a->VsrD(i), 61) ^
2880 (a->VsrD(i) >> 6);
2882 } else { /* st == 1 */
2883 if ((six & (0x8 >> (2 * i))) == 0) {
2884 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
2885 ror64(a->VsrD(i), 34) ^
2886 ror64(a->VsrD(i), 39);
2887 } else { /* six.bit[2*i] == 1 */
2888 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
2889 ror64(a->VsrD(i), 18) ^
2890 ror64(a->VsrD(i), 41);
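/*
 * vpermxor: for each result byte, the high and low nibbles of the third
 * operand select one byte from the first and second operands respectively,
 * and the two selected bytes are XORed together.
 */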
2896 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2898 ppc_avr_t result;
2899 int i;
2901 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
2902 int indexA = c->VsrB(i) >> 4;
2903 int indexB = c->VsrB(i) & 0xF;
2905 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
2907 *r = result;
2910 #undef VECTOR_FOR_INORDER_I
2912 /*****************************************************************************/
2913 /* SPE extension helpers */
2914 /* Use a nibble lookup table to make bit reversal quicker */
2915 static const uint8_t hbrev[16] = {
2916 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
2917 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
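/*
 * hbrev[n] is the four-bit reversal of n, so byte_reverse() needs only two
 * table lookups.  For example, byte_reverse(0xB4) is
 * hbrev[0xB] | (hbrev[0x4] << 4) = 0x0D | 0x20 = 0x2D
 * (0b10110100 reversed is 0b00101101).
 */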
2920 static inline uint8_t byte_reverse(uint8_t val)
2922 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
2925 static inline uint32_t word_reverse(uint32_t val)
2927 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
2928 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
2931 #define MASKBITS 16 /* Arbitrary value; the architected width is implementation dependent */
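/*
 * brinc: SPE bit-reversed increment, typically used for the bit-reversed
 * addressing patterns of FFTs.  Within the implementation-dependent MASKBITS
 * window, the bits of arg1 selected by the mask in arg2 are replaced with
 * the bit-reversed increment of their bit-reversed value.
 */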
2932 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2934 uint32_t a, b, d, mask;
2936 mask = UINT32_MAX >> (32 - MASKBITS);
2937 a = arg1 & mask;
2938 b = arg2 & mask;
2939 d = word_reverse(1 + word_reverse(a | ~b));
2940 return (arg1 & ~mask) | (d & b);
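/*
 * cntlsw32: count the leading sign bits (consecutive copies of bit 31) in a
 * 32-bit word.
 */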
2943 uint32_t helper_cntlsw32(uint32_t val)
2945 if (val & 0x80000000) {
2946 return clz32(~val);
2947 } else {
2948 return clz32(val);
2952 uint32_t helper_cntlzw32(uint32_t val)
2954 return clz32(val);
2957 /* 440 specific */
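/*
 * dlmzb: Determine Leftmost Zero Byte.  Scan the eight-byte string formed by
 * high:low for a zero byte, write the resulting byte count into the low bits
 * of XER and, when Rc is set, indicate in CR0 whether the zero byte was
 * found in the high word, the low word, or not at all.
 */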
2958 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2959 target_ulong low, uint32_t update_Rc)
2961 target_ulong mask;
2962 int i;
2964 i = 1;
2965 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2966 if ((high & mask) == 0) {
2967 if (update_Rc) {
2968 env->crf[0] = 0x4;
2970 goto done;
2972 i++;
2974 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2975 if ((low & mask) == 0) {
2976 if (update_Rc) {
2977 env->crf[0] = 0x8;
2979 goto done;
2981 i++;
2983 i = 8;
2984 if (update_Rc) {
2985 env->crf[0] = 0x2;
2987 done:
2988 env->xer = (env->xer & ~0x7F) | i;
2989 if (update_Rc) {
2990 env->crf[0] |= xer_so;
2992 return i;