/*
 *  PowerPC integer and vector emulation helpers for QEMU.
 *
 *  Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "cpu.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "qemu/aes.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */
#if defined(TARGET_PPC64)

uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
{
    int64_t th;
    uint64_t tl;

    muls64(&tl, (uint64_t *)&th, arg1, arg2);
    /* th should either contain all 1 bits or all 0 bits and should
     * match the sign bit of tl; otherwise we have overflowed. */
    if ((int64_t)tl < 0) {
        if (likely(th == -1LL)) {
            env->ov = 0;
        } else {
            env->so = env->ov = 1;
        }
    } else if (likely(th == 0LL)) {
        env->ov = 0;
    } else {
        env->so = env->ov = 1;
    }

    return (int64_t)tl;
}
#endif
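
/* Worked example for helper_mulldo() above: INT64_MAX * 2 gives the 128-bit
 * product 0x0000000000000000:0xFFFFFFFFFFFFFFFE, so tl is negative while th
 * is 0 rather than -1 and OV/SO are set; -1 * 1 gives th == tl == -1 and no
 * overflow is reported. */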
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}
target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}
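
/* Note: divweu/divwe above implement the "divide word extended" forms: the
 * 32-bit dividend in ra is extended with 32 zero bits on the right
 * (dividend = ra << 32) before dividing by the 32-bit rb, and the result is
 * treated as undefined (forced to 0 here) when the quotient does not fit in
 * 32 bits or the division itself is invalid. */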
#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

#endif
target_ulong helper_cntlzw(target_ulong t)
{
    return clz32(t);
}

#if defined(TARGET_PPC64)
target_ulong helper_cntlzd(target_ulong t)
{
    return clz64(t);
}
#endif

#if defined(TARGET_PPC64)

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i*8)) & 0xFF;
        if (index < 64) {
            if (rb & (1ull << (63-index))) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif
target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}
/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca = (ret != 0);
    }
    return ret;
}
#endif
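
/* Note: in sraw/srad above, CA is only set when the result is negative and
 * non-zero bits were shifted out (the "(value & ((1 << shift) - 1))" test),
 * i.e. when the arithmetic shift discarded information from a negative
 * value. */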
#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}

target_ulong helper_popcntd(target_ulong val)
{
    return ctpop64(val);
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
    return val;
}
#endif
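
/* Note: helper_popcntb leaves a population count in every byte of the
 * result and helper_popcntw one in every 32-bit word, matching the
 * per-byte/per-word semantics of popcntb/popcntw; popcntd (64-bit builds
 * only) returns a single count for the whole doubleword. */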
/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}
/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/* Extremely decomposed:
 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif
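
/* For reference, a table of this shape could be regenerated offline with
 * something along the lines of (illustrative sketch only):
 *
 *     for (arg = 0; arg < 602; arg++) {
 *         table[arg] = (uint32_t)(256.0 * log10(pow(10.0, -arg / 256.0)
 *                                               + 1.0) + 0.5);
 *     }
 *
 * QEMU ships the precomputed values in mfrom_table.c. */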
/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#else
#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15-(i)]
#define AVRW(i) u32[3-(i)]
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif
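
/* Note: VECTOR_FOR_INORDER_I visits vector elements in PowerPC (big-endian)
 * element order regardless of host byte order, which is why the loop runs
 * backwards on little-endian hosts. */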
/* Saturating arithmetic helpers. */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
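
/* Example of the saturating converters above: cvtshsb(300, &sat) returns
 * INT8_MAX (127) and sets *sat, while cvtshsb(-5, &sat) returns -5 and
 * leaves *sat untouched (the converters only ever set the flag, never
 * clear it). */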
470 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
472 int i, j = (sh & 0xf);
474 VECTOR_FOR_INORDER_I(i, u8) {
475 r->u8[i] = j++;
479 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
481 int i, j = 0x10 - (sh & 0xf);
483 VECTOR_FOR_INORDER_I(i, u8) {
484 r->u8[i] = j++;
488 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
490 #if defined(HOST_WORDS_BIGENDIAN)
491 env->vscr = r->u32[3];
492 #else
493 env->vscr = r->u32[0];
494 #endif
495 set_flush_to_zero(vscr_nj, &env->vec_status);
498 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
500 int i;
502 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
503 r->u32[i] = ~a->u32[i] < b->u32[i];
507 #define VARITH_DO(name, op, element) \
508 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
510 int i; \
512 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
513 r->element[i] = a->element[i] op b->element[i]; \
516 #define VARITH(suffix, element) \
517 VARITH_DO(add##suffix, +, element) \
518 VARITH_DO(sub##suffix, -, element)
519 VARITH(ubm, u8)
520 VARITH(uhm, u16)
521 VARITH(uwm, u32)
522 VARITH(udm, u64)
523 VARITH_DO(muluwm, *, u32)
524 #undef VARITH_DO
525 #undef VARITH
527 #define VARITHFP(suffix, func) \
528 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
529 ppc_avr_t *b) \
531 int i; \
533 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
534 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
537 VARITHFP(addfp, float32_add)
538 VARITHFP(subfp, float32_sub)
539 VARITHFP(minfp, float32_min)
540 VARITHFP(maxfp, float32_max)
541 #undef VARITHFP
543 #define VARITHFPFMA(suffix, type) \
544 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
545 ppc_avr_t *b, ppc_avr_t *c) \
547 int i; \
548 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
549 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
550 type, &env->vec_status); \
553 VARITHFPFMA(maddfp, 0);
554 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
555 #undef VARITHFPFMA
557 #define VARITHSAT_CASE(type, op, cvt, element) \
559 type result = (type)a->element[i] op (type)b->element[i]; \
560 r->element[i] = cvt(result, &sat); \
563 #define VARITHSAT_DO(name, op, optype, cvt, element) \
564 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
565 ppc_avr_t *b) \
567 int sat = 0; \
568 int i; \
570 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
571 switch (sizeof(r->element[0])) { \
572 case 1: \
573 VARITHSAT_CASE(optype, op, cvt, element); \
574 break; \
575 case 2: \
576 VARITHSAT_CASE(optype, op, cvt, element); \
577 break; \
578 case 4: \
579 VARITHSAT_CASE(optype, op, cvt, element); \
580 break; \
583 if (sat) { \
584 env->vscr |= (1 << VSCR_SAT); \
587 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
588 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
589 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
590 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
591 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
592 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
593 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
594 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
595 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
596 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
597 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
598 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
599 #undef VARITHSAT_CASE
600 #undef VARITHSAT_DO
601 #undef VARITHSAT_SIGNED
602 #undef VARITHSAT_UNSIGNED
604 #define VAVG_DO(name, element, etype) \
605 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
607 int i; \
609 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
610 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
611 r->element[i] = x >> 1; \
615 #define VAVG(type, signed_element, signed_type, unsigned_element, \
616 unsigned_type) \
617 VAVG_DO(avgs##type, signed_element, signed_type) \
618 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
619 VAVG(b, s8, int16_t, u8, uint16_t)
620 VAVG(h, s16, int32_t, u16, uint32_t)
621 VAVG(w, s32, int64_t, u32, uint64_t)
622 #undef VAVG_DO
623 #undef VAVG
625 #define VCF(suffix, cvt, element) \
626 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
627 ppc_avr_t *b, uint32_t uim) \
629 int i; \
631 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
632 float32 t = cvt(b->element[i], &env->vec_status); \
633 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
636 VCF(ux, uint32_to_float32, u32)
637 VCF(sx, int32_to_float32, s32)
638 #undef VCF
640 #define VCMP_DO(suffix, compare, element, record) \
641 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
642 ppc_avr_t *a, ppc_avr_t *b) \
644 uint64_t ones = (uint64_t)-1; \
645 uint64_t all = ones; \
646 uint64_t none = 0; \
647 int i; \
649 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
650 uint64_t result = (a->element[i] compare b->element[i] ? \
651 ones : 0x0); \
652 switch (sizeof(a->element[0])) { \
653 case 8: \
654 r->u64[i] = result; \
655 break; \
656 case 4: \
657 r->u32[i] = result; \
658 break; \
659 case 2: \
660 r->u16[i] = result; \
661 break; \
662 case 1: \
663 r->u8[i] = result; \
664 break; \
666 all &= result; \
667 none |= result; \
669 if (record) { \
670 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
673 #define VCMP(suffix, compare, element) \
674 VCMP_DO(suffix, compare, element, 0) \
675 VCMP_DO(suffix##_dot, compare, element, 1)
676 VCMP(equb, ==, u8)
677 VCMP(equh, ==, u16)
678 VCMP(equw, ==, u32)
679 VCMP(equd, ==, u64)
680 VCMP(gtub, >, u8)
681 VCMP(gtuh, >, u16)
682 VCMP(gtuw, >, u32)
683 VCMP(gtud, >, u64)
684 VCMP(gtsb, >, s8)
685 VCMP(gtsh, >, s16)
686 VCMP(gtsw, >, s32)
687 VCMP(gtsd, >, s64)
688 #undef VCMP_DO
689 #undef VCMP
691 #define VCMPFP_DO(suffix, compare, order, record) \
692 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
693 ppc_avr_t *a, ppc_avr_t *b) \
695 uint32_t ones = (uint32_t)-1; \
696 uint32_t all = ones; \
697 uint32_t none = 0; \
698 int i; \
700 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
701 uint32_t result; \
702 int rel = float32_compare_quiet(a->f[i], b->f[i], \
703 &env->vec_status); \
704 if (rel == float_relation_unordered) { \
705 result = 0; \
706 } else if (rel compare order) { \
707 result = ones; \
708 } else { \
709 result = 0; \
711 r->u32[i] = result; \
712 all &= result; \
713 none |= result; \
715 if (record) { \
716 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
719 #define VCMPFP(suffix, compare, order) \
720 VCMPFP_DO(suffix, compare, order, 0) \
721 VCMPFP_DO(suffix##_dot, compare, order, 1)
722 VCMPFP(eqfp, ==, float_relation_equal)
723 VCMPFP(gefp, !=, float_relation_less)
724 VCMPFP(gtfp, ==, float_relation_greater)
725 #undef VCMPFP_DO
726 #undef VCMPFP
728 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
729 ppc_avr_t *a, ppc_avr_t *b, int record)
731 int i;
732 int all_in = 0;
734 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
735 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
736 if (le_rel == float_relation_unordered) {
737 r->u32[i] = 0xc0000000;
738 /* ALL_IN does not need to be updated here. */
739 } else {
740 float32 bneg = float32_chs(b->f[i]);
741 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
742 int le = le_rel != float_relation_greater;
743 int ge = ge_rel != float_relation_less;
745 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
746 all_in |= (!le | !ge);
749 if (record) {
750 env->crf[6] = (all_in == 0) << 1;
754 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
756 vcmpbfp_internal(env, r, a, b, 0);
759 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
760 ppc_avr_t *b)
762 vcmpbfp_internal(env, r, a, b, 1);
765 #define VCT(suffix, satcvt, element) \
766 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
767 ppc_avr_t *b, uint32_t uim) \
769 int i; \
770 int sat = 0; \
771 float_status s = env->vec_status; \
773 set_float_rounding_mode(float_round_to_zero, &s); \
774 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
775 if (float32_is_any_nan(b->f[i])) { \
776 r->element[i] = 0; \
777 } else { \
778 float64 t = float32_to_float64(b->f[i], &s); \
779 int64_t j; \
781 t = float64_scalbn(t, uim, &s); \
782 j = float64_to_int64(t, &s); \
783 r->element[i] = satcvt(j, &sat); \
786 if (sat) { \
787 env->vscr |= (1 << VSCR_SAT); \
790 VCT(uxs, cvtsduw, u32)
791 VCT(sxs, cvtsdsw, s32)
792 #undef VCT
794 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
795 ppc_avr_t *b, ppc_avr_t *c)
797 int sat = 0;
798 int i;
800 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
801 int32_t prod = a->s16[i] * b->s16[i];
802 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
804 r->s16[i] = cvtswsh(t, &sat);
807 if (sat) {
808 env->vscr |= (1 << VSCR_SAT);
812 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
813 ppc_avr_t *b, ppc_avr_t *c)
815 int sat = 0;
816 int i;
818 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
819 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
820 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
821 r->s16[i] = cvtswsh(t, &sat);
824 if (sat) {
825 env->vscr |= (1 << VSCR_SAT);
829 #define VMINMAX_DO(name, compare, element) \
830 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
832 int i; \
834 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
835 if (a->element[i] compare b->element[i]) { \
836 r->element[i] = b->element[i]; \
837 } else { \
838 r->element[i] = a->element[i]; \
842 #define VMINMAX(suffix, element) \
843 VMINMAX_DO(min##suffix, >, element) \
844 VMINMAX_DO(max##suffix, <, element)
845 VMINMAX(sb, s8)
846 VMINMAX(sh, s16)
847 VMINMAX(sw, s32)
848 VMINMAX(sd, s64)
849 VMINMAX(ub, u8)
850 VMINMAX(uh, u16)
851 VMINMAX(uw, u32)
852 VMINMAX(ud, u64)
853 #undef VMINMAX_DO
854 #undef VMINMAX
856 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
858 int i;
860 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
861 int32_t prod = a->s16[i] * b->s16[i];
862 r->s16[i] = (int16_t) (prod + c->s16[i]);
866 #define VMRG_DO(name, element, highp) \
867 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
869 ppc_avr_t result; \
870 int i; \
871 size_t n_elems = ARRAY_SIZE(r->element); \
873 for (i = 0; i < n_elems / 2; i++) { \
874 if (highp) { \
875 result.element[i*2+HI_IDX] = a->element[i]; \
876 result.element[i*2+LO_IDX] = b->element[i]; \
877 } else { \
878 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
879 b->element[n_elems - i - 1]; \
880 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
881 a->element[n_elems - i - 1]; \
884 *r = result; \
886 #if defined(HOST_WORDS_BIGENDIAN)
887 #define MRGHI 0
888 #define MRGLO 1
889 #else
890 #define MRGHI 1
891 #define MRGLO 0
892 #endif
893 #define VMRG(suffix, element) \
894 VMRG_DO(mrgl##suffix, element, MRGHI) \
895 VMRG_DO(mrgh##suffix, element, MRGLO)
896 VMRG(b, u8)
897 VMRG(h, u16)
898 VMRG(w, u32)
899 #undef VMRG_DO
900 #undef VMRG
901 #undef MRGHI
902 #undef MRGLO
904 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
905 ppc_avr_t *b, ppc_avr_t *c)
907 int32_t prod[16];
908 int i;
910 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
911 prod[i] = (int32_t)a->s8[i] * b->u8[i];
914 VECTOR_FOR_INORDER_I(i, s32) {
915 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
916 prod[4 * i + 2] + prod[4 * i + 3];
920 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
921 ppc_avr_t *b, ppc_avr_t *c)
923 int32_t prod[8];
924 int i;
926 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
927 prod[i] = a->s16[i] * b->s16[i];
930 VECTOR_FOR_INORDER_I(i, s32) {
931 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
935 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
936 ppc_avr_t *b, ppc_avr_t *c)
938 int32_t prod[8];
939 int i;
940 int sat = 0;
942 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
943 prod[i] = (int32_t)a->s16[i] * b->s16[i];
946 VECTOR_FOR_INORDER_I(i, s32) {
947 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
949 r->u32[i] = cvtsdsw(t, &sat);
952 if (sat) {
953 env->vscr |= (1 << VSCR_SAT);
957 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
958 ppc_avr_t *b, ppc_avr_t *c)
960 uint16_t prod[16];
961 int i;
963 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
964 prod[i] = a->u8[i] * b->u8[i];
967 VECTOR_FOR_INORDER_I(i, u32) {
968 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
969 prod[4 * i + 2] + prod[4 * i + 3];
973 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
974 ppc_avr_t *b, ppc_avr_t *c)
976 uint32_t prod[8];
977 int i;
979 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
980 prod[i] = a->u16[i] * b->u16[i];
983 VECTOR_FOR_INORDER_I(i, u32) {
984 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
988 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
989 ppc_avr_t *b, ppc_avr_t *c)
991 uint32_t prod[8];
992 int i;
993 int sat = 0;
995 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
996 prod[i] = a->u16[i] * b->u16[i];
999 VECTOR_FOR_INORDER_I(i, s32) {
1000 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1002 r->u32[i] = cvtuduw(t, &sat);
1005 if (sat) {
1006 env->vscr |= (1 << VSCR_SAT);
1010 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
1011 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1013 int i; \
1015 VECTOR_FOR_INORDER_I(i, prod_element) { \
1016 if (evenp) { \
1017 r->prod_element[i] = \
1018 (cast)a->mul_element[i * 2 + HI_IDX] * \
1019 (cast)b->mul_element[i * 2 + HI_IDX]; \
1020 } else { \
1021 r->prod_element[i] = \
1022 (cast)a->mul_element[i * 2 + LO_IDX] * \
1023 (cast)b->mul_element[i * 2 + LO_IDX]; \
1027 #define VMUL(suffix, mul_element, prod_element, cast) \
1028 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1029 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1030 VMUL(sb, s8, s16, int16_t)
1031 VMUL(sh, s16, s32, int32_t)
1032 VMUL(sw, s32, s64, int64_t)
1033 VMUL(ub, u8, u16, uint16_t)
1034 VMUL(uh, u16, u32, uint32_t)
1035 VMUL(uw, u32, u64, uint64_t)
1036 #undef VMUL_DO
1037 #undef VMUL
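/* Note on helper_vperm below: each result byte is selected from the 32-byte
 * concatenation of a and b, using the low 5 bits of the corresponding byte
 * of c as the index (bit 0x10 selects b, the low 4 bits select the byte). */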
1039 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1040 ppc_avr_t *c)
1042 ppc_avr_t result;
1043 int i;
1045 VECTOR_FOR_INORDER_I(i, u8) {
1046 int s = c->u8[i] & 0x1f;
1047 #if defined(HOST_WORDS_BIGENDIAN)
1048 int index = s & 0xf;
1049 #else
1050 int index = 15 - (s & 0xf);
1051 #endif
1053 if (s & 0x10) {
1054 result.u8[i] = b->u8[index];
1055 } else {
1056 result.u8[i] = a->u8[index];
1059 *r = result;
1062 #if defined(HOST_WORDS_BIGENDIAN)
1063 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1064 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1065 #else
1066 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1067 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1068 #endif
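/* Note on helper_vbpermq below: each of the 16 bytes of b supplies a bit
 * index (big-endian bit numbering) into the 128-bit value in a; the
 * gathered bits form a 16-bit field placed in the high doubleword of the
 * result, with indexes >= 128 contributing 0. */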
1070 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1072 int i;
1073 uint64_t perm = 0;
1075 VECTOR_FOR_INORDER_I(i, u8) {
1076 int index = VBPERMQ_INDEX(b, i);
1078 if (index < 128) {
1079 uint64_t mask = (1ull << (63-(index & 0x3F)));
1080 if (a->u64[VBPERMQ_DW(index)] & mask) {
1081 perm |= (0x8000 >> i);
1086 r->u64[HI_IDX] = perm;
1087 r->u64[LO_IDX] = 0;
1090 #undef VBPERMQ_INDEX
1091 #undef VBPERMQ_DW
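/* Note on the table below: VGBBD_MASKS[v] spreads the 8 bits of v across
 * the most-significant bit of each byte of a 64-bit value (bit j of v ends
 * up at bit 8*j+7), so helper_vgbbd() can OR shifted copies of these masks
 * together to transpose the 8x8 bit matrix held in each doubleword (vgbbd,
 * Vector Gather Bits by Bytes by Doubleword). */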
1093 static const uint64_t VGBBD_MASKS[256] = {
1094 0x0000000000000000ull, /* 00 */
1095 0x0000000000000080ull, /* 01 */
1096 0x0000000000008000ull, /* 02 */
1097 0x0000000000008080ull, /* 03 */
1098 0x0000000000800000ull, /* 04 */
1099 0x0000000000800080ull, /* 05 */
1100 0x0000000000808000ull, /* 06 */
1101 0x0000000000808080ull, /* 07 */
1102 0x0000000080000000ull, /* 08 */
1103 0x0000000080000080ull, /* 09 */
1104 0x0000000080008000ull, /* 0A */
1105 0x0000000080008080ull, /* 0B */
1106 0x0000000080800000ull, /* 0C */
1107 0x0000000080800080ull, /* 0D */
1108 0x0000000080808000ull, /* 0E */
1109 0x0000000080808080ull, /* 0F */
1110 0x0000008000000000ull, /* 10 */
1111 0x0000008000000080ull, /* 11 */
1112 0x0000008000008000ull, /* 12 */
1113 0x0000008000008080ull, /* 13 */
1114 0x0000008000800000ull, /* 14 */
1115 0x0000008000800080ull, /* 15 */
1116 0x0000008000808000ull, /* 16 */
1117 0x0000008000808080ull, /* 17 */
1118 0x0000008080000000ull, /* 18 */
1119 0x0000008080000080ull, /* 19 */
1120 0x0000008080008000ull, /* 1A */
1121 0x0000008080008080ull, /* 1B */
1122 0x0000008080800000ull, /* 1C */
1123 0x0000008080800080ull, /* 1D */
1124 0x0000008080808000ull, /* 1E */
1125 0x0000008080808080ull, /* 1F */
1126 0x0000800000000000ull, /* 20 */
1127 0x0000800000000080ull, /* 21 */
1128 0x0000800000008000ull, /* 22 */
1129 0x0000800000008080ull, /* 23 */
1130 0x0000800000800000ull, /* 24 */
1131 0x0000800000800080ull, /* 25 */
1132 0x0000800000808000ull, /* 26 */
1133 0x0000800000808080ull, /* 27 */
1134 0x0000800080000000ull, /* 28 */
1135 0x0000800080000080ull, /* 29 */
1136 0x0000800080008000ull, /* 2A */
1137 0x0000800080008080ull, /* 2B */
1138 0x0000800080800000ull, /* 2C */
1139 0x0000800080800080ull, /* 2D */
1140 0x0000800080808000ull, /* 2E */
1141 0x0000800080808080ull, /* 2F */
1142 0x0000808000000000ull, /* 30 */
1143 0x0000808000000080ull, /* 31 */
1144 0x0000808000008000ull, /* 32 */
1145 0x0000808000008080ull, /* 33 */
1146 0x0000808000800000ull, /* 34 */
1147 0x0000808000800080ull, /* 35 */
1148 0x0000808000808000ull, /* 36 */
1149 0x0000808000808080ull, /* 37 */
1150 0x0000808080000000ull, /* 38 */
1151 0x0000808080000080ull, /* 39 */
1152 0x0000808080008000ull, /* 3A */
1153 0x0000808080008080ull, /* 3B */
1154 0x0000808080800000ull, /* 3C */
1155 0x0000808080800080ull, /* 3D */
1156 0x0000808080808000ull, /* 3E */
1157 0x0000808080808080ull, /* 3F */
1158 0x0080000000000000ull, /* 40 */
1159 0x0080000000000080ull, /* 41 */
1160 0x0080000000008000ull, /* 42 */
1161 0x0080000000008080ull, /* 43 */
1162 0x0080000000800000ull, /* 44 */
1163 0x0080000000800080ull, /* 45 */
1164 0x0080000000808000ull, /* 46 */
1165 0x0080000000808080ull, /* 47 */
1166 0x0080000080000000ull, /* 48 */
1167 0x0080000080000080ull, /* 49 */
1168 0x0080000080008000ull, /* 4A */
1169 0x0080000080008080ull, /* 4B */
1170 0x0080000080800000ull, /* 4C */
1171 0x0080000080800080ull, /* 4D */
1172 0x0080000080808000ull, /* 4E */
1173 0x0080000080808080ull, /* 4F */
1174 0x0080008000000000ull, /* 50 */
1175 0x0080008000000080ull, /* 51 */
1176 0x0080008000008000ull, /* 52 */
1177 0x0080008000008080ull, /* 53 */
1178 0x0080008000800000ull, /* 54 */
1179 0x0080008000800080ull, /* 55 */
1180 0x0080008000808000ull, /* 56 */
1181 0x0080008000808080ull, /* 57 */
1182 0x0080008080000000ull, /* 58 */
1183 0x0080008080000080ull, /* 59 */
1184 0x0080008080008000ull, /* 5A */
1185 0x0080008080008080ull, /* 5B */
1186 0x0080008080800000ull, /* 5C */
1187 0x0080008080800080ull, /* 5D */
1188 0x0080008080808000ull, /* 5E */
1189 0x0080008080808080ull, /* 5F */
1190 0x0080800000000000ull, /* 60 */
1191 0x0080800000000080ull, /* 61 */
1192 0x0080800000008000ull, /* 62 */
1193 0x0080800000008080ull, /* 63 */
1194 0x0080800000800000ull, /* 64 */
1195 0x0080800000800080ull, /* 65 */
1196 0x0080800000808000ull, /* 66 */
1197 0x0080800000808080ull, /* 67 */
1198 0x0080800080000000ull, /* 68 */
1199 0x0080800080000080ull, /* 69 */
1200 0x0080800080008000ull, /* 6A */
1201 0x0080800080008080ull, /* 6B */
1202 0x0080800080800000ull, /* 6C */
1203 0x0080800080800080ull, /* 6D */
1204 0x0080800080808000ull, /* 6E */
1205 0x0080800080808080ull, /* 6F */
1206 0x0080808000000000ull, /* 70 */
1207 0x0080808000000080ull, /* 71 */
1208 0x0080808000008000ull, /* 72 */
1209 0x0080808000008080ull, /* 73 */
1210 0x0080808000800000ull, /* 74 */
1211 0x0080808000800080ull, /* 75 */
1212 0x0080808000808000ull, /* 76 */
1213 0x0080808000808080ull, /* 77 */
1214 0x0080808080000000ull, /* 78 */
1215 0x0080808080000080ull, /* 79 */
1216 0x0080808080008000ull, /* 7A */
1217 0x0080808080008080ull, /* 7B */
1218 0x0080808080800000ull, /* 7C */
1219 0x0080808080800080ull, /* 7D */
1220 0x0080808080808000ull, /* 7E */
1221 0x0080808080808080ull, /* 7F */
1222 0x8000000000000000ull, /* 80 */
1223 0x8000000000000080ull, /* 81 */
1224 0x8000000000008000ull, /* 82 */
1225 0x8000000000008080ull, /* 83 */
1226 0x8000000000800000ull, /* 84 */
1227 0x8000000000800080ull, /* 85 */
1228 0x8000000000808000ull, /* 86 */
1229 0x8000000000808080ull, /* 87 */
1230 0x8000000080000000ull, /* 88 */
1231 0x8000000080000080ull, /* 89 */
1232 0x8000000080008000ull, /* 8A */
1233 0x8000000080008080ull, /* 8B */
1234 0x8000000080800000ull, /* 8C */
1235 0x8000000080800080ull, /* 8D */
1236 0x8000000080808000ull, /* 8E */
1237 0x8000000080808080ull, /* 8F */
1238 0x8000008000000000ull, /* 90 */
1239 0x8000008000000080ull, /* 91 */
1240 0x8000008000008000ull, /* 92 */
1241 0x8000008000008080ull, /* 93 */
1242 0x8000008000800000ull, /* 94 */
1243 0x8000008000800080ull, /* 95 */
1244 0x8000008000808000ull, /* 96 */
1245 0x8000008000808080ull, /* 97 */
1246 0x8000008080000000ull, /* 98 */
1247 0x8000008080000080ull, /* 99 */
1248 0x8000008080008000ull, /* 9A */
1249 0x8000008080008080ull, /* 9B */
1250 0x8000008080800000ull, /* 9C */
1251 0x8000008080800080ull, /* 9D */
1252 0x8000008080808000ull, /* 9E */
1253 0x8000008080808080ull, /* 9F */
1254 0x8000800000000000ull, /* A0 */
1255 0x8000800000000080ull, /* A1 */
1256 0x8000800000008000ull, /* A2 */
1257 0x8000800000008080ull, /* A3 */
1258 0x8000800000800000ull, /* A4 */
1259 0x8000800000800080ull, /* A5 */
1260 0x8000800000808000ull, /* A6 */
1261 0x8000800000808080ull, /* A7 */
1262 0x8000800080000000ull, /* A8 */
1263 0x8000800080000080ull, /* A9 */
1264 0x8000800080008000ull, /* AA */
1265 0x8000800080008080ull, /* AB */
1266 0x8000800080800000ull, /* AC */
1267 0x8000800080800080ull, /* AD */
1268 0x8000800080808000ull, /* AE */
1269 0x8000800080808080ull, /* AF */
1270 0x8000808000000000ull, /* B0 */
1271 0x8000808000000080ull, /* B1 */
1272 0x8000808000008000ull, /* B2 */
1273 0x8000808000008080ull, /* B3 */
1274 0x8000808000800000ull, /* B4 */
1275 0x8000808000800080ull, /* B5 */
1276 0x8000808000808000ull, /* B6 */
1277 0x8000808000808080ull, /* B7 */
1278 0x8000808080000000ull, /* B8 */
1279 0x8000808080000080ull, /* B9 */
1280 0x8000808080008000ull, /* BA */
1281 0x8000808080008080ull, /* BB */
1282 0x8000808080800000ull, /* BC */
1283 0x8000808080800080ull, /* BD */
1284 0x8000808080808000ull, /* BE */
1285 0x8000808080808080ull, /* BF */
1286 0x8080000000000000ull, /* C0 */
1287 0x8080000000000080ull, /* C1 */
1288 0x8080000000008000ull, /* C2 */
1289 0x8080000000008080ull, /* C3 */
1290 0x8080000000800000ull, /* C4 */
1291 0x8080000000800080ull, /* C5 */
1292 0x8080000000808000ull, /* C6 */
1293 0x8080000000808080ull, /* C7 */
1294 0x8080000080000000ull, /* C8 */
1295 0x8080000080000080ull, /* C9 */
1296 0x8080000080008000ull, /* CA */
1297 0x8080000080008080ull, /* CB */
1298 0x8080000080800000ull, /* CC */
1299 0x8080000080800080ull, /* CD */
1300 0x8080000080808000ull, /* CE */
1301 0x8080000080808080ull, /* CF */
1302 0x8080008000000000ull, /* D0 */
1303 0x8080008000000080ull, /* D1 */
1304 0x8080008000008000ull, /* D2 */
1305 0x8080008000008080ull, /* D3 */
1306 0x8080008000800000ull, /* D4 */
1307 0x8080008000800080ull, /* D5 */
1308 0x8080008000808000ull, /* D6 */
1309 0x8080008000808080ull, /* D7 */
1310 0x8080008080000000ull, /* D8 */
1311 0x8080008080000080ull, /* D9 */
1312 0x8080008080008000ull, /* DA */
1313 0x8080008080008080ull, /* DB */
1314 0x8080008080800000ull, /* DC */
1315 0x8080008080800080ull, /* DD */
1316 0x8080008080808000ull, /* DE */
1317 0x8080008080808080ull, /* DF */
1318 0x8080800000000000ull, /* E0 */
1319 0x8080800000000080ull, /* E1 */
1320 0x8080800000008000ull, /* E2 */
1321 0x8080800000008080ull, /* E3 */
1322 0x8080800000800000ull, /* E4 */
1323 0x8080800000800080ull, /* E5 */
1324 0x8080800000808000ull, /* E6 */
1325 0x8080800000808080ull, /* E7 */
1326 0x8080800080000000ull, /* E8 */
1327 0x8080800080000080ull, /* E9 */
1328 0x8080800080008000ull, /* EA */
1329 0x8080800080008080ull, /* EB */
1330 0x8080800080800000ull, /* EC */
1331 0x8080800080800080ull, /* ED */
1332 0x8080800080808000ull, /* EE */
1333 0x8080800080808080ull, /* EF */
1334 0x8080808000000000ull, /* F0 */
1335 0x8080808000000080ull, /* F1 */
1336 0x8080808000008000ull, /* F2 */
1337 0x8080808000008080ull, /* F3 */
1338 0x8080808000800000ull, /* F4 */
1339 0x8080808000800080ull, /* F5 */
1340 0x8080808000808000ull, /* F6 */
1341 0x8080808000808080ull, /* F7 */
1342 0x8080808080000000ull, /* F8 */
1343 0x8080808080000080ull, /* F9 */
1344 0x8080808080008000ull, /* FA */
1345 0x8080808080008080ull, /* FB */
1346 0x8080808080800000ull, /* FC */
1347 0x8080808080800080ull, /* FD */
1348 0x8080808080808000ull, /* FE */
1349 0x8080808080808080ull, /* FF */
1352 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1354 int i;
1355 uint64_t t[2] = { 0, 0 };
1357 VECTOR_FOR_INORDER_I(i, u8) {
1358 #if defined(HOST_WORDS_BIGENDIAN)
1359 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1360 #else
1361 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1362 #endif
1365 r->u64[0] = t[0];
1366 r->u64[1] = t[1];
1369 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1370 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1372 int i, j; \
1373 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1375 VECTOR_FOR_INORDER_I(i, srcfld) { \
1376 prod[i] = 0; \
1377 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1378 if (a->srcfld[i] & (1ull<<j)) { \
1379 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1384 VECTOR_FOR_INORDER_I(i, trgfld) { \
1385 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1389 PMSUM(vpmsumb, u8, u16, uint16_t)
1390 PMSUM(vpmsumh, u16, u32, uint32_t)
1391 PMSUM(vpmsumw, u32, u64, uint64_t)
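/* Note on helper_vpmsumd below: it is the doubleword form of the polynomial
 * (carry-less) multiply-sum implemented by the PMSUM macro above; the
 * non-CONFIG_INT128 path builds each 128-bit partial product from explicit
 * 64-bit shifts and XORs the two products together. */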
1393 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1396 #ifdef CONFIG_INT128
1397 int i, j;
1398 __uint128_t prod[2];
1400 VECTOR_FOR_INORDER_I(i, u64) {
1401 prod[i] = 0;
1402 for (j = 0; j < 64; j++) {
1403 if (a->u64[i] & (1ull<<j)) {
1404 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1409 r->u128 = prod[0] ^ prod[1];
1411 #else
1412 int i, j;
1413 ppc_avr_t prod[2];
1415 VECTOR_FOR_INORDER_I(i, u64) {
1416 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1417 for (j = 0; j < 64; j++) {
1418 if (a->u64[i] & (1ull<<j)) {
1419 ppc_avr_t bshift;
1420 if (j == 0) {
1421 bshift.u64[HI_IDX] = 0;
1422 bshift.u64[LO_IDX] = b->u64[i];
1423 } else {
1424 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1425 bshift.u64[LO_IDX] = b->u64[i] << j;
1427 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1428 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1433 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1434 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1435 #endif
1439 #if defined(HOST_WORDS_BIGENDIAN)
1440 #define PKBIG 1
1441 #else
1442 #define PKBIG 0
1443 #endif
1444 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1446 int i, j;
1447 ppc_avr_t result;
1448 #if defined(HOST_WORDS_BIGENDIAN)
1449 const ppc_avr_t *x[2] = { a, b };
1450 #else
1451 const ppc_avr_t *x[2] = { b, a };
1452 #endif
1454 VECTOR_FOR_INORDER_I(i, u64) {
1455 VECTOR_FOR_INORDER_I(j, u32) {
1456 uint32_t e = x[i]->u32[j];
1458 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1459 ((e >> 6) & 0x3e0) |
1460 ((e >> 3) & 0x1f));
1463 *r = result;
1466 #define VPK(suffix, from, to, cvt, dosat) \
1467 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1468 ppc_avr_t *a, ppc_avr_t *b) \
1470 int i; \
1471 int sat = 0; \
1472 ppc_avr_t result; \
1473 ppc_avr_t *a0 = PKBIG ? a : b; \
1474 ppc_avr_t *a1 = PKBIG ? b : a; \
1476 VECTOR_FOR_INORDER_I(i, from) { \
1477 result.to[i] = cvt(a0->from[i], &sat); \
1478 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1480 *r = result; \
1481 if (dosat && sat) { \
1482 env->vscr |= (1 << VSCR_SAT); \
1485 #define I(x, y) (x)
1486 VPK(shss, s16, s8, cvtshsb, 1)
1487 VPK(shus, s16, u8, cvtshub, 1)
1488 VPK(swss, s32, s16, cvtswsh, 1)
1489 VPK(swus, s32, u16, cvtswuh, 1)
1490 VPK(sdss, s64, s32, cvtsdsw, 1)
1491 VPK(sdus, s64, u32, cvtsduw, 1)
1492 VPK(uhus, u16, u8, cvtuhub, 1)
1493 VPK(uwus, u32, u16, cvtuwuh, 1)
1494 VPK(udus, u64, u32, cvtuduw, 1)
1495 VPK(uhum, u16, u8, I, 0)
1496 VPK(uwum, u32, u16, I, 0)
1497 VPK(udum, u64, u32, I, 0)
1498 #undef I
1499 #undef VPK
1500 #undef PKBIG
1502 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1504 int i;
1506 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1507 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1511 #define VRFI(suffix, rounding) \
1512 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1513 ppc_avr_t *b) \
1515 int i; \
1516 float_status s = env->vec_status; \
1518 set_float_rounding_mode(rounding, &s); \
1519 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1520 r->f[i] = float32_round_to_int (b->f[i], &s); \
1523 VRFI(n, float_round_nearest_even)
1524 VRFI(m, float_round_down)
1525 VRFI(p, float_round_up)
1526 VRFI(z, float_round_to_zero)
1527 #undef VRFI
1529 #define VROTATE(suffix, element, mask) \
1530 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1532 int i; \
1534 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1535 unsigned int shift = b->element[i] & mask; \
1536 r->element[i] = (a->element[i] << shift) | \
1537 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1540 VROTATE(b, u8, 0x7)
1541 VROTATE(h, u16, 0xF)
1542 VROTATE(w, u32, 0x1F)
1543 VROTATE(d, u64, 0x3F)
1544 #undef VROTATE
1546 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1548 int i;
1550 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1551 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1553 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1557 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1558 ppc_avr_t *c)
1560 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1561 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1564 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1566 int i;
1568 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1569 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1573 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1575 int i;
1577 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1578 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1582 #if defined(HOST_WORDS_BIGENDIAN)
1583 #define LEFT 0
1584 #define RIGHT 1
1585 #else
1586 #define LEFT 1
1587 #define RIGHT 0
1588 #endif
1589 /* The specification says that the results are undefined if all of the
1590 * shift counts are not identical. We check to make sure that they are
1591 * to conform to what real hardware appears to do. */
1592 #define VSHIFT(suffix, leftp) \
1593 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1595 int shift = b->u8[LO_IDX*15] & 0x7; \
1596 int doit = 1; \
1597 int i; \
1599 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1600 doit = doit && ((b->u8[i] & 0x7) == shift); \
1602 if (doit) { \
1603 if (shift == 0) { \
1604 *r = *a; \
1605 } else if (leftp) { \
1606 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1608 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1609 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1610 } else { \
1611 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1613 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1614 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1618 VSHIFT(l, LEFT)
1619 VSHIFT(r, RIGHT)
1620 #undef VSHIFT
1621 #undef LEFT
1622 #undef RIGHT
1624 #define VSL(suffix, element, mask) \
1625 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1627 int i; \
1629 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1630 unsigned int shift = b->element[i] & mask; \
1632 r->element[i] = a->element[i] << shift; \
1635 VSL(b, u8, 0x7)
1636 VSL(h, u16, 0x0F)
1637 VSL(w, u32, 0x1F)
1638 VSL(d, u64, 0x3F)
1639 #undef VSL
1641 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1643 int sh = shift & 0xf;
1644 int i;
1645 ppc_avr_t result;
1647 #if defined(HOST_WORDS_BIGENDIAN)
1648 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1649 int index = sh + i;
1650 if (index > 0xf) {
1651 result.u8[i] = b->u8[index - 0x10];
1652 } else {
1653 result.u8[i] = a->u8[index];
1656 #else
1657 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1658 int index = (16 - sh) + i;
1659 if (index > 0xf) {
1660 result.u8[i] = a->u8[index - 0x10];
1661 } else {
1662 result.u8[i] = b->u8[index];
1665 #endif
1666 *r = result;
1669 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1671 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1673 #if defined(HOST_WORDS_BIGENDIAN)
1674 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1675 memset(&r->u8[16-sh], 0, sh);
1676 #else
1677 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1678 memset(&r->u8[0], 0, sh);
1679 #endif
1682 /* Experimental testing shows that hardware masks the immediate. */
1683 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1684 #if defined(HOST_WORDS_BIGENDIAN)
1685 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1686 #else
1687 #define SPLAT_ELEMENT(element) \
1688 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1689 #endif
1690 #define VSPLT(suffix, element) \
1691 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1693 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1694 int i; \
1696 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1697 r->element[i] = s; \
1700 VSPLT(b, u8)
1701 VSPLT(h, u16)
1702 VSPLT(w, u32)
1703 #undef VSPLT
1704 #undef SPLAT_ELEMENT
1705 #undef _SPLAT_MASKED
1707 #define VSPLTI(suffix, element, splat_type) \
1708 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1710 splat_type x = (int8_t)(splat << 3) >> 3; \
1711 int i; \
1713 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1714 r->element[i] = x; \
1717 VSPLTI(b, s8, int8_t)
1718 VSPLTI(h, s16, int16_t)
1719 VSPLTI(w, s32, int32_t)
1720 #undef VSPLTI
1722 #define VSR(suffix, element, mask) \
1723 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1725 int i; \
1727 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1728 unsigned int shift = b->element[i] & mask; \
1729 r->element[i] = a->element[i] >> shift; \
1732 VSR(ab, s8, 0x7)
1733 VSR(ah, s16, 0xF)
1734 VSR(aw, s32, 0x1F)
1735 VSR(ad, s64, 0x3F)
1736 VSR(b, u8, 0x7)
1737 VSR(h, u16, 0xF)
1738 VSR(w, u32, 0x1F)
1739 VSR(d, u64, 0x3F)
1740 #undef VSR
1742 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1744 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1746 #if defined(HOST_WORDS_BIGENDIAN)
1747 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1748 memset(&r->u8[0], 0, sh);
1749 #else
1750 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1751 memset(&r->u8[16 - sh], 0, sh);
1752 #endif
1755 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1757 int i;
1759 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1760 r->u32[i] = a->u32[i] >= b->u32[i];
1764 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1766 int64_t t;
1767 int i, upper;
1768 ppc_avr_t result;
1769 int sat = 0;
1771 #if defined(HOST_WORDS_BIGENDIAN)
1772 upper = ARRAY_SIZE(r->s32)-1;
1773 #else
1774 upper = 0;
1775 #endif
1776 t = (int64_t)b->s32[upper];
1777 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1778 t += a->s32[i];
1779 result.s32[i] = 0;
1781 result.s32[upper] = cvtsdsw(t, &sat);
1782 *r = result;
1784 if (sat) {
1785 env->vscr |= (1 << VSCR_SAT);
1789 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1791 int i, j, upper;
1792 ppc_avr_t result;
1793 int sat = 0;
1795 #if defined(HOST_WORDS_BIGENDIAN)
1796 upper = 1;
1797 #else
1798 upper = 0;
1799 #endif
1800 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1801 int64_t t = (int64_t)b->s32[upper + i * 2];
1803 result.u64[i] = 0;
1804 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1805 t += a->s32[2 * i + j];
1807 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
1810 *r = result;
1811 if (sat) {
1812 env->vscr |= (1 << VSCR_SAT);
1816 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1818 int i, j;
1819 int sat = 0;
1821 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1822 int64_t t = (int64_t)b->s32[i];
1824 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1825 t += a->s8[4 * i + j];
1827 r->s32[i] = cvtsdsw(t, &sat);
1830 if (sat) {
1831 env->vscr |= (1 << VSCR_SAT);
1835 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1837 int sat = 0;
1838 int i;
1840 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1841 int64_t t = (int64_t)b->s32[i];
1843 t += a->s16[2 * i] + a->s16[2 * i + 1];
1844 r->s32[i] = cvtsdsw(t, &sat);
1847 if (sat) {
1848 env->vscr |= (1 << VSCR_SAT);
1852 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1854 int i, j;
1855 int sat = 0;
1857 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1858 uint64_t t = (uint64_t)b->u32[i];
1860 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1861 t += a->u8[4 * i + j];
1863 r->u32[i] = cvtuduw(t, &sat);
1866 if (sat) {
1867 env->vscr |= (1 << VSCR_SAT);
1871 #if defined(HOST_WORDS_BIGENDIAN)
1872 #define UPKHI 1
1873 #define UPKLO 0
1874 #else
1875 #define UPKHI 0
1876 #define UPKLO 1
1877 #endif
1878 #define VUPKPX(suffix, hi) \
1879 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1881 int i; \
1882 ppc_avr_t result; \
1884 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1885 uint16_t e = b->u16[hi ? i : i+4]; \
1886 uint8_t a = (e >> 15) ? 0xff : 0; \
1887 uint8_t r = (e >> 10) & 0x1f; \
1888 uint8_t g = (e >> 5) & 0x1f; \
1889 uint8_t b = e & 0x1f; \
1891 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1893 *r = result; \
1895 VUPKPX(lpx, UPKLO)
1896 VUPKPX(hpx, UPKHI)
1897 #undef VUPKPX
1899 #define VUPK(suffix, unpacked, packee, hi) \
1900 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1902 int i; \
1903 ppc_avr_t result; \
1905 if (hi) { \
1906 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1907 result.unpacked[i] = b->packee[i]; \
1909 } else { \
1910 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1911 i++) { \
1912 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1915 *r = result; \
1917 VUPK(hsb, s16, s8, UPKHI)
1918 VUPK(hsh, s32, s16, UPKHI)
1919 VUPK(hsw, s64, s32, UPKHI)
1920 VUPK(lsb, s16, s8, UPKLO)
1921 VUPK(lsh, s32, s16, UPKLO)
1922 VUPK(lsw, s64, s32, UPKLO)
1923 #undef VUPK
1924 #undef UPKHI
1925 #undef UPKLO
1927 #define VGENERIC_DO(name, element) \
1928 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1930 int i; \
1932 VECTOR_FOR_INORDER_I(i, element) { \
1933 r->element[i] = name(b->element[i]); \
1937 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1938 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1939 #define clzw(v) clz32((v))
1940 #define clzd(v) clz64((v))
1942 VGENERIC_DO(clzb, u8)
1943 VGENERIC_DO(clzh, u16)
1944 VGENERIC_DO(clzw, u32)
1945 VGENERIC_DO(clzd, u64)
1947 #undef clzb
1948 #undef clzh
1949 #undef clzw
1950 #undef clzd
1952 #define popcntb(v) ctpop8(v)
1953 #define popcnth(v) ctpop16(v)
1954 #define popcntw(v) ctpop32(v)
1955 #define popcntd(v) ctpop64(v)
1957 VGENERIC_DO(popcntb, u8)
1958 VGENERIC_DO(popcnth, u16)
1959 VGENERIC_DO(popcntw, u32)
1960 VGENERIC_DO(popcntd, u64)
1962 #undef popcntb
1963 #undef popcnth
1964 #undef popcntw
1965 #undef popcntd
1967 #undef VGENERIC_DO
1969 #if defined(HOST_WORDS_BIGENDIAN)
1970 #define QW_ONE { .u64 = { 0, 1 } }
1971 #else
1972 #define QW_ONE { .u64 = { 1, 0 } }
1973 #endif
1975 #ifndef CONFIG_INT128
1977 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1979 t->u64[0] = ~a.u64[0];
1980 t->u64[1] = ~a.u64[1];
1983 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1985 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
1986 return -1;
1987 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
1988 return 1;
1989 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
1990 return -1;
1991 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
1992 return 1;
1993 } else {
1994 return 0;
1998 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2000 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2001 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2002 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2005 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2007 ppc_avr_t not_a;
2008 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2009 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2010 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2011 avr_qw_not(&not_a, a);
2012 return avr_qw_cmpu(not_a, b) < 0;
2015 #endif
2017 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2019 #ifdef CONFIG_INT128
2020 r->u128 = a->u128 + b->u128;
2021 #else
2022 avr_qw_add(r, *a, *b);
2023 #endif
2026 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2028 #ifdef CONFIG_INT128
2029 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2030 #else
2032 if (c->u64[LO_IDX] & 1) {
2033 ppc_avr_t tmp;
2035 tmp.u64[HI_IDX] = 0;
2036 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2037 avr_qw_add(&tmp, *a, tmp);
2038 avr_qw_add(r, tmp, *b);
2039 } else {
2040 avr_qw_add(r, *a, *b);
2042 #endif
2045 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2047 #ifdef CONFIG_INT128
2048 r->u128 = (~a->u128 < b->u128);
2049 #else
2050 ppc_avr_t not_a;
2052 avr_qw_not(&not_a, *a);
2054 r->u64[HI_IDX] = 0;
2055 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2056 #endif
2059 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2061 #ifdef CONFIG_INT128
2062 int carry_out = (~a->u128 < b->u128);
2063 if (!carry_out && (c->u128 & 1)) {
2064 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2065 ((a->u128 != 0) || (b->u128 != 0));
2067 r->u128 = carry_out;
2068 #else
2070 int carry_in = c->u64[LO_IDX] & 1;
2071 int carry_out = 0;
2072 ppc_avr_t tmp;
2074 carry_out = avr_qw_addc(&tmp, *a, *b);
2076 if (!carry_out && carry_in) {
2077 ppc_avr_t one = QW_ONE;
2078 carry_out = avr_qw_addc(&tmp, tmp, one);
2080 r->u64[HI_IDX] = 0;
2081 r->u64[LO_IDX] = carry_out;
2082 #endif
2085 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2087 #ifdef CONFIG_INT128
2088 r->u128 = a->u128 - b->u128;
2089 #else
2090 ppc_avr_t tmp;
2091 ppc_avr_t one = QW_ONE;
2093 avr_qw_not(&tmp, *b);
2094 avr_qw_add(&tmp, *a, tmp);
2095 avr_qw_add(r, tmp, one);
2096 #endif
2099 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2101 #ifdef CONFIG_INT128
2102 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2103 #else
2104 ppc_avr_t tmp, sum;
2106 avr_qw_not(&tmp, *b);
2107 avr_qw_add(&sum, *a, tmp);
2109 tmp.u64[HI_IDX] = 0;
2110 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2111 avr_qw_add(r, sum, tmp);
2112 #endif
2115 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2117 #ifdef CONFIG_INT128
2118 r->u128 = (~a->u128 < ~b->u128) ||
2119 (a->u128 + ~b->u128 == (__uint128_t)-1);
2120 #else
2121 int carry = (avr_qw_cmpu(*a, *b) > 0);
2122 if (!carry) {
2123 ppc_avr_t tmp;
2124 avr_qw_not(&tmp, *b);
2125 avr_qw_add(&tmp, *a, tmp);
2126 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2128 r->u64[HI_IDX] = 0;
2129 r->u64[LO_IDX] = carry;
2130 #endif
2133 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2135 #ifdef CONFIG_INT128
2136 r->u128 =
2137 (~a->u128 < ~b->u128) ||
2138 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2139 #else
2140 int carry_in = c->u64[LO_IDX] & 1;
2141 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2142 if (!carry_out && carry_in) {
2143 ppc_avr_t tmp;
2144 avr_qw_not(&tmp, *b);
2145 avr_qw_add(&tmp, *a, tmp);
2146 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2149 r->u64[HI_IDX] = 0;
2150 r->u64[LO_IDX] = carry_out;
2151 #endif
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE

#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - (n/2))
#else
#define BCD_DIG_BYTE(n) (n/2)
#endif
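
/* Note for the bcdadd/bcdsub helpers below: values are signed packed
 * decimals with 31 BCD digits, two per byte, and the sign code in the low
 * nibble of the least-significant byte (digit index 0 in BCD_DIG_BYTE()
 * terms); digit n occupies the high nibble of its byte when n is odd and
 * the low nibble when n is even. */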
2167 static int bcd_get_sgn(ppc_avr_t *bcd)
2169 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2170 case BCD_PLUS_PREF_1:
2171 case BCD_PLUS_PREF_2:
2172 case BCD_PLUS_ALT_1:
2173 case BCD_PLUS_ALT_2:
2175 return 1;
2178 case BCD_NEG_PREF:
2179 case BCD_NEG_ALT:
2181 return -1;
2184 default:
2186 return 0;
2191 static int bcd_preferred_sgn(int sgn, int ps)
2193 if (sgn >= 0) {
2194 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2195 } else {
2196 return BCD_NEG_PREF;
2200 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2202 uint8_t result;
2203 if (n & 1) {
2204 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2205 } else {
2206 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2209 if (unlikely(result > 9)) {
2210 *invalid = true;
2212 return result;
2215 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2217 if (n & 1) {
2218 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2219 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2220 } else {
2221 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2222 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2226 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2228 int i;
2229 int invalid = 0;
2230 for (i = 31; i > 0; i--) {
2231 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2232 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2233 if (unlikely(invalid)) {
2234 return 0; /* doesn't matter */
2235 } else if (dig_a > dig_b) {
2236 return 1;
2237 } else if (dig_a < dig_b) {
2238 return -1;
2242 return 0;
2245 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2246 int *overflow)
2248 int carry = 0;
2249 int i;
2250 int is_zero = 1;
2251 for (i = 1; i <= 31; i++) {
2252 uint8_t digit = bcd_get_digit(a, i, invalid) +
2253 bcd_get_digit(b, i, invalid) + carry;
2254 is_zero &= (digit == 0);
2255 if (digit > 9) {
2256 carry = 1;
2257 digit -= 10;
2258 } else {
2259 carry = 0;
2262 bcd_put_digit(t, digit, i);
2264 if (unlikely(*invalid)) {
2265 return -1;
2269 *overflow = carry;
2270 return is_zero;
2273 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2274 int *overflow)
2275 {
2276 int carry = 0;
2277 int i;
2278 int is_zero = 1;
2279 for (i = 1; i <= 31; i++) {
2280 uint8_t digit = bcd_get_digit(a, i, invalid) -
2281 bcd_get_digit(b, i, invalid) + carry;
2282 is_zero &= (digit == 0);
2283 if (digit & 0x80) {
2284 carry = -1;
2285 digit += 10;
2286 } else {
2287 carry = 0;
2288 }
2290 bcd_put_digit(t, digit, i);
2291 }
2292 if (unlikely(*invalid)) {
2293 return -1;
2294 }
2297 *overflow = carry;
2298 return is_zero;
2299 }
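/* bcdadd/bcdsub return a CR field value: 8 for a negative result, 4 for a
 * positive one, 2 for zero, with 1 ORed in on overflow; invalid inputs force
 * cr = 1 and an all-ones result. */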
2301 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2302 {
2304 int sgna = bcd_get_sgn(a);
2305 int sgnb = bcd_get_sgn(b);
2306 int invalid = (sgna == 0) || (sgnb == 0);
2307 int overflow = 0;
2308 int zero = 0;
2309 uint32_t cr = 0;
2310 ppc_avr_t result = { .u64 = { 0, 0 } };
2312 if (!invalid) {
2313 if (sgna == sgnb) {
2314 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2315 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2316 cr = (sgna > 0) ? 4 : 8;
2317 } else if (bcd_cmp_mag(a, b) > 0) {
2318 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2319 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2320 cr = (sgna > 0) ? 4 : 8;
2321 } else {
2322 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2323 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2324 cr = (sgnb > 0) ? 4 : 8;
2325 }
2326 }
2328 if (unlikely(invalid)) {
2329 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2330 cr = 1;
2331 } else if (overflow) {
2332 cr |= 1;
2333 } else if (zero) {
2334 cr = 2;
2335 }
2337 *r = result;
2339 return cr;
2340 }
2342 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2343 {
2344 ppc_avr_t bcopy = *b;
2345 int sgnb = bcd_get_sgn(b);
2346 if (sgnb < 0) {
2347 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2348 } else if (sgnb > 0) {
2349 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2350 }
2351 /* else invalid ... defer to bcdadd code for proper handling */
2353 return helper_bcdadd(r, a, &bcopy, ps);
2354 }
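/* AES acceleration helpers: vsbox applies the forward S-box (SubBytes),
 * vcipher/vcipherlast perform one middle/final encryption round, and
 * vncipher/vncipherlast the corresponding decryption rounds, using the
 * shared AES_* lookup tables from qemu/aes.h. */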
2356 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2357 {
2358 int i;
2359 VECTOR_FOR_INORDER_I(i, u8) {
2360 r->u8[i] = AES_sbox[a->u8[i]];
2361 }
2362 }
2364 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2365 {
2366 int i;
2368 VECTOR_FOR_INORDER_I(i, u32) {
2369 r->AVRW(i) = b->AVRW(i) ^
2370 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2371 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2372 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2373 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2374 }
2375 }
2377 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2378 {
2379 int i;
2381 VECTOR_FOR_INORDER_I(i, u8) {
2382 r->AVRB(i) = b->AVRB(i) ^ (AES_Te4[a->AVRB(AES_shifts[i])] & 0xFF);
2383 }
2384 }
2386 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2387 {
2388 /* This differs from what is written in ISA V2.07. The RTL is */
2389 /* incorrect and will be fixed in V2.07B. */
2390 int i;
2391 ppc_avr_t tmp;
2393 VECTOR_FOR_INORDER_I(i, u8) {
2394 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2395 }
2397 VECTOR_FOR_INORDER_I(i, u32) {
2398 r->AVRW(i) =
2399 AES_imc[tmp.AVRB(4*i + 0)][0] ^
2400 AES_imc[tmp.AVRB(4*i + 1)][1] ^
2401 AES_imc[tmp.AVRB(4*i + 2)][2] ^
2402 AES_imc[tmp.AVRB(4*i + 3)][3];
2403 }
2404 }
2406 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2407 {
2408 int i;
2410 VECTOR_FOR_INORDER_I(i, u8) {
2411 r->AVRB(i) = b->AVRB(i) ^ (AES_Td4[a->AVRB(AES_ishifts[i])] & 0xFF);
2412 }
2413 }
2415 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
2416 #if defined(HOST_WORDS_BIGENDIAN)
2417 #define EL_IDX(i) (i)
2418 #else
2419 #define EL_IDX(i) (3 - (i))
2420 #endif
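/* vshasigmaw evaluates the SHA-256 sigma functions on each word of a:
 * st = 0 selects the lower-case sigma0/sigma1 message-schedule functions,
 * st = 1 the upper-case Sigma0/Sigma1 compression functions, and each bit
 * of six picks the "1" variant for the corresponding element. */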
2422 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2423 {
2424 int st = (st_six & 0x10) != 0;
2425 int six = st_six & 0xF;
2426 int i;
2428 VECTOR_FOR_INORDER_I(i, u32) {
2429 if (st == 0) {
2430 if ((six & (0x8 >> i)) == 0) {
2431 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2432 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2433 (a->u32[EL_IDX(i)] >> 3);
2434 } else { /* six.bit[i] == 1 */
2435 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2436 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2437 (a->u32[EL_IDX(i)] >> 10);
2438 }
2439 } else { /* st == 1 */
2440 if ((six & (0x8 >> i)) == 0) {
2441 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2442 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2443 ROTRu32(a->u32[EL_IDX(i)], 22);
2444 } else { /* six.bit[i] == 1 */
2445 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2446 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2447 ROTRu32(a->u32[EL_IDX(i)], 25);
2448 }
2449 }
2450 }
2451 }
2453 #undef ROTRu32
2454 #undef EL_IDX
2456 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
2457 #if defined(HOST_WORDS_BIGENDIAN)
2458 #define EL_IDX(i) (i)
2459 #else
2460 #define EL_IDX(i) (1 - (i))
2461 #endif
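/* vshasigmad is the SHA-512 counterpart, operating on the two doublewords
 * of a; bit 2*i of six (as counted in the inline comments) selects the "1"
 * variant for element i. */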
2463 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2464 {
2465 int st = (st_six & 0x10) != 0;
2466 int six = st_six & 0xF;
2467 int i;
2469 VECTOR_FOR_INORDER_I(i, u64) {
2470 if (st == 0) {
2471 if ((six & (0x8 >> (2*i))) == 0) {
2472 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2473 ROTRu64(a->u64[EL_IDX(i)], 8) ^
2474 (a->u64[EL_IDX(i)] >> 7);
2475 } else { /* six.bit[2*i] == 1 */
2476 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2477 ROTRu64(a->u64[EL_IDX(i)], 61) ^
2478 (a->u64[EL_IDX(i)] >> 6);
2479 }
2480 } else { /* st == 1 */
2481 if ((six & (0x8 >> (2*i))) == 0) {
2482 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2483 ROTRu64(a->u64[EL_IDX(i)], 34) ^
2484 ROTRu64(a->u64[EL_IDX(i)], 39);
2485 } else { /* six.bit[2*i] == 1 */
2486 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2487 ROTRu64(a->u64[EL_IDX(i)], 18) ^
2488 ROTRu64(a->u64[EL_IDX(i)], 41);
2489 }
2490 }
2491 }
2492 }
2494 #undef ROTRu64
2495 #undef EL_IDX
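/* vpermxor: each control byte in c supplies two nibble indexes; the selected
 * byte of a is XORed with the selected byte of b (indexes count from the
 * other end of the register on little-endian hosts). */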
2497 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2498 {
2499 int i;
2500 VECTOR_FOR_INORDER_I(i, u8) {
2501 int indexA = c->u8[i] >> 4;
2502 int indexB = c->u8[i] & 0xF;
2503 #if defined(HOST_WORDS_BIGENDIAN)
2504 r->u8[i] = a->u8[indexA] ^ b->u8[indexB];
2505 #else
2506 r->u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
2507 #endif
2508 }
2509 }
2511 #undef VECTOR_FOR_INORDER_I
2512 #undef HI_IDX
2513 #undef LO_IDX
2515 /*****************************************************************************/
2516 /* SPE extension helpers */
2517 /* Use a table to make this quicker */
2518 static const uint8_t hbrev[16] = {
2519 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
2520 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
2521 };
2523 static inline uint8_t byte_reverse(uint8_t val)
2524 {
2525 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
2526 }
2528 static inline uint32_t word_reverse(uint32_t val)
2529 {
2530 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
2531 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
2532 }
2534 #define MASKBITS 16 /* Arbitrary value; the real width is implementation dependent */
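/* brinc performs a bit-reversed increment of the low MASKBITS bits of arg1
 * under the mask in arg2, the address pattern used for FFT-style
 * bit-reversed buffers on SPE cores. */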
2535 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2536 {
2537 uint32_t a, b, d, mask;
2539 mask = UINT32_MAX >> (32 - MASKBITS);
2540 a = arg1 & mask;
2541 b = arg2 & mask;
2542 d = word_reverse(1 + word_reverse(a | ~b));
2543 return (arg1 & ~mask) | (d & b);
2544 }
2546 uint32_t helper_cntlsw32(uint32_t val)
2547 {
2548 if (val & 0x80000000) {
2549 return clz32(~val);
2550 } else {
2551 return clz32(val);
2552 }
2553 }
2555 uint32_t helper_cntlzw32(uint32_t val)
2556 {
2557 return clz32(val);
2558 }
2560 /* 440 specific */
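/* dlmzb (determine leftmost zero byte) scans the bytes of high:low from the
 * most-significant end, returns the resulting byte count in RT and the low
 * bits of XER, and, when update_Rc is set, records in CR0 whether the zero
 * byte was found in the high word (0x4), the low word (0x8), or not at all
 * (0x2), ORed with XER[SO]. */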
2561 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2562 target_ulong low, uint32_t update_Rc)
2563 {
2564 target_ulong mask;
2565 int i;
2567 i = 1;
2568 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2569 if ((high & mask) == 0) {
2570 if (update_Rc) {
2571 env->crf[0] = 0x4;
2572 }
2573 goto done;
2574 }
2575 i++;
2576 }
2577 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2578 if ((low & mask) == 0) {
2579 if (update_Rc) {
2580 env->crf[0] = 0x8;
2581 }
2582 goto done;
2583 }
2584 i++;
2585 }
2586 if (update_Rc) {
2587 env->crf[0] = 0x2;
2588 }
2589 done:
2590 env->xer = (env->xer & ~0x7F) | i;
2591 if (update_Rc) {
2592 env->crf[0] |= xer_so;
2593 }
2594 return i;
2595 }