Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160614' into staging
[qemu/kevin.git] / target-ppc / int_helper.c
blob74453763d6dd7fca49f612f892473c0ac1d24681
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "exec/exec-all.h"
22 #include "qemu/host-utils.h"
23 #include "exec/helper-proto.h"
24 #include "crypto/aes.h"
26 #include "helper_regs.h"
27 /*****************************************************************************/
28 /* Fixed point operations helpers */
30 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
31 uint32_t oe)
33 uint64_t rt = 0;
34 int overflow = 0;
36 uint64_t dividend = (uint64_t)ra << 32;
37 uint64_t divisor = (uint32_t)rb;
39 if (unlikely(divisor == 0)) {
40 overflow = 1;
41 } else {
42 rt = dividend / divisor;
43 overflow = rt > UINT32_MAX;
46 if (unlikely(overflow)) {
47 rt = 0; /* Undefined */
50 if (oe) {
51 if (unlikely(overflow)) {
52 env->so = env->ov = 1;
53 } else {
54 env->ov = 0;
58 return (target_ulong)rt;
61 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
62 uint32_t oe)
64 int64_t rt = 0;
65 int overflow = 0;
67 int64_t dividend = (int64_t)ra << 32;
68 int64_t divisor = (int64_t)((int32_t)rb);
70 if (unlikely((divisor == 0) ||
71 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
72 overflow = 1;
73 } else {
74 rt = dividend / divisor;
75 overflow = rt != (int32_t)rt;
78 if (unlikely(overflow)) {
79 rt = 0; /* Undefined */
82 if (oe) {
83 if (unlikely(overflow)) {
84 env->so = env->ov = 1;
85 } else {
86 env->ov = 0;
90 return (target_ulong)rt;
93 #if defined(TARGET_PPC64)
95 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
97 uint64_t rt = 0;
98 int overflow = 0;
100 overflow = divu128(&rt, &ra, rb);
102 if (unlikely(overflow)) {
103 rt = 0; /* Undefined */
106 if (oe) {
107 if (unlikely(overflow)) {
108 env->so = env->ov = 1;
109 } else {
110 env->ov = 0;
114 return rt;
117 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
119 int64_t rt = 0;
120 int64_t ra = (int64_t)rau;
121 int64_t rb = (int64_t)rbu;
122 int overflow = divs128(&rt, &ra, rb);
124 if (unlikely(overflow)) {
125 rt = 0; /* Undefined */
128 if (oe) {
130 if (unlikely(overflow)) {
131 env->so = env->ov = 1;
132 } else {
133 env->ov = 0;
137 return rt;
140 #endif
143 target_ulong helper_cntlzw(target_ulong t)
145 return clz32(t);
148 #if defined(TARGET_PPC64)
149 target_ulong helper_cntlzd(target_ulong t)
151 return clz64(t);
153 #endif
155 #if defined(TARGET_PPC64)
/*
 * Bit permute doubleword.
 *
 * Each of the eight bytes of rs is a bit selector.  For byte number k
 * (counting from the least significant byte), a selector below 64 picks the
 * bit of rb that lies `selector` positions below the most significant bit;
 * if that bit is set, bit k of the result is set.  Selectors of 64 or more
 * contribute a zero bit.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t perm = 0;
    int k;

    for (k = 0; k < 8; k++, rs >>= 8) {
        const unsigned sel = rs & 0xFF;

        if (sel >= 64) {
            continue;
        }
        if ((rb >> (63 - sel)) & 1) {
            perm |= (uint64_t)1 << k;
        }
    }

    return perm;
}
173 #endif
175 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
177 target_ulong mask = 0xff;
178 target_ulong ra = 0;
179 int i;
181 for (i = 0; i < sizeof(target_ulong); i++) {
182 if ((rs & mask) == (rb & mask)) {
183 ra |= mask;
185 mask <<= 8;
187 return ra;
190 /* shift right arithmetic helper */
191 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
192 target_ulong shift)
194 int32_t ret;
196 if (likely(!(shift & 0x20))) {
197 if (likely((uint32_t)shift != 0)) {
198 shift &= 0x1f;
199 ret = (int32_t)value >> shift;
200 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
201 env->ca = 0;
202 } else {
203 env->ca = 1;
205 } else {
206 ret = (int32_t)value;
207 env->ca = 0;
209 } else {
210 ret = (int32_t)value >> 31;
211 env->ca = (ret != 0);
213 return (target_long)ret;
216 #if defined(TARGET_PPC64)
217 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
218 target_ulong shift)
220 int64_t ret;
222 if (likely(!(shift & 0x40))) {
223 if (likely((uint64_t)shift != 0)) {
224 shift &= 0x3f;
225 ret = (int64_t)value >> shift;
226 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
227 env->ca = 0;
228 } else {
229 env->ca = 1;
231 } else {
232 ret = (int64_t)value;
233 env->ca = 0;
235 } else {
236 ret = (int64_t)value >> 63;
237 env->ca = (ret != 0);
239 return ret;
241 #endif
243 #if defined(TARGET_PPC64)
244 target_ulong helper_popcntb(target_ulong val)
246 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
247 0x5555555555555555ULL);
248 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
249 0x3333333333333333ULL);
250 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
251 0x0f0f0f0f0f0f0f0fULL);
252 return val;
255 target_ulong helper_popcntw(target_ulong val)
257 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
258 0x5555555555555555ULL);
259 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
260 0x3333333333333333ULL);
261 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
262 0x0f0f0f0f0f0f0f0fULL);
263 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
264 0x00ff00ff00ff00ffULL);
265 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
266 0x0000ffff0000ffffULL);
267 return val;
270 target_ulong helper_popcntd(target_ulong val)
272 return ctpop64(val);
274 #else
275 target_ulong helper_popcntb(target_ulong val)
277 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
278 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
279 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
280 return val;
283 target_ulong helper_popcntw(target_ulong val)
285 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
286 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
287 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
288 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
289 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
290 return val;
292 #endif
294 /*****************************************************************************/
295 /* PowerPC 601 specific instructions (POWER bridge) */
296 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
298 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
300 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
301 (int32_t)arg2 == 0) {
302 env->spr[SPR_MQ] = 0;
303 return INT32_MIN;
304 } else {
305 env->spr[SPR_MQ] = tmp % arg2;
306 return tmp / (int32_t)arg2;
310 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
311 target_ulong arg2)
313 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
315 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
316 (int32_t)arg2 == 0) {
317 env->so = env->ov = 1;
318 env->spr[SPR_MQ] = 0;
319 return INT32_MIN;
320 } else {
321 env->spr[SPR_MQ] = tmp % arg2;
322 tmp /= (int32_t)arg2;
323 if ((int32_t)tmp != tmp) {
324 env->so = env->ov = 1;
325 } else {
326 env->ov = 0;
328 return tmp;
332 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
333 target_ulong arg2)
335 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
336 (int32_t)arg2 == 0) {
337 env->spr[SPR_MQ] = 0;
338 return INT32_MIN;
339 } else {
340 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
341 return (int32_t)arg1 / (int32_t)arg2;
345 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
346 target_ulong arg2)
348 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
349 (int32_t)arg2 == 0) {
350 env->so = env->ov = 1;
351 env->spr[SPR_MQ] = 0;
352 return INT32_MIN;
353 } else {
354 env->ov = 0;
355 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
356 return (int32_t)arg1 / (int32_t)arg2;
360 /*****************************************************************************/
361 /* 602 specific instructions */
362 /* mfrom is the most crazy instruction ever seen, imho ! */
363 /* Real implementation uses a ROM table. Do the same */
/* Extremely decomposed:
 *   return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
368 #if !defined(CONFIG_USER_ONLY)
369 target_ulong helper_602_mfrom(target_ulong arg)
371 if (likely(arg < 602)) {
372 #include "mfrom_table.c"
373 return mfrom_ROM_table[arg];
374 } else {
375 return 0;
378 #endif
380 /*****************************************************************************/
381 /* Altivec extension helpers */
/*
 * Host-endianness dependent vector indexing.
 *
 * HI_IDX / LO_IDX:  indices of the two halves of a two-element pair.
 * AVRB(i) / AVRW(i): member designators for byte / word i, mirrored on
 *                    little-endian hosts.
 * VECTOR_FOR_INORDER_I(index, element): loop header over r->element that
 *     counts up on big-endian hosts and down on little-endian hosts.  The
 *     index variable must be signed, because the descending form
 *     terminates on "index >= 0".
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15-(i)]
#define AVRW(i) u32[3-(i)]
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif
/*
 * Saturating conversion helpers.
 *
 * SATCVT(from, to, ...) defines cvt<from><to>(x, sat), which clamps x to
 * [min, max] before narrowing it to to_type.  SATCVTU is the variant for
 * unsigned sources, where only the upper bound needs checking.  *sat is set
 * to 1 whenever clamping happened and is left untouched otherwise.
 */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x < (from_type)min) {                               \
            *sat = 1;                                           \
            return min;                                         \
        }                                                       \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
445 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
447 int i, j = (sh & 0xf);
449 VECTOR_FOR_INORDER_I(i, u8) {
450 r->u8[i] = j++;
454 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
456 int i, j = 0x10 - (sh & 0xf);
458 VECTOR_FOR_INORDER_I(i, u8) {
459 r->u8[i] = j++;
463 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
465 #if defined(HOST_WORDS_BIGENDIAN)
466 env->vscr = r->u32[3];
467 #else
468 env->vscr = r->u32[0];
469 #endif
470 set_flush_to_zero(vscr_nj, &env->vec_status);
473 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
475 int i;
477 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
478 r->u32[i] = ~a->u32[i] < b->u32[i];
482 #define VARITH_DO(name, op, element) \
483 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
485 int i; \
487 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
488 r->element[i] = a->element[i] op b->element[i]; \
491 #define VARITH(suffix, element) \
492 VARITH_DO(add##suffix, +, element) \
493 VARITH_DO(sub##suffix, -, element)
494 VARITH(ubm, u8)
495 VARITH(uhm, u16)
496 VARITH(uwm, u32)
497 VARITH(udm, u64)
498 VARITH_DO(muluwm, *, u32)
499 #undef VARITH_DO
500 #undef VARITH
502 #define VARITHFP(suffix, func) \
503 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
504 ppc_avr_t *b) \
506 int i; \
508 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
509 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
512 VARITHFP(addfp, float32_add)
513 VARITHFP(subfp, float32_sub)
514 VARITHFP(minfp, float32_min)
515 VARITHFP(maxfp, float32_max)
516 #undef VARITHFP
518 #define VARITHFPFMA(suffix, type) \
519 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
520 ppc_avr_t *b, ppc_avr_t *c) \
522 int i; \
523 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
524 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
525 type, &env->vec_status); \
528 VARITHFPFMA(maddfp, 0);
529 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
530 #undef VARITHFPFMA
532 #define VARITHSAT_CASE(type, op, cvt, element) \
534 type result = (type)a->element[i] op (type)b->element[i]; \
535 r->element[i] = cvt(result, &sat); \
538 #define VARITHSAT_DO(name, op, optype, cvt, element) \
539 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
540 ppc_avr_t *b) \
542 int sat = 0; \
543 int i; \
545 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
546 switch (sizeof(r->element[0])) { \
547 case 1: \
548 VARITHSAT_CASE(optype, op, cvt, element); \
549 break; \
550 case 2: \
551 VARITHSAT_CASE(optype, op, cvt, element); \
552 break; \
553 case 4: \
554 VARITHSAT_CASE(optype, op, cvt, element); \
555 break; \
558 if (sat) { \
559 env->vscr |= (1 << VSCR_SAT); \
562 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
563 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
564 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
565 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
566 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
567 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
568 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
569 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
570 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
571 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
572 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
573 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
574 #undef VARITHSAT_CASE
575 #undef VARITHSAT_DO
576 #undef VARITHSAT_SIGNED
577 #undef VARITHSAT_UNSIGNED
579 #define VAVG_DO(name, element, etype) \
580 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
582 int i; \
584 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
585 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
586 r->element[i] = x >> 1; \
590 #define VAVG(type, signed_element, signed_type, unsigned_element, \
591 unsigned_type) \
592 VAVG_DO(avgs##type, signed_element, signed_type) \
593 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
594 VAVG(b, s8, int16_t, u8, uint16_t)
595 VAVG(h, s16, int32_t, u16, uint32_t)
596 VAVG(w, s32, int64_t, u32, uint64_t)
597 #undef VAVG_DO
598 #undef VAVG
600 #define VCF(suffix, cvt, element) \
601 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
602 ppc_avr_t *b, uint32_t uim) \
604 int i; \
606 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
607 float32 t = cvt(b->element[i], &env->vec_status); \
608 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
611 VCF(ux, uint32_to_float32, u32)
612 VCF(sx, int32_to_float32, s32)
613 #undef VCF
615 #define VCMP_DO(suffix, compare, element, record) \
616 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
617 ppc_avr_t *a, ppc_avr_t *b) \
619 uint64_t ones = (uint64_t)-1; \
620 uint64_t all = ones; \
621 uint64_t none = 0; \
622 int i; \
624 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
625 uint64_t result = (a->element[i] compare b->element[i] ? \
626 ones : 0x0); \
627 switch (sizeof(a->element[0])) { \
628 case 8: \
629 r->u64[i] = result; \
630 break; \
631 case 4: \
632 r->u32[i] = result; \
633 break; \
634 case 2: \
635 r->u16[i] = result; \
636 break; \
637 case 1: \
638 r->u8[i] = result; \
639 break; \
641 all &= result; \
642 none |= result; \
644 if (record) { \
645 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
648 #define VCMP(suffix, compare, element) \
649 VCMP_DO(suffix, compare, element, 0) \
650 VCMP_DO(suffix##_dot, compare, element, 1)
651 VCMP(equb, ==, u8)
652 VCMP(equh, ==, u16)
653 VCMP(equw, ==, u32)
654 VCMP(equd, ==, u64)
655 VCMP(gtub, >, u8)
656 VCMP(gtuh, >, u16)
657 VCMP(gtuw, >, u32)
658 VCMP(gtud, >, u64)
659 VCMP(gtsb, >, s8)
660 VCMP(gtsh, >, s16)
661 VCMP(gtsw, >, s32)
662 VCMP(gtsd, >, s64)
663 #undef VCMP_DO
664 #undef VCMP
666 #define VCMPFP_DO(suffix, compare, order, record) \
667 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
668 ppc_avr_t *a, ppc_avr_t *b) \
670 uint32_t ones = (uint32_t)-1; \
671 uint32_t all = ones; \
672 uint32_t none = 0; \
673 int i; \
675 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
676 uint32_t result; \
677 int rel = float32_compare_quiet(a->f[i], b->f[i], \
678 &env->vec_status); \
679 if (rel == float_relation_unordered) { \
680 result = 0; \
681 } else if (rel compare order) { \
682 result = ones; \
683 } else { \
684 result = 0; \
686 r->u32[i] = result; \
687 all &= result; \
688 none |= result; \
690 if (record) { \
691 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
694 #define VCMPFP(suffix, compare, order) \
695 VCMPFP_DO(suffix, compare, order, 0) \
696 VCMPFP_DO(suffix##_dot, compare, order, 1)
697 VCMPFP(eqfp, ==, float_relation_equal)
698 VCMPFP(gefp, !=, float_relation_less)
699 VCMPFP(gtfp, ==, float_relation_greater)
700 #undef VCMPFP_DO
701 #undef VCMPFP
703 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
704 ppc_avr_t *a, ppc_avr_t *b, int record)
706 int i;
707 int all_in = 0;
709 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
710 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
711 if (le_rel == float_relation_unordered) {
712 r->u32[i] = 0xc0000000;
713 all_in = 1;
714 } else {
715 float32 bneg = float32_chs(b->f[i]);
716 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
717 int le = le_rel != float_relation_greater;
718 int ge = ge_rel != float_relation_less;
720 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
721 all_in |= (!le | !ge);
724 if (record) {
725 env->crf[6] = (all_in == 0) << 1;
729 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
731 vcmpbfp_internal(env, r, a, b, 0);
734 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
735 ppc_avr_t *b)
737 vcmpbfp_internal(env, r, a, b, 1);
740 #define VCT(suffix, satcvt, element) \
741 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
742 ppc_avr_t *b, uint32_t uim) \
744 int i; \
745 int sat = 0; \
746 float_status s = env->vec_status; \
748 set_float_rounding_mode(float_round_to_zero, &s); \
749 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
750 if (float32_is_any_nan(b->f[i])) { \
751 r->element[i] = 0; \
752 } else { \
753 float64 t = float32_to_float64(b->f[i], &s); \
754 int64_t j; \
756 t = float64_scalbn(t, uim, &s); \
757 j = float64_to_int64(t, &s); \
758 r->element[i] = satcvt(j, &sat); \
761 if (sat) { \
762 env->vscr |= (1 << VSCR_SAT); \
765 VCT(uxs, cvtsduw, u32)
766 VCT(sxs, cvtsdsw, s32)
767 #undef VCT
769 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
770 ppc_avr_t *b, ppc_avr_t *c)
772 int sat = 0;
773 int i;
775 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
776 int32_t prod = a->s16[i] * b->s16[i];
777 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
779 r->s16[i] = cvtswsh(t, &sat);
782 if (sat) {
783 env->vscr |= (1 << VSCR_SAT);
787 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
788 ppc_avr_t *b, ppc_avr_t *c)
790 int sat = 0;
791 int i;
793 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
794 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
795 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
796 r->s16[i] = cvtswsh(t, &sat);
799 if (sat) {
800 env->vscr |= (1 << VSCR_SAT);
804 #define VMINMAX_DO(name, compare, element) \
805 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
807 int i; \
809 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
810 if (a->element[i] compare b->element[i]) { \
811 r->element[i] = b->element[i]; \
812 } else { \
813 r->element[i] = a->element[i]; \
817 #define VMINMAX(suffix, element) \
818 VMINMAX_DO(min##suffix, >, element) \
819 VMINMAX_DO(max##suffix, <, element)
820 VMINMAX(sb, s8)
821 VMINMAX(sh, s16)
822 VMINMAX(sw, s32)
823 VMINMAX(sd, s64)
824 VMINMAX(ub, u8)
825 VMINMAX(uh, u16)
826 VMINMAX(uw, u32)
827 VMINMAX(ud, u64)
828 #undef VMINMAX_DO
829 #undef VMINMAX
831 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
833 int i;
835 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
836 int32_t prod = a->s16[i] * b->s16[i];
837 r->s16[i] = (int16_t) (prod + c->s16[i]);
841 #define VMRG_DO(name, element, highp) \
842 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
844 ppc_avr_t result; \
845 int i; \
846 size_t n_elems = ARRAY_SIZE(r->element); \
848 for (i = 0; i < n_elems / 2; i++) { \
849 if (highp) { \
850 result.element[i*2+HI_IDX] = a->element[i]; \
851 result.element[i*2+LO_IDX] = b->element[i]; \
852 } else { \
853 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
854 b->element[n_elems - i - 1]; \
855 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
856 a->element[n_elems - i - 1]; \
859 *r = result; \
861 #if defined(HOST_WORDS_BIGENDIAN)
862 #define MRGHI 0
863 #define MRGLO 1
864 #else
865 #define MRGHI 1
866 #define MRGLO 0
867 #endif
868 #define VMRG(suffix, element) \
869 VMRG_DO(mrgl##suffix, element, MRGHI) \
870 VMRG_DO(mrgh##suffix, element, MRGLO)
871 VMRG(b, u8)
872 VMRG(h, u16)
873 VMRG(w, u32)
874 #undef VMRG_DO
875 #undef VMRG
876 #undef MRGHI
877 #undef MRGLO
879 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
880 ppc_avr_t *b, ppc_avr_t *c)
882 int32_t prod[16];
883 int i;
885 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
886 prod[i] = (int32_t)a->s8[i] * b->u8[i];
889 VECTOR_FOR_INORDER_I(i, s32) {
890 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
891 prod[4 * i + 2] + prod[4 * i + 3];
895 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
896 ppc_avr_t *b, ppc_avr_t *c)
898 int32_t prod[8];
899 int i;
901 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
902 prod[i] = a->s16[i] * b->s16[i];
905 VECTOR_FOR_INORDER_I(i, s32) {
906 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
910 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
911 ppc_avr_t *b, ppc_avr_t *c)
913 int32_t prod[8];
914 int i;
915 int sat = 0;
917 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
918 prod[i] = (int32_t)a->s16[i] * b->s16[i];
921 VECTOR_FOR_INORDER_I(i, s32) {
922 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
924 r->u32[i] = cvtsdsw(t, &sat);
927 if (sat) {
928 env->vscr |= (1 << VSCR_SAT);
932 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
933 ppc_avr_t *b, ppc_avr_t *c)
935 uint16_t prod[16];
936 int i;
938 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
939 prod[i] = a->u8[i] * b->u8[i];
942 VECTOR_FOR_INORDER_I(i, u32) {
943 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
944 prod[4 * i + 2] + prod[4 * i + 3];
948 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
949 ppc_avr_t *b, ppc_avr_t *c)
951 uint32_t prod[8];
952 int i;
954 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
955 prod[i] = a->u16[i] * b->u16[i];
958 VECTOR_FOR_INORDER_I(i, u32) {
959 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
963 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
964 ppc_avr_t *b, ppc_avr_t *c)
966 uint32_t prod[8];
967 int i;
968 int sat = 0;
970 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
971 prod[i] = a->u16[i] * b->u16[i];
974 VECTOR_FOR_INORDER_I(i, s32) {
975 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
977 r->u32[i] = cvtuduw(t, &sat);
980 if (sat) {
981 env->vscr |= (1 << VSCR_SAT);
985 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
986 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
988 int i; \
990 VECTOR_FOR_INORDER_I(i, prod_element) { \
991 if (evenp) { \
992 r->prod_element[i] = \
993 (cast)a->mul_element[i * 2 + HI_IDX] * \
994 (cast)b->mul_element[i * 2 + HI_IDX]; \
995 } else { \
996 r->prod_element[i] = \
997 (cast)a->mul_element[i * 2 + LO_IDX] * \
998 (cast)b->mul_element[i * 2 + LO_IDX]; \
1002 #define VMUL(suffix, mul_element, prod_element, cast) \
1003 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1004 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1005 VMUL(sb, s8, s16, int16_t)
1006 VMUL(sh, s16, s32, int32_t)
1007 VMUL(sw, s32, s64, int64_t)
1008 VMUL(ub, u8, u16, uint16_t)
1009 VMUL(uh, u16, u32, uint32_t)
1010 VMUL(uw, u32, u64, uint64_t)
1011 #undef VMUL_DO
1012 #undef VMUL
1014 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1015 ppc_avr_t *c)
1017 ppc_avr_t result;
1018 int i;
1020 VECTOR_FOR_INORDER_I(i, u8) {
1021 int s = c->u8[i] & 0x1f;
1022 #if defined(HOST_WORDS_BIGENDIAN)
1023 int index = s & 0xf;
1024 #else
1025 int index = 15 - (s & 0xf);
1026 #endif
1028 if (s & 0x10) {
1029 result.u8[i] = b->u8[index];
1030 } else {
1031 result.u8[i] = a->u8[index];
1034 *r = result;
1037 #if defined(HOST_WORDS_BIGENDIAN)
1038 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1039 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1040 #else
1041 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1042 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1043 #endif
1045 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1047 int i;
1048 uint64_t perm = 0;
1050 VECTOR_FOR_INORDER_I(i, u8) {
1051 int index = VBPERMQ_INDEX(b, i);
1053 if (index < 128) {
1054 uint64_t mask = (1ull << (63-(index & 0x3F)));
1055 if (a->u64[VBPERMQ_DW(index)] & mask) {
1056 perm |= (0x8000 >> i);
1061 r->u64[HI_IDX] = perm;
1062 r->u64[LO_IDX] = 0;
1065 #undef VBPERMQ_INDEX
1066 #undef VBPERMQ_DW
1068 static const uint64_t VGBBD_MASKS[256] = {
1069 0x0000000000000000ull, /* 00 */
1070 0x0000000000000080ull, /* 01 */
1071 0x0000000000008000ull, /* 02 */
1072 0x0000000000008080ull, /* 03 */
1073 0x0000000000800000ull, /* 04 */
1074 0x0000000000800080ull, /* 05 */
1075 0x0000000000808000ull, /* 06 */
1076 0x0000000000808080ull, /* 07 */
1077 0x0000000080000000ull, /* 08 */
1078 0x0000000080000080ull, /* 09 */
1079 0x0000000080008000ull, /* 0A */
1080 0x0000000080008080ull, /* 0B */
1081 0x0000000080800000ull, /* 0C */
1082 0x0000000080800080ull, /* 0D */
1083 0x0000000080808000ull, /* 0E */
1084 0x0000000080808080ull, /* 0F */
1085 0x0000008000000000ull, /* 10 */
1086 0x0000008000000080ull, /* 11 */
1087 0x0000008000008000ull, /* 12 */
1088 0x0000008000008080ull, /* 13 */
1089 0x0000008000800000ull, /* 14 */
1090 0x0000008000800080ull, /* 15 */
1091 0x0000008000808000ull, /* 16 */
1092 0x0000008000808080ull, /* 17 */
1093 0x0000008080000000ull, /* 18 */
1094 0x0000008080000080ull, /* 19 */
1095 0x0000008080008000ull, /* 1A */
1096 0x0000008080008080ull, /* 1B */
1097 0x0000008080800000ull, /* 1C */
1098 0x0000008080800080ull, /* 1D */
1099 0x0000008080808000ull, /* 1E */
1100 0x0000008080808080ull, /* 1F */
1101 0x0000800000000000ull, /* 20 */
1102 0x0000800000000080ull, /* 21 */
1103 0x0000800000008000ull, /* 22 */
1104 0x0000800000008080ull, /* 23 */
1105 0x0000800000800000ull, /* 24 */
1106 0x0000800000800080ull, /* 25 */
1107 0x0000800000808000ull, /* 26 */
1108 0x0000800000808080ull, /* 27 */
1109 0x0000800080000000ull, /* 28 */
1110 0x0000800080000080ull, /* 29 */
1111 0x0000800080008000ull, /* 2A */
1112 0x0000800080008080ull, /* 2B */
1113 0x0000800080800000ull, /* 2C */
1114 0x0000800080800080ull, /* 2D */
1115 0x0000800080808000ull, /* 2E */
1116 0x0000800080808080ull, /* 2F */
1117 0x0000808000000000ull, /* 30 */
1118 0x0000808000000080ull, /* 31 */
1119 0x0000808000008000ull, /* 32 */
1120 0x0000808000008080ull, /* 33 */
1121 0x0000808000800000ull, /* 34 */
1122 0x0000808000800080ull, /* 35 */
1123 0x0000808000808000ull, /* 36 */
1124 0x0000808000808080ull, /* 37 */
1125 0x0000808080000000ull, /* 38 */
1126 0x0000808080000080ull, /* 39 */
1127 0x0000808080008000ull, /* 3A */
1128 0x0000808080008080ull, /* 3B */
1129 0x0000808080800000ull, /* 3C */
1130 0x0000808080800080ull, /* 3D */
1131 0x0000808080808000ull, /* 3E */
1132 0x0000808080808080ull, /* 3F */
1133 0x0080000000000000ull, /* 40 */
1134 0x0080000000000080ull, /* 41 */
1135 0x0080000000008000ull, /* 42 */
1136 0x0080000000008080ull, /* 43 */
1137 0x0080000000800000ull, /* 44 */
1138 0x0080000000800080ull, /* 45 */
1139 0x0080000000808000ull, /* 46 */
1140 0x0080000000808080ull, /* 47 */
1141 0x0080000080000000ull, /* 48 */
1142 0x0080000080000080ull, /* 49 */
1143 0x0080000080008000ull, /* 4A */
1144 0x0080000080008080ull, /* 4B */
1145 0x0080000080800000ull, /* 4C */
1146 0x0080000080800080ull, /* 4D */
1147 0x0080000080808000ull, /* 4E */
1148 0x0080000080808080ull, /* 4F */
1149 0x0080008000000000ull, /* 50 */
1150 0x0080008000000080ull, /* 51 */
1151 0x0080008000008000ull, /* 52 */
1152 0x0080008000008080ull, /* 53 */
1153 0x0080008000800000ull, /* 54 */
1154 0x0080008000800080ull, /* 55 */
1155 0x0080008000808000ull, /* 56 */
1156 0x0080008000808080ull, /* 57 */
1157 0x0080008080000000ull, /* 58 */
1158 0x0080008080000080ull, /* 59 */
1159 0x0080008080008000ull, /* 5A */
1160 0x0080008080008080ull, /* 5B */
1161 0x0080008080800000ull, /* 5C */
1162 0x0080008080800080ull, /* 5D */
1163 0x0080008080808000ull, /* 5E */
1164 0x0080008080808080ull, /* 5F */
1165 0x0080800000000000ull, /* 60 */
1166 0x0080800000000080ull, /* 61 */
1167 0x0080800000008000ull, /* 62 */
1168 0x0080800000008080ull, /* 63 */
1169 0x0080800000800000ull, /* 64 */
1170 0x0080800000800080ull, /* 65 */
1171 0x0080800000808000ull, /* 66 */
1172 0x0080800000808080ull, /* 67 */
1173 0x0080800080000000ull, /* 68 */
1174 0x0080800080000080ull, /* 69 */
1175 0x0080800080008000ull, /* 6A */
1176 0x0080800080008080ull, /* 6B */
1177 0x0080800080800000ull, /* 6C */
1178 0x0080800080800080ull, /* 6D */
1179 0x0080800080808000ull, /* 6E */
1180 0x0080800080808080ull, /* 6F */
1181 0x0080808000000000ull, /* 70 */
1182 0x0080808000000080ull, /* 71 */
1183 0x0080808000008000ull, /* 72 */
1184 0x0080808000008080ull, /* 73 */
1185 0x0080808000800000ull, /* 74 */
1186 0x0080808000800080ull, /* 75 */
1187 0x0080808000808000ull, /* 76 */
1188 0x0080808000808080ull, /* 77 */
1189 0x0080808080000000ull, /* 78 */
1190 0x0080808080000080ull, /* 79 */
1191 0x0080808080008000ull, /* 7A */
1192 0x0080808080008080ull, /* 7B */
1193 0x0080808080800000ull, /* 7C */
1194 0x0080808080800080ull, /* 7D */
1195 0x0080808080808000ull, /* 7E */
1196 0x0080808080808080ull, /* 7F */
1197 0x8000000000000000ull, /* 80 */
1198 0x8000000000000080ull, /* 81 */
1199 0x8000000000008000ull, /* 82 */
1200 0x8000000000008080ull, /* 83 */
1201 0x8000000000800000ull, /* 84 */
1202 0x8000000000800080ull, /* 85 */
1203 0x8000000000808000ull, /* 86 */
1204 0x8000000000808080ull, /* 87 */
1205 0x8000000080000000ull, /* 88 */
1206 0x8000000080000080ull, /* 89 */
1207 0x8000000080008000ull, /* 8A */
1208 0x8000000080008080ull, /* 8B */
1209 0x8000000080800000ull, /* 8C */
1210 0x8000000080800080ull, /* 8D */
1211 0x8000000080808000ull, /* 8E */
1212 0x8000000080808080ull, /* 8F */
1213 0x8000008000000000ull, /* 90 */
1214 0x8000008000000080ull, /* 91 */
1215 0x8000008000008000ull, /* 92 */
1216 0x8000008000008080ull, /* 93 */
1217 0x8000008000800000ull, /* 94 */
1218 0x8000008000800080ull, /* 95 */
1219 0x8000008000808000ull, /* 96 */
1220 0x8000008000808080ull, /* 97 */
1221 0x8000008080000000ull, /* 98 */
1222 0x8000008080000080ull, /* 99 */
1223 0x8000008080008000ull, /* 9A */
1224 0x8000008080008080ull, /* 9B */
1225 0x8000008080800000ull, /* 9C */
1226 0x8000008080800080ull, /* 9D */
1227 0x8000008080808000ull, /* 9E */
1228 0x8000008080808080ull, /* 9F */
1229 0x8000800000000000ull, /* A0 */
1230 0x8000800000000080ull, /* A1 */
1231 0x8000800000008000ull, /* A2 */
1232 0x8000800000008080ull, /* A3 */
1233 0x8000800000800000ull, /* A4 */
1234 0x8000800000800080ull, /* A5 */
1235 0x8000800000808000ull, /* A6 */
1236 0x8000800000808080ull, /* A7 */
1237 0x8000800080000000ull, /* A8 */
1238 0x8000800080000080ull, /* A9 */
1239 0x8000800080008000ull, /* AA */
1240 0x8000800080008080ull, /* AB */
1241 0x8000800080800000ull, /* AC */
1242 0x8000800080800080ull, /* AD */
1243 0x8000800080808000ull, /* AE */
1244 0x8000800080808080ull, /* AF */
1245 0x8000808000000000ull, /* B0 */
1246 0x8000808000000080ull, /* B1 */
1247 0x8000808000008000ull, /* B2 */
1248 0x8000808000008080ull, /* B3 */
1249 0x8000808000800000ull, /* B4 */
1250 0x8000808000800080ull, /* B5 */
1251 0x8000808000808000ull, /* B6 */
1252 0x8000808000808080ull, /* B7 */
1253 0x8000808080000000ull, /* B8 */
1254 0x8000808080000080ull, /* B9 */
1255 0x8000808080008000ull, /* BA */
1256 0x8000808080008080ull, /* BB */
1257 0x8000808080800000ull, /* BC */
1258 0x8000808080800080ull, /* BD */
1259 0x8000808080808000ull, /* BE */
1260 0x8000808080808080ull, /* BF */
1261 0x8080000000000000ull, /* C0 */
1262 0x8080000000000080ull, /* C1 */
1263 0x8080000000008000ull, /* C2 */
1264 0x8080000000008080ull, /* C3 */
1265 0x8080000000800000ull, /* C4 */
1266 0x8080000000800080ull, /* C5 */
1267 0x8080000000808000ull, /* C6 */
1268 0x8080000000808080ull, /* C7 */
1269 0x8080000080000000ull, /* C8 */
1270 0x8080000080000080ull, /* C9 */
1271 0x8080000080008000ull, /* CA */
1272 0x8080000080008080ull, /* CB */
1273 0x8080000080800000ull, /* CC */
1274 0x8080000080800080ull, /* CD */
1275 0x8080000080808000ull, /* CE */
1276 0x8080000080808080ull, /* CF */
1277 0x8080008000000000ull, /* D0 */
1278 0x8080008000000080ull, /* D1 */
1279 0x8080008000008000ull, /* D2 */
1280 0x8080008000008080ull, /* D3 */
1281 0x8080008000800000ull, /* D4 */
1282 0x8080008000800080ull, /* D5 */
1283 0x8080008000808000ull, /* D6 */
1284 0x8080008000808080ull, /* D7 */
1285 0x8080008080000000ull, /* D8 */
1286 0x8080008080000080ull, /* D9 */
1287 0x8080008080008000ull, /* DA */
1288 0x8080008080008080ull, /* DB */
1289 0x8080008080800000ull, /* DC */
1290 0x8080008080800080ull, /* DD */
1291 0x8080008080808000ull, /* DE */
1292 0x8080008080808080ull, /* DF */
1293 0x8080800000000000ull, /* E0 */
1294 0x8080800000000080ull, /* E1 */
1295 0x8080800000008000ull, /* E2 */
1296 0x8080800000008080ull, /* E3 */
1297 0x8080800000800000ull, /* E4 */
1298 0x8080800000800080ull, /* E5 */
1299 0x8080800000808000ull, /* E6 */
1300 0x8080800000808080ull, /* E7 */
1301 0x8080800080000000ull, /* E8 */
1302 0x8080800080000080ull, /* E9 */
1303 0x8080800080008000ull, /* EA */
1304 0x8080800080008080ull, /* EB */
1305 0x8080800080800000ull, /* EC */
1306 0x8080800080800080ull, /* ED */
1307 0x8080800080808000ull, /* EE */
1308 0x8080800080808080ull, /* EF */
1309 0x8080808000000000ull, /* F0 */
1310 0x8080808000000080ull, /* F1 */
1311 0x8080808000008000ull, /* F2 */
1312 0x8080808000008080ull, /* F3 */
1313 0x8080808000800000ull, /* F4 */
1314 0x8080808000800080ull, /* F5 */
1315 0x8080808000808000ull, /* F6 */
1316 0x8080808000808080ull, /* F7 */
1317 0x8080808080000000ull, /* F8 */
1318 0x8080808080000080ull, /* F9 */
1319 0x8080808080008000ull, /* FA */
1320 0x8080808080008080ull, /* FB */
1321 0x8080808080800000ull, /* FC */
1322 0x8080808080800080ull, /* FD */
1323 0x8080808080808000ull, /* FE */
1324 0x8080808080808080ull, /* FF */
1327 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1329 int i;
1330 uint64_t t[2] = { 0, 0 };
1332 VECTOR_FOR_INORDER_I(i, u8) {
1333 #if defined(HOST_WORDS_BIGENDIAN)
1334 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1335 #else
1336 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1337 #endif
1340 r->u64[0] = t[0];
1341 r->u64[1] = t[1];
1344 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1345 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1347 int i, j; \
1348 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1350 VECTOR_FOR_INORDER_I(i, srcfld) { \
1351 prod[i] = 0; \
1352 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1353 if (a->srcfld[i] & (1ull<<j)) { \
1354 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1359 VECTOR_FOR_INORDER_I(i, trgfld) { \
1360 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1364 PMSUM(vpmsumb, u8, u16, uint16_t)
1365 PMSUM(vpmsumh, u16, u32, uint32_t)
1366 PMSUM(vpmsumw, u32, u64, uint64_t)
1368 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1371 #ifdef CONFIG_INT128
1372 int i, j;
1373 __uint128_t prod[2];
1375 VECTOR_FOR_INORDER_I(i, u64) {
1376 prod[i] = 0;
1377 for (j = 0; j < 64; j++) {
1378 if (a->u64[i] & (1ull<<j)) {
1379 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1384 r->u128 = prod[0] ^ prod[1];
1386 #else
1387 int i, j;
1388 ppc_avr_t prod[2];
1390 VECTOR_FOR_INORDER_I(i, u64) {
1391 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1392 for (j = 0; j < 64; j++) {
1393 if (a->u64[i] & (1ull<<j)) {
1394 ppc_avr_t bshift;
1395 if (j == 0) {
1396 bshift.u64[HI_IDX] = 0;
1397 bshift.u64[LO_IDX] = b->u64[i];
1398 } else {
1399 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1400 bshift.u64[LO_IDX] = b->u64[i] << j;
1402 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1403 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1408 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1409 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1410 #endif
1414 #if defined(HOST_WORDS_BIGENDIAN)
1415 #define PKBIG 1
1416 #else
1417 #define PKBIG 0
1418 #endif
1419 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1421 int i, j;
1422 ppc_avr_t result;
1423 #if defined(HOST_WORDS_BIGENDIAN)
1424 const ppc_avr_t *x[2] = { a, b };
1425 #else
1426 const ppc_avr_t *x[2] = { b, a };
1427 #endif
1429 VECTOR_FOR_INORDER_I(i, u64) {
1430 VECTOR_FOR_INORDER_I(j, u32) {
1431 uint32_t e = x[i]->u32[j];
1433 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1434 ((e >> 6) & 0x3e0) |
1435 ((e >> 3) & 0x1f));
1438 *r = result;
1441 #define VPK(suffix, from, to, cvt, dosat) \
1442 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1443 ppc_avr_t *a, ppc_avr_t *b) \
1445 int i; \
1446 int sat = 0; \
1447 ppc_avr_t result; \
1448 ppc_avr_t *a0 = PKBIG ? a : b; \
1449 ppc_avr_t *a1 = PKBIG ? b : a; \
1451 VECTOR_FOR_INORDER_I(i, from) { \
1452 result.to[i] = cvt(a0->from[i], &sat); \
1453 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1455 *r = result; \
1456 if (dosat && sat) { \
1457 env->vscr |= (1 << VSCR_SAT); \
1460 #define I(x, y) (x)
1461 VPK(shss, s16, s8, cvtshsb, 1)
1462 VPK(shus, s16, u8, cvtshub, 1)
1463 VPK(swss, s32, s16, cvtswsh, 1)
1464 VPK(swus, s32, u16, cvtswuh, 1)
1465 VPK(sdss, s64, s32, cvtsdsw, 1)
1466 VPK(sdus, s64, u32, cvtsduw, 1)
1467 VPK(uhus, u16, u8, cvtuhub, 1)
1468 VPK(uwus, u32, u16, cvtuwuh, 1)
1469 VPK(udus, u64, u32, cvtuduw, 1)
1470 VPK(uhum, u16, u8, I, 0)
1471 VPK(uwum, u32, u16, I, 0)
1472 VPK(udum, u64, u32, I, 0)
1473 #undef I
1474 #undef VPK
1475 #undef PKBIG
1477 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1479 int i;
1481 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1482 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1486 #define VRFI(suffix, rounding) \
1487 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1488 ppc_avr_t *b) \
1490 int i; \
1491 float_status s = env->vec_status; \
1493 set_float_rounding_mode(rounding, &s); \
1494 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1495 r->f[i] = float32_round_to_int (b->f[i], &s); \
1498 VRFI(n, float_round_nearest_even)
1499 VRFI(m, float_round_down)
1500 VRFI(p, float_round_up)
1501 VRFI(z, float_round_to_zero)
1502 #undef VRFI
1504 #define VROTATE(suffix, element, mask) \
1505 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1507 int i; \
1509 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1510 unsigned int shift = b->element[i] & mask; \
1511 r->element[i] = (a->element[i] << shift) | \
1512 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1515 VROTATE(b, u8, 0x7)
1516 VROTATE(h, u16, 0xF)
1517 VROTATE(w, u32, 0x1F)
1518 VROTATE(d, u64, 0x3F)
1519 #undef VROTATE
1521 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1523 int i;
1525 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1526 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1528 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1532 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1533 ppc_avr_t *c)
1535 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1536 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1539 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1541 int i;
1543 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1544 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1548 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1550 int i;
1552 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1553 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1557 /* The specification says that the results are undefined if all of the
1558 * shift counts are not identical. We check to make sure that they are
1559 * to conform to what real hardware appears to do. */
1560 #define VSHIFT(suffix, leftp) \
1561 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1563 int shift = b->u8[LO_IDX*15] & 0x7; \
1564 int doit = 1; \
1565 int i; \
1567 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1568 doit = doit && ((b->u8[i] & 0x7) == shift); \
1570 if (doit) { \
1571 if (shift == 0) { \
1572 *r = *a; \
1573 } else if (leftp) { \
1574 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1576 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1577 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1578 } else { \
1579 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1581 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1582 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1586 VSHIFT(l, 1)
1587 VSHIFT(r, 0)
1588 #undef VSHIFT
1590 #define VSL(suffix, element, mask) \
1591 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1593 int i; \
1595 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1596 unsigned int shift = b->element[i] & mask; \
1598 r->element[i] = a->element[i] << shift; \
1601 VSL(b, u8, 0x7)
1602 VSL(h, u16, 0x0F)
1603 VSL(w, u32, 0x1F)
1604 VSL(d, u64, 0x3F)
1605 #undef VSL
1607 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1609 int sh = shift & 0xf;
1610 int i;
1611 ppc_avr_t result;
1613 #if defined(HOST_WORDS_BIGENDIAN)
1614 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1615 int index = sh + i;
1616 if (index > 0xf) {
1617 result.u8[i] = b->u8[index - 0x10];
1618 } else {
1619 result.u8[i] = a->u8[index];
1622 #else
1623 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1624 int index = (16 - sh) + i;
1625 if (index > 0xf) {
1626 result.u8[i] = a->u8[index - 0x10];
1627 } else {
1628 result.u8[i] = b->u8[index];
1631 #endif
1632 *r = result;
1635 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1637 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1639 #if defined(HOST_WORDS_BIGENDIAN)
1640 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1641 memset(&r->u8[16-sh], 0, sh);
1642 #else
1643 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1644 memset(&r->u8[0], 0, sh);
1645 #endif
1648 /* Experimental testing shows that hardware masks the immediate. */
1649 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1650 #if defined(HOST_WORDS_BIGENDIAN)
1651 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1652 #else
1653 #define SPLAT_ELEMENT(element) \
1654 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1655 #endif
1656 #define VSPLT(suffix, element) \
1657 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1659 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1660 int i; \
1662 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1663 r->element[i] = s; \
1666 VSPLT(b, u8)
1667 VSPLT(h, u16)
1668 VSPLT(w, u32)
1669 #undef VSPLT
1670 #undef SPLAT_ELEMENT
1671 #undef _SPLAT_MASKED
1673 #define VSPLTI(suffix, element, splat_type) \
1674 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1676 splat_type x = (int8_t)(splat << 3) >> 3; \
1677 int i; \
1679 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1680 r->element[i] = x; \
1683 VSPLTI(b, s8, int8_t)
1684 VSPLTI(h, s16, int16_t)
1685 VSPLTI(w, s32, int32_t)
1686 #undef VSPLTI
1688 #define VSR(suffix, element, mask) \
1689 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1691 int i; \
1693 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1694 unsigned int shift = b->element[i] & mask; \
1695 r->element[i] = a->element[i] >> shift; \
1698 VSR(ab, s8, 0x7)
1699 VSR(ah, s16, 0xF)
1700 VSR(aw, s32, 0x1F)
1701 VSR(ad, s64, 0x3F)
1702 VSR(b, u8, 0x7)
1703 VSR(h, u16, 0xF)
1704 VSR(w, u32, 0x1F)
1705 VSR(d, u64, 0x3F)
1706 #undef VSR
1708 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1710 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1712 #if defined(HOST_WORDS_BIGENDIAN)
1713 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1714 memset(&r->u8[0], 0, sh);
1715 #else
1716 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1717 memset(&r->u8[16 - sh], 0, sh);
1718 #endif
1721 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1723 int i;
1725 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1726 r->u32[i] = a->u32[i] >= b->u32[i];
1730 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1732 int64_t t;
1733 int i, upper;
1734 ppc_avr_t result;
1735 int sat = 0;
1737 #if defined(HOST_WORDS_BIGENDIAN)
1738 upper = ARRAY_SIZE(r->s32)-1;
1739 #else
1740 upper = 0;
1741 #endif
1742 t = (int64_t)b->s32[upper];
1743 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1744 t += a->s32[i];
1745 result.s32[i] = 0;
1747 result.s32[upper] = cvtsdsw(t, &sat);
1748 *r = result;
1750 if (sat) {
1751 env->vscr |= (1 << VSCR_SAT);
1755 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1757 int i, j, upper;
1758 ppc_avr_t result;
1759 int sat = 0;
1761 #if defined(HOST_WORDS_BIGENDIAN)
1762 upper = 1;
1763 #else
1764 upper = 0;
1765 #endif
1766 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1767 int64_t t = (int64_t)b->s32[upper + i * 2];
1769 result.u64[i] = 0;
1770 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1771 t += a->s32[2 * i + j];
1773 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
1776 *r = result;
1777 if (sat) {
1778 env->vscr |= (1 << VSCR_SAT);
1782 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1784 int i, j;
1785 int sat = 0;
1787 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1788 int64_t t = (int64_t)b->s32[i];
1790 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1791 t += a->s8[4 * i + j];
1793 r->s32[i] = cvtsdsw(t, &sat);
1796 if (sat) {
1797 env->vscr |= (1 << VSCR_SAT);
1801 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1803 int sat = 0;
1804 int i;
1806 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1807 int64_t t = (int64_t)b->s32[i];
1809 t += a->s16[2 * i] + a->s16[2 * i + 1];
1810 r->s32[i] = cvtsdsw(t, &sat);
1813 if (sat) {
1814 env->vscr |= (1 << VSCR_SAT);
1818 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1820 int i, j;
1821 int sat = 0;
1823 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1824 uint64_t t = (uint64_t)b->u32[i];
1826 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1827 t += a->u8[4 * i + j];
1829 r->u32[i] = cvtuduw(t, &sat);
1832 if (sat) {
1833 env->vscr |= (1 << VSCR_SAT);
1837 #if defined(HOST_WORDS_BIGENDIAN)
1838 #define UPKHI 1
1839 #define UPKLO 0
1840 #else
1841 #define UPKHI 0
1842 #define UPKLO 1
1843 #endif
1844 #define VUPKPX(suffix, hi) \
1845 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1847 int i; \
1848 ppc_avr_t result; \
1850 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1851 uint16_t e = b->u16[hi ? i : i+4]; \
1852 uint8_t a = (e >> 15) ? 0xff : 0; \
1853 uint8_t r = (e >> 10) & 0x1f; \
1854 uint8_t g = (e >> 5) & 0x1f; \
1855 uint8_t b = e & 0x1f; \
1857 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1859 *r = result; \
1861 VUPKPX(lpx, UPKLO)
1862 VUPKPX(hpx, UPKHI)
1863 #undef VUPKPX
1865 #define VUPK(suffix, unpacked, packee, hi) \
1866 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1868 int i; \
1869 ppc_avr_t result; \
1871 if (hi) { \
1872 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1873 result.unpacked[i] = b->packee[i]; \
1875 } else { \
1876 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1877 i++) { \
1878 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1881 *r = result; \
1883 VUPK(hsb, s16, s8, UPKHI)
1884 VUPK(hsh, s32, s16, UPKHI)
1885 VUPK(hsw, s64, s32, UPKHI)
1886 VUPK(lsb, s16, s8, UPKLO)
1887 VUPK(lsh, s32, s16, UPKLO)
1888 VUPK(lsw, s64, s32, UPKLO)
1889 #undef VUPK
1890 #undef UPKHI
1891 #undef UPKLO
1893 #define VGENERIC_DO(name, element) \
1894 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1896 int i; \
1898 VECTOR_FOR_INORDER_I(i, element) { \
1899 r->element[i] = name(b->element[i]); \
1903 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1904 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1905 #define clzw(v) clz32((v))
1906 #define clzd(v) clz64((v))
1908 VGENERIC_DO(clzb, u8)
1909 VGENERIC_DO(clzh, u16)
1910 VGENERIC_DO(clzw, u32)
1911 VGENERIC_DO(clzd, u64)
1913 #undef clzb
1914 #undef clzh
1915 #undef clzw
1916 #undef clzd
1918 #define popcntb(v) ctpop8(v)
1919 #define popcnth(v) ctpop16(v)
1920 #define popcntw(v) ctpop32(v)
1921 #define popcntd(v) ctpop64(v)
1923 VGENERIC_DO(popcntb, u8)
1924 VGENERIC_DO(popcnth, u16)
1925 VGENERIC_DO(popcntw, u32)
1926 VGENERIC_DO(popcntd, u64)
1928 #undef popcntb
1929 #undef popcnth
1930 #undef popcntw
1931 #undef popcntd
1933 #undef VGENERIC_DO
1935 #if defined(HOST_WORDS_BIGENDIAN)
1936 #define QW_ONE { .u64 = { 0, 1 } }
1937 #else
1938 #define QW_ONE { .u64 = { 1, 0 } }
1939 #endif
1941 #ifndef CONFIG_INT128
1943 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1945 t->u64[0] = ~a.u64[0];
1946 t->u64[1] = ~a.u64[1];
1949 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1951 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
1952 return -1;
1953 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
1954 return 1;
1955 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
1956 return -1;
1957 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
1958 return 1;
1959 } else {
1960 return 0;
1964 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1966 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1967 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1968 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1971 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1973 ppc_avr_t not_a;
1974 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1975 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1976 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1977 avr_qw_not(&not_a, a);
1978 return avr_qw_cmpu(not_a, b) < 0;
1981 #endif
1983 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1985 #ifdef CONFIG_INT128
1986 r->u128 = a->u128 + b->u128;
1987 #else
1988 avr_qw_add(r, *a, *b);
1989 #endif
1992 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1994 #ifdef CONFIG_INT128
1995 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1996 #else
1998 if (c->u64[LO_IDX] & 1) {
1999 ppc_avr_t tmp;
2001 tmp.u64[HI_IDX] = 0;
2002 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2003 avr_qw_add(&tmp, *a, tmp);
2004 avr_qw_add(r, tmp, *b);
2005 } else {
2006 avr_qw_add(r, *a, *b);
2008 #endif
2011 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2013 #ifdef CONFIG_INT128
2014 r->u128 = (~a->u128 < b->u128);
2015 #else
2016 ppc_avr_t not_a;
2018 avr_qw_not(&not_a, *a);
2020 r->u64[HI_IDX] = 0;
2021 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2022 #endif
2025 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2027 #ifdef CONFIG_INT128
2028 int carry_out = (~a->u128 < b->u128);
2029 if (!carry_out && (c->u128 & 1)) {
2030 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2031 ((a->u128 != 0) || (b->u128 != 0));
2033 r->u128 = carry_out;
2034 #else
2036 int carry_in = c->u64[LO_IDX] & 1;
2037 int carry_out = 0;
2038 ppc_avr_t tmp;
2040 carry_out = avr_qw_addc(&tmp, *a, *b);
2042 if (!carry_out && carry_in) {
2043 ppc_avr_t one = QW_ONE;
2044 carry_out = avr_qw_addc(&tmp, tmp, one);
2046 r->u64[HI_IDX] = 0;
2047 r->u64[LO_IDX] = carry_out;
2048 #endif
2051 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2053 #ifdef CONFIG_INT128
2054 r->u128 = a->u128 - b->u128;
2055 #else
2056 ppc_avr_t tmp;
2057 ppc_avr_t one = QW_ONE;
2059 avr_qw_not(&tmp, *b);
2060 avr_qw_add(&tmp, *a, tmp);
2061 avr_qw_add(r, tmp, one);
2062 #endif
2065 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2067 #ifdef CONFIG_INT128
2068 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2069 #else
2070 ppc_avr_t tmp, sum;
2072 avr_qw_not(&tmp, *b);
2073 avr_qw_add(&sum, *a, tmp);
2075 tmp.u64[HI_IDX] = 0;
2076 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2077 avr_qw_add(r, sum, tmp);
2078 #endif
2081 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2083 #ifdef CONFIG_INT128
2084 r->u128 = (~a->u128 < ~b->u128) ||
2085 (a->u128 + ~b->u128 == (__uint128_t)-1);
2086 #else
2087 int carry = (avr_qw_cmpu(*a, *b) > 0);
2088 if (!carry) {
2089 ppc_avr_t tmp;
2090 avr_qw_not(&tmp, *b);
2091 avr_qw_add(&tmp, *a, tmp);
2092 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2094 r->u64[HI_IDX] = 0;
2095 r->u64[LO_IDX] = carry;
2096 #endif
2099 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2101 #ifdef CONFIG_INT128
2102 r->u128 =
2103 (~a->u128 < ~b->u128) ||
2104 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2105 #else
2106 int carry_in = c->u64[LO_IDX] & 1;
2107 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2108 if (!carry_out && carry_in) {
2109 ppc_avr_t tmp;
2110 avr_qw_not(&tmp, *b);
2111 avr_qw_add(&tmp, *a, tmp);
2112 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2115 r->u64[HI_IDX] = 0;
2116 r->u64[LO_IDX] = carry_out;
2117 #endif
2120 #define BCD_PLUS_PREF_1 0xC
2121 #define BCD_PLUS_PREF_2 0xF
2122 #define BCD_PLUS_ALT_1 0xA
2123 #define BCD_NEG_PREF 0xD
2124 #define BCD_NEG_ALT 0xB
2125 #define BCD_PLUS_ALT_2 0xE
2127 #if defined(HOST_WORDS_BIGENDIAN)
2128 #define BCD_DIG_BYTE(n) (15 - (n/2))
2129 #else
2130 #define BCD_DIG_BYTE(n) (n/2)
2131 #endif
2133 static int bcd_get_sgn(ppc_avr_t *bcd)
2135 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2136 case BCD_PLUS_PREF_1:
2137 case BCD_PLUS_PREF_2:
2138 case BCD_PLUS_ALT_1:
2139 case BCD_PLUS_ALT_2:
2141 return 1;
2144 case BCD_NEG_PREF:
2145 case BCD_NEG_ALT:
2147 return -1;
2150 default:
2152 return 0;
2157 static int bcd_preferred_sgn(int sgn, int ps)
2159 if (sgn >= 0) {
2160 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2161 } else {
2162 return BCD_NEG_PREF;
2166 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2168 uint8_t result;
2169 if (n & 1) {
2170 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2171 } else {
2172 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2175 if (unlikely(result > 9)) {
2176 *invalid = true;
2178 return result;
2181 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2183 if (n & 1) {
2184 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2185 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2186 } else {
2187 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2188 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2192 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2194 int i;
2195 int invalid = 0;
2196 for (i = 31; i > 0; i--) {
2197 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2198 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2199 if (unlikely(invalid)) {
2200 return 0; /* doesn't matter */
2201 } else if (dig_a > dig_b) {
2202 return 1;
2203 } else if (dig_a < dig_b) {
2204 return -1;
2208 return 0;
2211 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2212 int *overflow)
2214 int carry = 0;
2215 int i;
2216 int is_zero = 1;
2217 for (i = 1; i <= 31; i++) {
2218 uint8_t digit = bcd_get_digit(a, i, invalid) +
2219 bcd_get_digit(b, i, invalid) + carry;
2220 is_zero &= (digit == 0);
2221 if (digit > 9) {
2222 carry = 1;
2223 digit -= 10;
2224 } else {
2225 carry = 0;
2228 bcd_put_digit(t, digit, i);
2230 if (unlikely(*invalid)) {
2231 return -1;
2235 *overflow = carry;
2236 return is_zero;
2239 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2240 int *overflow)
2242 int carry = 0;
2243 int i;
2244 int is_zero = 1;
2245 for (i = 1; i <= 31; i++) {
2246 uint8_t digit = bcd_get_digit(a, i, invalid) -
2247 bcd_get_digit(b, i, invalid) + carry;
2248 is_zero &= (digit == 0);
2249 if (digit & 0x80) {
2250 carry = -1;
2251 digit += 10;
2252 } else {
2253 carry = 0;
2256 bcd_put_digit(t, digit, i);
2258 if (unlikely(*invalid)) {
2259 return -1;
2263 *overflow = carry;
2264 return is_zero;
2267 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2270 int sgna = bcd_get_sgn(a);
2271 int sgnb = bcd_get_sgn(b);
2272 int invalid = (sgna == 0) || (sgnb == 0);
2273 int overflow = 0;
2274 int zero = 0;
2275 uint32_t cr = 0;
2276 ppc_avr_t result = { .u64 = { 0, 0 } };
2278 if (!invalid) {
2279 if (sgna == sgnb) {
2280 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2281 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2282 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2283 } else if (bcd_cmp_mag(a, b) > 0) {
2284 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2285 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2286 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2287 } else {
2288 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2289 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2290 cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2294 if (unlikely(invalid)) {
2295 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2296 cr = 1 << CRF_SO;
2297 } else if (overflow) {
2298 cr |= 1 << CRF_SO;
2299 } else if (zero) {
2300 cr = 1 << CRF_EQ;
2303 *r = result;
2305 return cr;
2308 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2310 ppc_avr_t bcopy = *b;
2311 int sgnb = bcd_get_sgn(b);
2312 if (sgnb < 0) {
2313 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2314 } else if (sgnb > 0) {
2315 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2317 /* else invalid ... defer to bcdadd code for proper handling */
2319 return helper_bcdadd(r, a, &bcopy, ps);
2322 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2324 int i;
2325 VECTOR_FOR_INORDER_I(i, u8) {
2326 r->u8[i] = AES_sbox[a->u8[i]];
2330 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2332 ppc_avr_t result;
2333 int i;
2335 VECTOR_FOR_INORDER_I(i, u32) {
2336 result.AVRW(i) = b->AVRW(i) ^
2337 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2338 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2339 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2340 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2342 *r = result;
2345 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2347 ppc_avr_t result;
2348 int i;
2350 VECTOR_FOR_INORDER_I(i, u8) {
2351 result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
2353 *r = result;
2356 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2358 /* This differs from what is written in ISA V2.07. The RTL is */
2359 /* incorrect and will be fixed in V2.07B. */
2360 int i;
2361 ppc_avr_t tmp;
2363 VECTOR_FOR_INORDER_I(i, u8) {
2364 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2367 VECTOR_FOR_INORDER_I(i, u32) {
2368 r->AVRW(i) =
2369 AES_imc[tmp.AVRB(4*i + 0)][0] ^
2370 AES_imc[tmp.AVRB(4*i + 1)][1] ^
2371 AES_imc[tmp.AVRB(4*i + 2)][2] ^
2372 AES_imc[tmp.AVRB(4*i + 3)][3];
2376 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2378 ppc_avr_t result;
2379 int i;
2381 VECTOR_FOR_INORDER_I(i, u8) {
2382 result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
2384 *r = result;
2387 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
2388 #if defined(HOST_WORDS_BIGENDIAN)
2389 #define EL_IDX(i) (i)
2390 #else
2391 #define EL_IDX(i) (3 - (i))
2392 #endif
2394 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2396 int st = (st_six & 0x10) != 0;
2397 int six = st_six & 0xF;
2398 int i;
2400 VECTOR_FOR_INORDER_I(i, u32) {
2401 if (st == 0) {
2402 if ((six & (0x8 >> i)) == 0) {
2403 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2404 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2405 (a->u32[EL_IDX(i)] >> 3);
2406 } else { /* six.bit[i] == 1 */
2407 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2408 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2409 (a->u32[EL_IDX(i)] >> 10);
2411 } else { /* st == 1 */
2412 if ((six & (0x8 >> i)) == 0) {
2413 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2414 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2415 ROTRu32(a->u32[EL_IDX(i)], 22);
2416 } else { /* six.bit[i] == 1 */
2417 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2418 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2419 ROTRu32(a->u32[EL_IDX(i)], 25);
2425 #undef ROTRu32
2426 #undef EL_IDX
2428 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
2429 #if defined(HOST_WORDS_BIGENDIAN)
2430 #define EL_IDX(i) (i)
2431 #else
2432 #define EL_IDX(i) (1 - (i))
2433 #endif
2435 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2437 int st = (st_six & 0x10) != 0;
2438 int six = st_six & 0xF;
2439 int i;
2441 VECTOR_FOR_INORDER_I(i, u64) {
2442 if (st == 0) {
2443 if ((six & (0x8 >> (2*i))) == 0) {
2444 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2445 ROTRu64(a->u64[EL_IDX(i)], 8) ^
2446 (a->u64[EL_IDX(i)] >> 7);
2447 } else { /* six.bit[2*i] == 1 */
2448 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2449 ROTRu64(a->u64[EL_IDX(i)], 61) ^
2450 (a->u64[EL_IDX(i)] >> 6);
2452 } else { /* st == 1 */
2453 if ((six & (0x8 >> (2*i))) == 0) {
2454 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2455 ROTRu64(a->u64[EL_IDX(i)], 34) ^
2456 ROTRu64(a->u64[EL_IDX(i)], 39);
2457 } else { /* six.bit[2*i] == 1 */
2458 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2459 ROTRu64(a->u64[EL_IDX(i)], 18) ^
2460 ROTRu64(a->u64[EL_IDX(i)], 41);
2466 #undef ROTRu64
2467 #undef EL_IDX
2469 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2471 ppc_avr_t result;
2472 int i;
2474 VECTOR_FOR_INORDER_I(i, u8) {
2475 int indexA = c->u8[i] >> 4;
2476 int indexB = c->u8[i] & 0xF;
2477 #if defined(HOST_WORDS_BIGENDIAN)
2478 result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
2479 #else
2480 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
2481 #endif
2483 *r = result;
2486 #undef VECTOR_FOR_INORDER_I
2487 #undef HI_IDX
2488 #undef LO_IDX
2490 /*****************************************************************************/
2491 /* SPE extension helpers */
2492 /* Use a table to make this quicker */
/* hbrev[n] is the 4-bit value n with its bit order reversed. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/*
 * Reverse the bit order of one byte: the reversed high nibble becomes
 * the low nibble and the reversed low nibble becomes the high nibble.
 */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/*
 * Reverse the bit order of a 32-bit word: each byte is bit-reversed and
 * the byte order is swapped.
 *
 * Every reversed byte is widened to uint32_t before it is shifted.  A
 * uint8_t promotes to int, and shifting a value >= 0x80 left by 24 would
 * overflow the signed int, which is undefined behaviour.
 */
static inline uint32_t word_reverse(uint32_t val)
{
    const uint32_t b0 = byte_reverse(val >> 24);
    const uint32_t b1 = byte_reverse(val >> 16);
    const uint32_t b2 = byte_reverse(val >> 8);
    const uint32_t b3 = byte_reverse(val);

    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}
2509 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
2510 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2512 uint32_t a, b, d, mask;
2514 mask = UINT32_MAX >> (32 - MASKBITS);
2515 a = arg1 & mask;
2516 b = arg2 & mask;
2517 d = word_reverse(1 + word_reverse(a | ~b));
2518 return (arg1 & ~mask) | (d & b);
/*
 * helper_cntlsw32: count the leading bits equal to the sign bit.  When
 * bit 31 is set the value is complemented first, so in both cases the
 * result is clz32() of a value whose top bit is clear.
 */
uint32_t helper_cntlsw32(uint32_t val)
{
    const uint32_t folded = (val & 0x80000000) ? ~val : val;

    return clz32(folded);
}
/* helper_cntlzw32: thin wrapper returning clz32(val). */
uint32_t helper_cntlzw32(uint32_t val)
{
    const uint32_t zeros = clz32(val);

    return zeros;
}
2535 /* 440 specific */
2536 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2537 target_ulong low, uint32_t update_Rc)
2539 target_ulong mask;
2540 int i;
2542 i = 1;
2543 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2544 if ((high & mask) == 0) {
2545 if (update_Rc) {
2546 env->crf[0] = 0x4;
2548 goto done;
2550 i++;
2552 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2553 if ((low & mask) == 0) {
2554 if (update_Rc) {
2555 env->crf[0] = 0x8;
2557 goto done;
2559 i++;
2561 i = 8;
2562 if (update_Rc) {
2563 env->crf[0] = 0x2;
2565 done:
2566 env->xer = (env->xer & ~0x7F) | i;
2567 if (update_Rc) {
2568 env->crf[0] |= xer_so;
2570 return i;