target-ppc/int_helper.c
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "cpu.h"
20 #include "qemu/host-utils.h"
21 #include "exec/helper-proto.h"
22 #include "qemu/aes.h"
24 #include "helper_regs.h"
25 /*****************************************************************************/
26 /* Fixed point operations helpers */
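/* divweu / divwe: Divide Word Extended (unsigned / signed).  The dividend
 * is (ra << 32) and the divisor is the low 32 bits of rb.  On divide by
 * zero, or when the quotient does not fit in 32 bits, the result is
 * undefined (0 is returned here) and, if OE is set, SO/OV are raised.
 */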
28 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
29 uint32_t oe)
31 uint64_t rt = 0;
32 int overflow = 0;
34 uint64_t dividend = (uint64_t)ra << 32;
35 uint64_t divisor = (uint32_t)rb;
37 if (unlikely(divisor == 0)) {
38 overflow = 1;
39 } else {
40 rt = dividend / divisor;
41 overflow = rt > UINT32_MAX;
44 if (unlikely(overflow)) {
45 rt = 0; /* Undefined */
48 if (oe) {
49 if (unlikely(overflow)) {
50 env->so = env->ov = 1;
51 } else {
52 env->ov = 0;
56 return (target_ulong)rt;
59 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
60 uint32_t oe)
62 int64_t rt = 0;
63 int overflow = 0;
65 int64_t dividend = (int64_t)ra << 32;
66 int64_t divisor = (int64_t)((int32_t)rb);
68 if (unlikely((divisor == 0) ||
69 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
70 overflow = 1;
71 } else {
72 rt = dividend / divisor;
73 overflow = rt != (int32_t)rt;
76 if (unlikely(overflow)) {
77 rt = 0; /* Undefined */
80 if (oe) {
81 if (unlikely(overflow)) {
82 env->so = env->ov = 1;
83 } else {
84 env->ov = 0;
88 return (target_ulong)rt;
91 #if defined(TARGET_PPC64)
93 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
95 uint64_t rt = 0;
96 int overflow = 0;
98 overflow = divu128(&rt, &ra, rb);
100 if (unlikely(overflow)) {
101 rt = 0; /* Undefined */
104 if (oe) {
105 if (unlikely(overflow)) {
106 env->so = env->ov = 1;
107 } else {
108 env->ov = 0;
112 return rt;
115 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
117 int64_t rt = 0;
118 int64_t ra = (int64_t)rau;
119 int64_t rb = (int64_t)rbu;
120 int overflow = divs128(&rt, &ra, rb);
122 if (unlikely(overflow)) {
123 rt = 0; /* Undefined */
126 if (oe) {
128 if (unlikely(overflow)) {
129 env->so = env->ov = 1;
130 } else {
131 env->ov = 0;
135 return rt;
138 #endif
141 target_ulong helper_cntlzw(target_ulong t)
143 return clz32(t);
146 #if defined(TARGET_PPC64)
147 target_ulong helper_cntlzd(target_ulong t)
149 return clz64(t);
151 #endif
153 #if defined(TARGET_PPC64)
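/* bpermd: each of the 8 index bytes of rs selects one bit of rb (indices
 * of 64 or more select 0); the gathered bits form the low byte of the
 * result.
 */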
155 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
157 int i;
158 uint64_t ra = 0;
160 for (i = 0; i < 8; i++) {
161 int index = (rs >> (i*8)) & 0xFF;
162 if (index < 64) {
163 if (rb & (1ull << (63-index))) {
164 ra |= 1 << i;
168 return ra;
171 #endif
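/* cmpb: byte-wise compare; each result byte is 0xff where the
 * corresponding bytes of rs and rb are equal, 0x00 otherwise.
 */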
173 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
175 target_ulong mask = 0xff;
176 target_ulong ra = 0;
177 int i;
179 for (i = 0; i < sizeof(target_ulong); i++) {
180 if ((rs & mask) == (rb & mask)) {
181 ra |= mask;
183 mask <<= 8;
185 return ra;
188 /* shift right arithmetic helper */
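/* CA is set when a negative value has non-zero bits shifted out. */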
189 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
190 target_ulong shift)
192 int32_t ret;
194 if (likely(!(shift & 0x20))) {
195 if (likely((uint32_t)shift != 0)) {
196 shift &= 0x1f;
197 ret = (int32_t)value >> shift;
198 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
199 env->ca = 0;
200 } else {
201 env->ca = 1;
203 } else {
204 ret = (int32_t)value;
205 env->ca = 0;
207 } else {
208 ret = (int32_t)value >> 31;
209 env->ca = (ret != 0);
211 return (target_long)ret;
214 #if defined(TARGET_PPC64)
215 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
216 target_ulong shift)
218 int64_t ret;
220 if (likely(!(shift & 0x40))) {
221 if (likely((uint64_t)shift != 0)) {
222 shift &= 0x3f;
223 ret = (int64_t)value >> shift;
224 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
225 env->ca = 0;
226 } else {
227 env->ca = 1;
229 } else {
230 ret = (int64_t)value;
231 env->ca = 0;
233 } else {
234 ret = (int64_t)value >> 63;
235 env->ca = (ret != 0);
237 return ret;
239 #endif
241 #if defined(TARGET_PPC64)
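/* Population count helpers: popcntb keeps per-byte partial sums, popcntw
 * per-word sums, and popcntd (64-bit only) counts the whole doubleword.
 */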
242 target_ulong helper_popcntb(target_ulong val)
244 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
245 0x5555555555555555ULL);
246 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
247 0x3333333333333333ULL);
248 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
249 0x0f0f0f0f0f0f0f0fULL);
250 return val;
253 target_ulong helper_popcntw(target_ulong val)
255 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
256 0x5555555555555555ULL);
257 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
258 0x3333333333333333ULL);
259 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
260 0x0f0f0f0f0f0f0f0fULL);
261 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
262 0x00ff00ff00ff00ffULL);
263 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
264 0x0000ffff0000ffffULL);
265 return val;
268 target_ulong helper_popcntd(target_ulong val)
270 return ctpop64(val);
272 #else
273 target_ulong helper_popcntb(target_ulong val)
275 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
276 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
277 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
278 return val;
281 target_ulong helper_popcntw(target_ulong val)
283 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
284 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
285 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
286 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
287 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
288 return val;
290 #endif
292 /*****************************************************************************/
293 /* PowerPC 601 specific instructions (POWER bridge) */
294 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
296 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
298 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
299 (int32_t)arg2 == 0) {
300 env->spr[SPR_MQ] = 0;
301 return INT32_MIN;
302 } else {
303 env->spr[SPR_MQ] = tmp % arg2;
304 return tmp / (int32_t)arg2;
308 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
309 target_ulong arg2)
311 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
313 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
314 (int32_t)arg2 == 0) {
315 env->so = env->ov = 1;
316 env->spr[SPR_MQ] = 0;
317 return INT32_MIN;
318 } else {
319 env->spr[SPR_MQ] = tmp % arg2;
320 tmp /= (int32_t)arg2;
321 if ((int32_t)tmp != tmp) {
322 env->so = env->ov = 1;
323 } else {
324 env->ov = 0;
326 return tmp;
330 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
331 target_ulong arg2)
333 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
334 (int32_t)arg2 == 0) {
335 env->spr[SPR_MQ] = 0;
336 return INT32_MIN;
337 } else {
338 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
339 return (int32_t)arg1 / (int32_t)arg2;
343 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
344 target_ulong arg2)
346 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
347 (int32_t)arg2 == 0) {
348 env->so = env->ov = 1;
349 env->spr[SPR_MQ] = 0;
350 return INT32_MIN;
351 } else {
352 env->ov = 0;
353 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
354 return (int32_t)arg1 / (int32_t)arg2;
358 /*****************************************************************************/
359 /* 602 specific instructions */
360 /* mfrom is the most crazy instruction ever seen, imho ! */
361 /* Real implementation uses a ROM table. Do the same */
362 /* Extremely decomposed:
363  * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
364  */
366 #if !defined(CONFIG_USER_ONLY)
367 target_ulong helper_602_mfrom(target_ulong arg)
369 if (likely(arg < 602)) {
370 #include "mfrom_table.c"
371 return mfrom_ROM_table[arg];
372 } else {
373 return 0;
376 #endif
378 /*****************************************************************************/
379 /* Altivec extension helpers */
380 #if defined(HOST_WORDS_BIGENDIAN)
381 #define HI_IDX 0
382 #define LO_IDX 1
383 #define AVRB(i) u8[i]
384 #define AVRW(i) u32[i]
385 #else
386 #define HI_IDX 1
387 #define LO_IDX 0
388 #define AVRB(i) u8[15-(i)]
389 #define AVRW(i) u32[3-(i)]
390 #endif
392 #if defined(HOST_WORDS_BIGENDIAN)
393 #define VECTOR_FOR_INORDER_I(index, element) \
394 for (index = 0; index < ARRAY_SIZE(r->element); index++)
395 #else
396 #define VECTOR_FOR_INORDER_I(index, element) \
397 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
398 #endif
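/* VECTOR_FOR_INORDER_I iterates over vector elements in PowerPC
 * (big-endian) element order regardless of host byte order.
 */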
400 /* Saturating arithmetic helpers. */
401 #define SATCVT(from, to, from_type, to_type, min, max) \
402 static inline to_type cvt##from##to(from_type x, int *sat) \
404 to_type r; \
406 if (x < (from_type)min) { \
407 r = min; \
408 *sat = 1; \
409 } else if (x > (from_type)max) { \
410 r = max; \
411 *sat = 1; \
412 } else { \
413 r = x; \
415 return r; \
417 #define SATCVTU(from, to, from_type, to_type, min, max) \
418 static inline to_type cvt##from##to(from_type x, int *sat) \
420 to_type r; \
422 if (x > (from_type)max) { \
423 r = max; \
424 *sat = 1; \
425 } else { \
426 r = x; \
428 return r; \
430 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
431 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
432 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
434 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
435 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
436 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
437 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
438 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
439 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
440 #undef SATCVT
441 #undef SATCVTU
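/* lvsl/lvsr fill the result with the byte sequences sh, sh+1, ... and
 * 16-sh, 17-sh, ... respectively; these are the permute control vectors
 * typically fed to vperm for unaligned accesses.
 */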
443 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
445 int i, j = (sh & 0xf);
447 VECTOR_FOR_INORDER_I(i, u8) {
448 r->u8[i] = j++;
452 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
454 int i, j = 0x10 - (sh & 0xf);
456 VECTOR_FOR_INORDER_I(i, u8) {
457 r->u8[i] = j++;
461 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
463 #if defined(HOST_WORDS_BIGENDIAN)
464 env->vscr = r->u32[3];
465 #else
466 env->vscr = r->u32[0];
467 #endif
468 set_flush_to_zero(vscr_nj, &env->vec_status);
471 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
473 int i;
475 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
476 r->u32[i] = ~a->u32[i] < b->u32[i];
480 #define VARITH_DO(name, op, element) \
481 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
483 int i; \
485 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
486 r->element[i] = a->element[i] op b->element[i]; \
489 #define VARITH(suffix, element) \
490 VARITH_DO(add##suffix, +, element) \
491 VARITH_DO(sub##suffix, -, element)
492 VARITH(ubm, u8)
493 VARITH(uhm, u16)
494 VARITH(uwm, u32)
495 VARITH(udm, u64)
496 VARITH_DO(muluwm, *, u32)
497 #undef VARITH_DO
498 #undef VARITH
500 #define VARITHFP(suffix, func) \
501 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
502 ppc_avr_t *b) \
504 int i; \
506 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
507 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
510 VARITHFP(addfp, float32_add)
511 VARITHFP(subfp, float32_sub)
512 VARITHFP(minfp, float32_min)
513 VARITHFP(maxfp, float32_max)
514 #undef VARITHFP
516 #define VARITHFPFMA(suffix, type) \
517 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
518 ppc_avr_t *b, ppc_avr_t *c) \
520 int i; \
521 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
522 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
523 type, &env->vec_status); \
526 VARITHFPFMA(maddfp, 0);
527 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
528 #undef VARITHFPFMA
530 #define VARITHSAT_CASE(type, op, cvt, element) \
532 type result = (type)a->element[i] op (type)b->element[i]; \
533 r->element[i] = cvt(result, &sat); \
536 #define VARITHSAT_DO(name, op, optype, cvt, element) \
537 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
538 ppc_avr_t *b) \
540 int sat = 0; \
541 int i; \
543 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
544 switch (sizeof(r->element[0])) { \
545 case 1: \
546 VARITHSAT_CASE(optype, op, cvt, element); \
547 break; \
548 case 2: \
549 VARITHSAT_CASE(optype, op, cvt, element); \
550 break; \
551 case 4: \
552 VARITHSAT_CASE(optype, op, cvt, element); \
553 break; \
556 if (sat) { \
557 env->vscr |= (1 << VSCR_SAT); \
560 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
561 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
562 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
563 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
564 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
565 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
566 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
567 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
568 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
569 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
570 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
571 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
572 #undef VARITHSAT_CASE
573 #undef VARITHSAT_DO
574 #undef VARITHSAT_SIGNED
575 #undef VARITHSAT_UNSIGNED
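/* vavg*: element-wise average, computed in a wider type and rounded up
 * (the +1 before the shift).
 */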
577 #define VAVG_DO(name, element, etype) \
578 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
580 int i; \
582 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
583 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
584 r->element[i] = x >> 1; \
588 #define VAVG(type, signed_element, signed_type, unsigned_element, \
589 unsigned_type) \
590 VAVG_DO(avgs##type, signed_element, signed_type) \
591 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
592 VAVG(b, s8, int16_t, u8, uint16_t)
593 VAVG(h, s16, int32_t, u16, uint32_t)
594 VAVG(w, s32, int64_t, u32, uint64_t)
595 #undef VAVG_DO
596 #undef VAVG
598 #define VCF(suffix, cvt, element) \
599 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
600 ppc_avr_t *b, uint32_t uim) \
602 int i; \
604 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
605 float32 t = cvt(b->element[i], &env->vec_status); \
606 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
609 VCF(ux, uint32_to_float32, u32)
610 VCF(sx, int32_to_float32, s32)
611 #undef VCF
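/* VCMP_DO: each result element is all ones where the compare is true and
 * zero otherwise.  The "_dot" (record) forms also set CR6: bit 3 if every
 * element compared true, bit 1 if none did.
 */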
613 #define VCMP_DO(suffix, compare, element, record) \
614 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
615 ppc_avr_t *a, ppc_avr_t *b) \
617 uint64_t ones = (uint64_t)-1; \
618 uint64_t all = ones; \
619 uint64_t none = 0; \
620 int i; \
622 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
623 uint64_t result = (a->element[i] compare b->element[i] ? \
624 ones : 0x0); \
625 switch (sizeof(a->element[0])) { \
626 case 8: \
627 r->u64[i] = result; \
628 break; \
629 case 4: \
630 r->u32[i] = result; \
631 break; \
632 case 2: \
633 r->u16[i] = result; \
634 break; \
635 case 1: \
636 r->u8[i] = result; \
637 break; \
639 all &= result; \
640 none |= result; \
642 if (record) { \
643 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
646 #define VCMP(suffix, compare, element) \
647 VCMP_DO(suffix, compare, element, 0) \
648 VCMP_DO(suffix##_dot, compare, element, 1)
649 VCMP(equb, ==, u8)
650 VCMP(equh, ==, u16)
651 VCMP(equw, ==, u32)
652 VCMP(equd, ==, u64)
653 VCMP(gtub, >, u8)
654 VCMP(gtuh, >, u16)
655 VCMP(gtuw, >, u32)
656 VCMP(gtud, >, u64)
657 VCMP(gtsb, >, s8)
658 VCMP(gtsh, >, s16)
659 VCMP(gtsw, >, s32)
660 VCMP(gtsd, >, s64)
661 #undef VCMP_DO
662 #undef VCMP
664 #define VCMPFP_DO(suffix, compare, order, record) \
665 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
666 ppc_avr_t *a, ppc_avr_t *b) \
668 uint32_t ones = (uint32_t)-1; \
669 uint32_t all = ones; \
670 uint32_t none = 0; \
671 int i; \
673 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
674 uint32_t result; \
675 int rel = float32_compare_quiet(a->f[i], b->f[i], \
676 &env->vec_status); \
677 if (rel == float_relation_unordered) { \
678 result = 0; \
679 } else if (rel compare order) { \
680 result = ones; \
681 } else { \
682 result = 0; \
684 r->u32[i] = result; \
685 all &= result; \
686 none |= result; \
688 if (record) { \
689 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
692 #define VCMPFP(suffix, compare, order) \
693 VCMPFP_DO(suffix, compare, order, 0) \
694 VCMPFP_DO(suffix##_dot, compare, order, 1)
695 VCMPFP(eqfp, ==, float_relation_equal)
696 VCMPFP(gefp, !=, float_relation_less)
697 VCMPFP(gtfp, ==, float_relation_greater)
698 #undef VCMPFP_DO
699 #undef VCMPFP
701 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
702 ppc_avr_t *a, ppc_avr_t *b, int record)
704 int i;
705 int all_in = 0;
707 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
708 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
709 if (le_rel == float_relation_unordered) {
710 r->u32[i] = 0xc0000000;
711 /* ALL_IN does not need to be updated here. */
712 } else {
713 float32 bneg = float32_chs(b->f[i]);
714 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
715 int le = le_rel != float_relation_greater;
716 int ge = ge_rel != float_relation_less;
718 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
719 all_in |= (!le | !ge);
722 if (record) {
723 env->crf[6] = (all_in == 0) << 1;
727 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
729 vcmpbfp_internal(env, r, a, b, 0);
732 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
733 ppc_avr_t *b)
735 vcmpbfp_internal(env, r, a, b, 1);
738 #define VCT(suffix, satcvt, element) \
739 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
740 ppc_avr_t *b, uint32_t uim) \
742 int i; \
743 int sat = 0; \
744 float_status s = env->vec_status; \
746 set_float_rounding_mode(float_round_to_zero, &s); \
747 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
748 if (float32_is_any_nan(b->f[i])) { \
749 r->element[i] = 0; \
750 } else { \
751 float64 t = float32_to_float64(b->f[i], &s); \
752 int64_t j; \
754 t = float64_scalbn(t, uim, &s); \
755 j = float64_to_int64(t, &s); \
756 r->element[i] = satcvt(j, &sat); \
759 if (sat) { \
760 env->vscr |= (1 << VSCR_SAT); \
763 VCT(uxs, cvtsduw, u32)
764 VCT(sxs, cvtsdsw, s32)
765 #undef VCT
767 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
768 ppc_avr_t *b, ppc_avr_t *c)
770 int sat = 0;
771 int i;
773 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
774 int32_t prod = a->s16[i] * b->s16[i];
775 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
777 r->s16[i] = cvtswsh(t, &sat);
780 if (sat) {
781 env->vscr |= (1 << VSCR_SAT);
785 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
786 ppc_avr_t *b, ppc_avr_t *c)
788 int sat = 0;
789 int i;
791 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
792 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
793 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
794 r->s16[i] = cvtswsh(t, &sat);
797 if (sat) {
798 env->vscr |= (1 << VSCR_SAT);
802 #define VMINMAX_DO(name, compare, element) \
803 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
805 int i; \
807 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
808 if (a->element[i] compare b->element[i]) { \
809 r->element[i] = b->element[i]; \
810 } else { \
811 r->element[i] = a->element[i]; \
815 #define VMINMAX(suffix, element) \
816 VMINMAX_DO(min##suffix, >, element) \
817 VMINMAX_DO(max##suffix, <, element)
818 VMINMAX(sb, s8)
819 VMINMAX(sh, s16)
820 VMINMAX(sw, s32)
821 VMINMAX(sd, s64)
822 VMINMAX(ub, u8)
823 VMINMAX(uh, u16)
824 VMINMAX(uw, u32)
825 VMINMAX(ud, u64)
826 #undef VMINMAX_DO
827 #undef VMINMAX
829 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
831 int i;
833 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
834 int32_t prod = a->s16[i] * b->s16[i];
835 r->s16[i] = (int16_t) (prod + c->s16[i]);
839 #define VMRG_DO(name, element, highp) \
840 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
842 ppc_avr_t result; \
843 int i; \
844 size_t n_elems = ARRAY_SIZE(r->element); \
846 for (i = 0; i < n_elems / 2; i++) { \
847 if (highp) { \
848 result.element[i*2+HI_IDX] = a->element[i]; \
849 result.element[i*2+LO_IDX] = b->element[i]; \
850 } else { \
851 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
852 b->element[n_elems - i - 1]; \
853 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
854 a->element[n_elems - i - 1]; \
857 *r = result; \
859 #if defined(HOST_WORDS_BIGENDIAN)
860 #define MRGHI 0
861 #define MRGLO 1
862 #else
863 #define MRGHI 1
864 #define MRGLO 0
865 #endif
866 #define VMRG(suffix, element) \
867 VMRG_DO(mrgl##suffix, element, MRGHI) \
868 VMRG_DO(mrgh##suffix, element, MRGLO)
869 VMRG(b, u8)
870 VMRG(h, u16)
871 VMRG(w, u32)
872 #undef VMRG_DO
873 #undef VMRG
874 #undef MRGHI
875 #undef MRGLO
877 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
878 ppc_avr_t *b, ppc_avr_t *c)
880 int32_t prod[16];
881 int i;
883 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
884 prod[i] = (int32_t)a->s8[i] * b->u8[i];
887 VECTOR_FOR_INORDER_I(i, s32) {
888 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
889 prod[4 * i + 2] + prod[4 * i + 3];
893 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
894 ppc_avr_t *b, ppc_avr_t *c)
896 int32_t prod[8];
897 int i;
899 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
900 prod[i] = a->s16[i] * b->s16[i];
903 VECTOR_FOR_INORDER_I(i, s32) {
904 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
908 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
909 ppc_avr_t *b, ppc_avr_t *c)
911 int32_t prod[8];
912 int i;
913 int sat = 0;
915 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
916 prod[i] = (int32_t)a->s16[i] * b->s16[i];
919 VECTOR_FOR_INORDER_I(i, s32) {
920 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
922 r->u32[i] = cvtsdsw(t, &sat);
925 if (sat) {
926 env->vscr |= (1 << VSCR_SAT);
930 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
931 ppc_avr_t *b, ppc_avr_t *c)
933 uint16_t prod[16];
934 int i;
936 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
937 prod[i] = a->u8[i] * b->u8[i];
940 VECTOR_FOR_INORDER_I(i, u32) {
941 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
942 prod[4 * i + 2] + prod[4 * i + 3];
946 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
947 ppc_avr_t *b, ppc_avr_t *c)
949 uint32_t prod[8];
950 int i;
952 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
953 prod[i] = a->u16[i] * b->u16[i];
956 VECTOR_FOR_INORDER_I(i, u32) {
957 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
961 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
962 ppc_avr_t *b, ppc_avr_t *c)
964 uint32_t prod[8];
965 int i;
966 int sat = 0;
968 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
969 prod[i] = a->u16[i] * b->u16[i];
972 VECTOR_FOR_INORDER_I(i, s32) {
973 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
975 r->u32[i] = cvtuduw(t, &sat);
978 if (sat) {
979 env->vscr |= (1 << VSCR_SAT);
983 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
984 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
986 int i; \
988 VECTOR_FOR_INORDER_I(i, prod_element) { \
989 if (evenp) { \
990 r->prod_element[i] = \
991 (cast)a->mul_element[i * 2 + HI_IDX] * \
992 (cast)b->mul_element[i * 2 + HI_IDX]; \
993 } else { \
994 r->prod_element[i] = \
995 (cast)a->mul_element[i * 2 + LO_IDX] * \
996 (cast)b->mul_element[i * 2 + LO_IDX]; \
1000 #define VMUL(suffix, mul_element, prod_element, cast) \
1001 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1002 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1003 VMUL(sb, s8, s16, int16_t)
1004 VMUL(sh, s16, s32, int32_t)
1005 VMUL(sw, s32, s64, int64_t)
1006 VMUL(ub, u8, u16, uint16_t)
1007 VMUL(uh, u16, u32, uint32_t)
1008 VMUL(uw, u32, u64, uint64_t)
1009 #undef VMUL_DO
1010 #undef VMUL
1012 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1013 ppc_avr_t *c)
1015 ppc_avr_t result;
1016 int i;
1018 VECTOR_FOR_INORDER_I(i, u8) {
1019 int s = c->u8[i] & 0x1f;
1020 #if defined(HOST_WORDS_BIGENDIAN)
1021 int index = s & 0xf;
1022 #else
1023 int index = 15 - (s & 0xf);
1024 #endif
1026 if (s & 0x10) {
1027 result.u8[i] = b->u8[index];
1028 } else {
1029 result.u8[i] = a->u8[index];
1032 *r = result;
1035 #if defined(HOST_WORDS_BIGENDIAN)
1036 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1037 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1038 #else
1039 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1040 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1041 #endif
1043 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1045 int i;
1046 uint64_t perm = 0;
1048 VECTOR_FOR_INORDER_I(i, u8) {
1049 int index = VBPERMQ_INDEX(b, i);
1051 if (index < 128) {
1052 uint64_t mask = (1ull << (63-(index & 0x3F)));
1053 if (a->u64[VBPERMQ_DW(index)] & mask) {
1054 perm |= (0x8000 >> i);
1059 r->u64[HI_IDX] = perm;
1060 r->u64[LO_IDX] = 0;
1063 #undef VBPERMQ_INDEX
1064 #undef VBPERMQ_DW
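/* Lookup table for vgbbd (Vector Gather Bits by Bytes by Doubleword): the
 * entry for byte value b has bit j of b placed in the MSB of byte j of a
 * doubleword.  helper_vgbbd shifts each entry into the source byte's
 * column, which amounts to transposing each 8x8 bit matrix.
 */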
1066 static const uint64_t VGBBD_MASKS[256] = {
1067 0x0000000000000000ull, /* 00 */
1068 0x0000000000000080ull, /* 01 */
1069 0x0000000000008000ull, /* 02 */
1070 0x0000000000008080ull, /* 03 */
1071 0x0000000000800000ull, /* 04 */
1072 0x0000000000800080ull, /* 05 */
1073 0x0000000000808000ull, /* 06 */
1074 0x0000000000808080ull, /* 07 */
1075 0x0000000080000000ull, /* 08 */
1076 0x0000000080000080ull, /* 09 */
1077 0x0000000080008000ull, /* 0A */
1078 0x0000000080008080ull, /* 0B */
1079 0x0000000080800000ull, /* 0C */
1080 0x0000000080800080ull, /* 0D */
1081 0x0000000080808000ull, /* 0E */
1082 0x0000000080808080ull, /* 0F */
1083 0x0000008000000000ull, /* 10 */
1084 0x0000008000000080ull, /* 11 */
1085 0x0000008000008000ull, /* 12 */
1086 0x0000008000008080ull, /* 13 */
1087 0x0000008000800000ull, /* 14 */
1088 0x0000008000800080ull, /* 15 */
1089 0x0000008000808000ull, /* 16 */
1090 0x0000008000808080ull, /* 17 */
1091 0x0000008080000000ull, /* 18 */
1092 0x0000008080000080ull, /* 19 */
1093 0x0000008080008000ull, /* 1A */
1094 0x0000008080008080ull, /* 1B */
1095 0x0000008080800000ull, /* 1C */
1096 0x0000008080800080ull, /* 1D */
1097 0x0000008080808000ull, /* 1E */
1098 0x0000008080808080ull, /* 1F */
1099 0x0000800000000000ull, /* 20 */
1100 0x0000800000000080ull, /* 21 */
1101 0x0000800000008000ull, /* 22 */
1102 0x0000800000008080ull, /* 23 */
1103 0x0000800000800000ull, /* 24 */
1104 0x0000800000800080ull, /* 25 */
1105 0x0000800000808000ull, /* 26 */
1106 0x0000800000808080ull, /* 27 */
1107 0x0000800080000000ull, /* 28 */
1108 0x0000800080000080ull, /* 29 */
1109 0x0000800080008000ull, /* 2A */
1110 0x0000800080008080ull, /* 2B */
1111 0x0000800080800000ull, /* 2C */
1112 0x0000800080800080ull, /* 2D */
1113 0x0000800080808000ull, /* 2E */
1114 0x0000800080808080ull, /* 2F */
1115 0x0000808000000000ull, /* 30 */
1116 0x0000808000000080ull, /* 31 */
1117 0x0000808000008000ull, /* 32 */
1118 0x0000808000008080ull, /* 33 */
1119 0x0000808000800000ull, /* 34 */
1120 0x0000808000800080ull, /* 35 */
1121 0x0000808000808000ull, /* 36 */
1122 0x0000808000808080ull, /* 37 */
1123 0x0000808080000000ull, /* 38 */
1124 0x0000808080000080ull, /* 39 */
1125 0x0000808080008000ull, /* 3A */
1126 0x0000808080008080ull, /* 3B */
1127 0x0000808080800000ull, /* 3C */
1128 0x0000808080800080ull, /* 3D */
1129 0x0000808080808000ull, /* 3E */
1130 0x0000808080808080ull, /* 3F */
1131 0x0080000000000000ull, /* 40 */
1132 0x0080000000000080ull, /* 41 */
1133 0x0080000000008000ull, /* 42 */
1134 0x0080000000008080ull, /* 43 */
1135 0x0080000000800000ull, /* 44 */
1136 0x0080000000800080ull, /* 45 */
1137 0x0080000000808000ull, /* 46 */
1138 0x0080000000808080ull, /* 47 */
1139 0x0080000080000000ull, /* 48 */
1140 0x0080000080000080ull, /* 49 */
1141 0x0080000080008000ull, /* 4A */
1142 0x0080000080008080ull, /* 4B */
1143 0x0080000080800000ull, /* 4C */
1144 0x0080000080800080ull, /* 4D */
1145 0x0080000080808000ull, /* 4E */
1146 0x0080000080808080ull, /* 4F */
1147 0x0080008000000000ull, /* 50 */
1148 0x0080008000000080ull, /* 51 */
1149 0x0080008000008000ull, /* 52 */
1150 0x0080008000008080ull, /* 53 */
1151 0x0080008000800000ull, /* 54 */
1152 0x0080008000800080ull, /* 55 */
1153 0x0080008000808000ull, /* 56 */
1154 0x0080008000808080ull, /* 57 */
1155 0x0080008080000000ull, /* 58 */
1156 0x0080008080000080ull, /* 59 */
1157 0x0080008080008000ull, /* 5A */
1158 0x0080008080008080ull, /* 5B */
1159 0x0080008080800000ull, /* 5C */
1160 0x0080008080800080ull, /* 5D */
1161 0x0080008080808000ull, /* 5E */
1162 0x0080008080808080ull, /* 5F */
1163 0x0080800000000000ull, /* 60 */
1164 0x0080800000000080ull, /* 61 */
1165 0x0080800000008000ull, /* 62 */
1166 0x0080800000008080ull, /* 63 */
1167 0x0080800000800000ull, /* 64 */
1168 0x0080800000800080ull, /* 65 */
1169 0x0080800000808000ull, /* 66 */
1170 0x0080800000808080ull, /* 67 */
1171 0x0080800080000000ull, /* 68 */
1172 0x0080800080000080ull, /* 69 */
1173 0x0080800080008000ull, /* 6A */
1174 0x0080800080008080ull, /* 6B */
1175 0x0080800080800000ull, /* 6C */
1176 0x0080800080800080ull, /* 6D */
1177 0x0080800080808000ull, /* 6E */
1178 0x0080800080808080ull, /* 6F */
1179 0x0080808000000000ull, /* 70 */
1180 0x0080808000000080ull, /* 71 */
1181 0x0080808000008000ull, /* 72 */
1182 0x0080808000008080ull, /* 73 */
1183 0x0080808000800000ull, /* 74 */
1184 0x0080808000800080ull, /* 75 */
1185 0x0080808000808000ull, /* 76 */
1186 0x0080808000808080ull, /* 77 */
1187 0x0080808080000000ull, /* 78 */
1188 0x0080808080000080ull, /* 79 */
1189 0x0080808080008000ull, /* 7A */
1190 0x0080808080008080ull, /* 7B */
1191 0x0080808080800000ull, /* 7C */
1192 0x0080808080800080ull, /* 7D */
1193 0x0080808080808000ull, /* 7E */
1194 0x0080808080808080ull, /* 7F */
1195 0x8000000000000000ull, /* 80 */
1196 0x8000000000000080ull, /* 81 */
1197 0x8000000000008000ull, /* 82 */
1198 0x8000000000008080ull, /* 83 */
1199 0x8000000000800000ull, /* 84 */
1200 0x8000000000800080ull, /* 85 */
1201 0x8000000000808000ull, /* 86 */
1202 0x8000000000808080ull, /* 87 */
1203 0x8000000080000000ull, /* 88 */
1204 0x8000000080000080ull, /* 89 */
1205 0x8000000080008000ull, /* 8A */
1206 0x8000000080008080ull, /* 8B */
1207 0x8000000080800000ull, /* 8C */
1208 0x8000000080800080ull, /* 8D */
1209 0x8000000080808000ull, /* 8E */
1210 0x8000000080808080ull, /* 8F */
1211 0x8000008000000000ull, /* 90 */
1212 0x8000008000000080ull, /* 91 */
1213 0x8000008000008000ull, /* 92 */
1214 0x8000008000008080ull, /* 93 */
1215 0x8000008000800000ull, /* 94 */
1216 0x8000008000800080ull, /* 95 */
1217 0x8000008000808000ull, /* 96 */
1218 0x8000008000808080ull, /* 97 */
1219 0x8000008080000000ull, /* 98 */
1220 0x8000008080000080ull, /* 99 */
1221 0x8000008080008000ull, /* 9A */
1222 0x8000008080008080ull, /* 9B */
1223 0x8000008080800000ull, /* 9C */
1224 0x8000008080800080ull, /* 9D */
1225 0x8000008080808000ull, /* 9E */
1226 0x8000008080808080ull, /* 9F */
1227 0x8000800000000000ull, /* A0 */
1228 0x8000800000000080ull, /* A1 */
1229 0x8000800000008000ull, /* A2 */
1230 0x8000800000008080ull, /* A3 */
1231 0x8000800000800000ull, /* A4 */
1232 0x8000800000800080ull, /* A5 */
1233 0x8000800000808000ull, /* A6 */
1234 0x8000800000808080ull, /* A7 */
1235 0x8000800080000000ull, /* A8 */
1236 0x8000800080000080ull, /* A9 */
1237 0x8000800080008000ull, /* AA */
1238 0x8000800080008080ull, /* AB */
1239 0x8000800080800000ull, /* AC */
1240 0x8000800080800080ull, /* AD */
1241 0x8000800080808000ull, /* AE */
1242 0x8000800080808080ull, /* AF */
1243 0x8000808000000000ull, /* B0 */
1244 0x8000808000000080ull, /* B1 */
1245 0x8000808000008000ull, /* B2 */
1246 0x8000808000008080ull, /* B3 */
1247 0x8000808000800000ull, /* B4 */
1248 0x8000808000800080ull, /* B5 */
1249 0x8000808000808000ull, /* B6 */
1250 0x8000808000808080ull, /* B7 */
1251 0x8000808080000000ull, /* B8 */
1252 0x8000808080000080ull, /* B9 */
1253 0x8000808080008000ull, /* BA */
1254 0x8000808080008080ull, /* BB */
1255 0x8000808080800000ull, /* BC */
1256 0x8000808080800080ull, /* BD */
1257 0x8000808080808000ull, /* BE */
1258 0x8000808080808080ull, /* BF */
1259 0x8080000000000000ull, /* C0 */
1260 0x8080000000000080ull, /* C1 */
1261 0x8080000000008000ull, /* C2 */
1262 0x8080000000008080ull, /* C3 */
1263 0x8080000000800000ull, /* C4 */
1264 0x8080000000800080ull, /* C5 */
1265 0x8080000000808000ull, /* C6 */
1266 0x8080000000808080ull, /* C7 */
1267 0x8080000080000000ull, /* C8 */
1268 0x8080000080000080ull, /* C9 */
1269 0x8080000080008000ull, /* CA */
1270 0x8080000080008080ull, /* CB */
1271 0x8080000080800000ull, /* CC */
1272 0x8080000080800080ull, /* CD */
1273 0x8080000080808000ull, /* CE */
1274 0x8080000080808080ull, /* CF */
1275 0x8080008000000000ull, /* D0 */
1276 0x8080008000000080ull, /* D1 */
1277 0x8080008000008000ull, /* D2 */
1278 0x8080008000008080ull, /* D3 */
1279 0x8080008000800000ull, /* D4 */
1280 0x8080008000800080ull, /* D5 */
1281 0x8080008000808000ull, /* D6 */
1282 0x8080008000808080ull, /* D7 */
1283 0x8080008080000000ull, /* D8 */
1284 0x8080008080000080ull, /* D9 */
1285 0x8080008080008000ull, /* DA */
1286 0x8080008080008080ull, /* DB */
1287 0x8080008080800000ull, /* DC */
1288 0x8080008080800080ull, /* DD */
1289 0x8080008080808000ull, /* DE */
1290 0x8080008080808080ull, /* DF */
1291 0x8080800000000000ull, /* E0 */
1292 0x8080800000000080ull, /* E1 */
1293 0x8080800000008000ull, /* E2 */
1294 0x8080800000008080ull, /* E3 */
1295 0x8080800000800000ull, /* E4 */
1296 0x8080800000800080ull, /* E5 */
1297 0x8080800000808000ull, /* E6 */
1298 0x8080800000808080ull, /* E7 */
1299 0x8080800080000000ull, /* E8 */
1300 0x8080800080000080ull, /* E9 */
1301 0x8080800080008000ull, /* EA */
1302 0x8080800080008080ull, /* EB */
1303 0x8080800080800000ull, /* EC */
1304 0x8080800080800080ull, /* ED */
1305 0x8080800080808000ull, /* EE */
1306 0x8080800080808080ull, /* EF */
1307 0x8080808000000000ull, /* F0 */
1308 0x8080808000000080ull, /* F1 */
1309 0x8080808000008000ull, /* F2 */
1310 0x8080808000008080ull, /* F3 */
1311 0x8080808000800000ull, /* F4 */
1312 0x8080808000800080ull, /* F5 */
1313 0x8080808000808000ull, /* F6 */
1314 0x8080808000808080ull, /* F7 */
1315 0x8080808080000000ull, /* F8 */
1316 0x8080808080000080ull, /* F9 */
1317 0x8080808080008000ull, /* FA */
1318 0x8080808080008080ull, /* FB */
1319 0x8080808080800000ull, /* FC */
1320 0x8080808080800080ull, /* FD */
1321 0x8080808080808000ull, /* FE */
1322 0x8080808080808080ull, /* FF */
1325 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1327 int i;
1328 uint64_t t[2] = { 0, 0 };
1330 VECTOR_FOR_INORDER_I(i, u8) {
1331 #if defined(HOST_WORDS_BIGENDIAN)
1332 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1333 #else
1334 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1335 #endif
1338 r->u64[0] = t[0];
1339 r->u64[1] = t[1];
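/* vpmsum*: carry-less (polynomial, GF(2)) multiplication of each element,
 * with adjacent even/odd products XORed together, e.g. for CRC- and
 * GHASH-style computations.
 */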
1342 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1343 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1345 int i, j; \
1346 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1348 VECTOR_FOR_INORDER_I(i, srcfld) { \
1349 prod[i] = 0; \
1350 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1351 if (a->srcfld[i] & (1ull<<j)) { \
1352 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1357 VECTOR_FOR_INORDER_I(i, trgfld) { \
1358 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1362 PMSUM(vpmsumb, u8, u16, uint16_t)
1363 PMSUM(vpmsumh, u16, u32, uint32_t)
1364 PMSUM(vpmsumw, u32, u64, uint64_t)
1366 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1369 #ifdef CONFIG_INT128
1370 int i, j;
1371 __uint128_t prod[2];
1373 VECTOR_FOR_INORDER_I(i, u64) {
1374 prod[i] = 0;
1375 for (j = 0; j < 64; j++) {
1376 if (a->u64[i] & (1ull<<j)) {
1377 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1382 r->u128 = prod[0] ^ prod[1];
1384 #else
1385 int i, j;
1386 ppc_avr_t prod[2];
1388 VECTOR_FOR_INORDER_I(i, u64) {
1389 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1390 for (j = 0; j < 64; j++) {
1391 if (a->u64[i] & (1ull<<j)) {
1392 ppc_avr_t bshift;
1393 if (j == 0) {
1394 bshift.u64[HI_IDX] = 0;
1395 bshift.u64[LO_IDX] = b->u64[i];
1396 } else {
1397 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1398 bshift.u64[LO_IDX] = b->u64[i] << j;
1400 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1401 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1406 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1407 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1408 #endif
1412 #if defined(HOST_WORDS_BIGENDIAN)
1413 #define PKBIG 1
1414 #else
1415 #define PKBIG 0
1416 #endif
1417 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1419 int i, j;
1420 ppc_avr_t result;
1421 #if defined(HOST_WORDS_BIGENDIAN)
1422 const ppc_avr_t *x[2] = { a, b };
1423 #else
1424 const ppc_avr_t *x[2] = { b, a };
1425 #endif
1427 VECTOR_FOR_INORDER_I(i, u64) {
1428 VECTOR_FOR_INORDER_I(j, u32) {
1429 uint32_t e = x[i]->u32[j];
1431 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1432 ((e >> 6) & 0x3e0) |
1433 ((e >> 3) & 0x1f));
1436 *r = result;
1439 #define VPK(suffix, from, to, cvt, dosat) \
1440 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1441 ppc_avr_t *a, ppc_avr_t *b) \
1443 int i; \
1444 int sat = 0; \
1445 ppc_avr_t result; \
1446 ppc_avr_t *a0 = PKBIG ? a : b; \
1447 ppc_avr_t *a1 = PKBIG ? b : a; \
1449 VECTOR_FOR_INORDER_I(i, from) { \
1450 result.to[i] = cvt(a0->from[i], &sat); \
1451 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1453 *r = result; \
1454 if (dosat && sat) { \
1455 env->vscr |= (1 << VSCR_SAT); \
1458 #define I(x, y) (x)
1459 VPK(shss, s16, s8, cvtshsb, 1)
1460 VPK(shus, s16, u8, cvtshub, 1)
1461 VPK(swss, s32, s16, cvtswsh, 1)
1462 VPK(swus, s32, u16, cvtswuh, 1)
1463 VPK(sdss, s64, s32, cvtsdsw, 1)
1464 VPK(sdus, s64, u32, cvtsduw, 1)
1465 VPK(uhus, u16, u8, cvtuhub, 1)
1466 VPK(uwus, u32, u16, cvtuwuh, 1)
1467 VPK(udus, u64, u32, cvtuduw, 1)
1468 VPK(uhum, u16, u8, I, 0)
1469 VPK(uwum, u32, u16, I, 0)
1470 VPK(udum, u64, u32, I, 0)
1471 #undef I
1472 #undef VPK
1473 #undef PKBIG
1475 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1477 int i;
1479 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1480 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1484 #define VRFI(suffix, rounding) \
1485 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1486 ppc_avr_t *b) \
1488 int i; \
1489 float_status s = env->vec_status; \
1491 set_float_rounding_mode(rounding, &s); \
1492 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1493 r->f[i] = float32_round_to_int (b->f[i], &s); \
1496 VRFI(n, float_round_nearest_even)
1497 VRFI(m, float_round_down)
1498 VRFI(p, float_round_up)
1499 VRFI(z, float_round_to_zero)
1500 #undef VRFI
1502 #define VROTATE(suffix, element, mask) \
1503 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1505 int i; \
1507 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1508 unsigned int shift = b->element[i] & mask; \
1509 r->element[i] = (a->element[i] << shift) | \
1510 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1513 VROTATE(b, u8, 0x7)
1514 VROTATE(h, u16, 0xF)
1515 VROTATE(w, u32, 0x1F)
1516 VROTATE(d, u64, 0x3F)
1517 #undef VROTATE
1519 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1521 int i;
1523 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1524 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1526 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1530 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1531 ppc_avr_t *c)
1533 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1534 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1537 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1539 int i;
1541 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1542 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1546 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1548 int i;
1550 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1551 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1555 #if defined(HOST_WORDS_BIGENDIAN)
1556 #define LEFT 0
1557 #define RIGHT 1
1558 #else
1559 #define LEFT 1
1560 #define RIGHT 0
1561 #endif
1562 /* The specification says that the results are undefined if all of the
1563 * shift counts are not identical. We check to make sure that they are
1564 * to conform to what real hardware appears to do. */
1565 #define VSHIFT(suffix, leftp) \
1566 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1568 int shift = b->u8[LO_IDX*15] & 0x7; \
1569 int doit = 1; \
1570 int i; \
1572 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1573 doit = doit && ((b->u8[i] & 0x7) == shift); \
1575 if (doit) { \
1576 if (shift == 0) { \
1577 *r = *a; \
1578 } else if (leftp) { \
1579 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1581 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1582 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1583 } else { \
1584 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1586 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1587 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1591 VSHIFT(l, LEFT)
1592 VSHIFT(r, RIGHT)
1593 #undef VSHIFT
1594 #undef LEFT
1595 #undef RIGHT
1597 #define VSL(suffix, element, mask) \
1598 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1600 int i; \
1602 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1603 unsigned int shift = b->element[i] & mask; \
1605 r->element[i] = a->element[i] << shift; \
1608 VSL(b, u8, 0x7)
1609 VSL(h, u16, 0x0F)
1610 VSL(w, u32, 0x1F)
1611 VSL(d, u64, 0x3F)
1612 #undef VSL
1614 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1616 int sh = shift & 0xf;
1617 int i;
1618 ppc_avr_t result;
1620 #if defined(HOST_WORDS_BIGENDIAN)
1621 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1622 int index = sh + i;
1623 if (index > 0xf) {
1624 result.u8[i] = b->u8[index - 0x10];
1625 } else {
1626 result.u8[i] = a->u8[index];
1629 #else
1630 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1631 int index = (16 - sh) + i;
1632 if (index > 0xf) {
1633 result.u8[i] = a->u8[index - 0x10];
1634 } else {
1635 result.u8[i] = b->u8[index];
1638 #endif
1639 *r = result;
1642 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1644 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1646 #if defined(HOST_WORDS_BIGENDIAN)
1647 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1648 memset(&r->u8[16-sh], 0, sh);
1649 #else
1650 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1651 memset(&r->u8[0], 0, sh);
1652 #endif
1655 /* Experimental testing shows that hardware masks the immediate. */
1656 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1657 #if defined(HOST_WORDS_BIGENDIAN)
1658 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1659 #else
1660 #define SPLAT_ELEMENT(element) \
1661 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1662 #endif
1663 #define VSPLT(suffix, element) \
1664 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1666 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1667 int i; \
1669 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1670 r->element[i] = s; \
1673 VSPLT(b, u8)
1674 VSPLT(h, u16)
1675 VSPLT(w, u32)
1676 #undef VSPLT
1677 #undef SPLAT_ELEMENT
1678 #undef _SPLAT_MASKED
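/* vspltis*: sign-extend the 5-bit immediate (via the <<3 >>3 trick on an
 * int8_t) and replicate it into every element.
 */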
1680 #define VSPLTI(suffix, element, splat_type) \
1681 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1683 splat_type x = (int8_t)(splat << 3) >> 3; \
1684 int i; \
1686 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1687 r->element[i] = x; \
1690 VSPLTI(b, s8, int8_t)
1691 VSPLTI(h, s16, int16_t)
1692 VSPLTI(w, s32, int32_t)
1693 #undef VSPLTI
1695 #define VSR(suffix, element, mask) \
1696 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1698 int i; \
1700 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1701 unsigned int shift = b->element[i] & mask; \
1702 r->element[i] = a->element[i] >> shift; \
1705 VSR(ab, s8, 0x7)
1706 VSR(ah, s16, 0xF)
1707 VSR(aw, s32, 0x1F)
1708 VSR(ad, s64, 0x3F)
1709 VSR(b, u8, 0x7)
1710 VSR(h, u16, 0xF)
1711 VSR(w, u32, 0x1F)
1712 VSR(d, u64, 0x3F)
1713 #undef VSR
1715 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1717 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1719 #if defined(HOST_WORDS_BIGENDIAN)
1720 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1721 memset(&r->u8[0], 0, sh);
1722 #else
1723 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1724 memset(&r->u8[16 - sh], 0, sh);
1725 #endif
1728 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1730 int i;
1732 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1733 r->u32[i] = a->u32[i] >= b->u32[i];
1737 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1739 int64_t t;
1740 int i, upper;
1741 ppc_avr_t result;
1742 int sat = 0;
1744 #if defined(HOST_WORDS_BIGENDIAN)
1745 upper = ARRAY_SIZE(r->s32)-1;
1746 #else
1747 upper = 0;
1748 #endif
1749 t = (int64_t)b->s32[upper];
1750 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1751 t += a->s32[i];
1752 result.s32[i] = 0;
1754 result.s32[upper] = cvtsdsw(t, &sat);
1755 *r = result;
1757 if (sat) {
1758 env->vscr |= (1 << VSCR_SAT);
1762 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1764 int i, j, upper;
1765 ppc_avr_t result;
1766 int sat = 0;
1768 #if defined(HOST_WORDS_BIGENDIAN)
1769 upper = 1;
1770 #else
1771 upper = 0;
1772 #endif
1773 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1774 int64_t t = (int64_t)b->s32[upper + i * 2];
1776 result.u64[i] = 0;
1777 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1778 t += a->s32[2 * i + j];
1780 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
1783 *r = result;
1784 if (sat) {
1785 env->vscr |= (1 << VSCR_SAT);
1789 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1791 int i, j;
1792 int sat = 0;
1794 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1795 int64_t t = (int64_t)b->s32[i];
1797 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1798 t += a->s8[4 * i + j];
1800 r->s32[i] = cvtsdsw(t, &sat);
1803 if (sat) {
1804 env->vscr |= (1 << VSCR_SAT);
1808 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1810 int sat = 0;
1811 int i;
1813 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1814 int64_t t = (int64_t)b->s32[i];
1816 t += a->s16[2 * i] + a->s16[2 * i + 1];
1817 r->s32[i] = cvtsdsw(t, &sat);
1820 if (sat) {
1821 env->vscr |= (1 << VSCR_SAT);
1825 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1827 int i, j;
1828 int sat = 0;
1830 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1831 uint64_t t = (uint64_t)b->u32[i];
1833 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1834 t += a->u8[4 * i + j];
1836 r->u32[i] = cvtuduw(t, &sat);
1839 if (sat) {
1840 env->vscr |= (1 << VSCR_SAT);
1844 #if defined(HOST_WORDS_BIGENDIAN)
1845 #define UPKHI 1
1846 #define UPKLO 0
1847 #else
1848 #define UPKHI 0
1849 #define UPKLO 1
1850 #endif
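/* vupk{h,l}px: unpack 1/5/5/5 packed pixels into byte-sized a/r/g/b
 * fields; the 5-bit channels are zero-extended and the 1-bit alpha is
 * expanded to 0x00 or 0xff.
 */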
1851 #define VUPKPX(suffix, hi) \
1852 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1854 int i; \
1855 ppc_avr_t result; \
1857 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1858 uint16_t e = b->u16[hi ? i : i+4]; \
1859 uint8_t a = (e >> 15) ? 0xff : 0; \
1860 uint8_t r = (e >> 10) & 0x1f; \
1861 uint8_t g = (e >> 5) & 0x1f; \
1862 uint8_t b = e & 0x1f; \
1864 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1866 *r = result; \
1868 VUPKPX(lpx, UPKLO)
1869 VUPKPX(hpx, UPKHI)
1870 #undef VUPKPX
1872 #define VUPK(suffix, unpacked, packee, hi) \
1873 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1875 int i; \
1876 ppc_avr_t result; \
1878 if (hi) { \
1879 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1880 result.unpacked[i] = b->packee[i]; \
1882 } else { \
1883 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1884 i++) { \
1885 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1888 *r = result; \
1890 VUPK(hsb, s16, s8, UPKHI)
1891 VUPK(hsh, s32, s16, UPKHI)
1892 VUPK(hsw, s64, s32, UPKHI)
1893 VUPK(lsb, s16, s8, UPKLO)
1894 VUPK(lsh, s32, s16, UPKLO)
1895 VUPK(lsw, s64, s32, UPKLO)
1896 #undef VUPK
1897 #undef UPKHI
1898 #undef UPKLO
1900 #define VGENERIC_DO(name, element) \
1901 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1903 int i; \
1905 VECTOR_FOR_INORDER_I(i, element) { \
1906 r->element[i] = name(b->element[i]); \
1910 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1911 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1912 #define clzw(v) clz32((v))
1913 #define clzd(v) clz64((v))
1915 VGENERIC_DO(clzb, u8)
1916 VGENERIC_DO(clzh, u16)
1917 VGENERIC_DO(clzw, u32)
1918 VGENERIC_DO(clzd, u64)
1920 #undef clzb
1921 #undef clzh
1922 #undef clzw
1923 #undef clzd
1925 #define popcntb(v) ctpop8(v)
1926 #define popcnth(v) ctpop16(v)
1927 #define popcntw(v) ctpop32(v)
1928 #define popcntd(v) ctpop64(v)
1930 VGENERIC_DO(popcntb, u8)
1931 VGENERIC_DO(popcnth, u16)
1932 VGENERIC_DO(popcntw, u32)
1933 VGENERIC_DO(popcntd, u64)
1935 #undef popcntb
1936 #undef popcnth
1937 #undef popcntw
1938 #undef popcntd
1940 #undef VGENERIC_DO
1942 #if defined(HOST_WORDS_BIGENDIAN)
1943 #define QW_ONE { .u64 = { 0, 1 } }
1944 #else
1945 #define QW_ONE { .u64 = { 1, 0 } }
1946 #endif
1948 #ifndef CONFIG_INT128
1950 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1952 t->u64[0] = ~a.u64[0];
1953 t->u64[1] = ~a.u64[1];
1956 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1958 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
1959 return -1;
1960 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
1961 return 1;
1962 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
1963 return -1;
1964 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
1965 return 1;
1966 } else {
1967 return 0;
1971 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1973 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1974 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1975 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1978 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1980 ppc_avr_t not_a;
1981 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1982 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1983 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1984 avr_qw_not(&not_a, a);
1985 return avr_qw_cmpu(not_a, b) < 0;
1988 #endif
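/* 128-bit (quadword) integer arithmetic: the vaddXuqm/vsubXuqm family
 * uses __uint128_t when CONFIG_INT128 is available and falls back to the
 * u64-pair helpers above otherwise.
 */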
1990 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1992 #ifdef CONFIG_INT128
1993 r->u128 = a->u128 + b->u128;
1994 #else
1995 avr_qw_add(r, *a, *b);
1996 #endif
1999 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2001 #ifdef CONFIG_INT128
2002 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2003 #else
2005 if (c->u64[LO_IDX] & 1) {
2006 ppc_avr_t tmp;
2008 tmp.u64[HI_IDX] = 0;
2009 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2010 avr_qw_add(&tmp, *a, tmp);
2011 avr_qw_add(r, tmp, *b);
2012 } else {
2013 avr_qw_add(r, *a, *b);
2015 #endif
2018 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2020 #ifdef CONFIG_INT128
2021 r->u128 = (~a->u128 < b->u128);
2022 #else
2023 ppc_avr_t not_a;
2025 avr_qw_not(&not_a, *a);
2027 r->u64[HI_IDX] = 0;
2028 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2029 #endif
2032 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2034 #ifdef CONFIG_INT128
2035 int carry_out = (~a->u128 < b->u128);
2036 if (!carry_out && (c->u128 & 1)) {
2037 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2038 ((a->u128 != 0) || (b->u128 != 0));
2040 r->u128 = carry_out;
2041 #else
2043 int carry_in = c->u64[LO_IDX] & 1;
2044 int carry_out = 0;
2045 ppc_avr_t tmp;
2047 carry_out = avr_qw_addc(&tmp, *a, *b);
2049 if (!carry_out && carry_in) {
2050 ppc_avr_t one = QW_ONE;
2051 carry_out = avr_qw_addc(&tmp, tmp, one);
2053 r->u64[HI_IDX] = 0;
2054 r->u64[LO_IDX] = carry_out;
2055 #endif
2058 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2060 #ifdef CONFIG_INT128
2061 r->u128 = a->u128 - b->u128;
2062 #else
2063 ppc_avr_t tmp;
2064 ppc_avr_t one = QW_ONE;
2066 avr_qw_not(&tmp, *b);
2067 avr_qw_add(&tmp, *a, tmp);
2068 avr_qw_add(r, tmp, one);
2069 #endif
2072 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2074 #ifdef CONFIG_INT128
2075 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2076 #else
2077 ppc_avr_t tmp, sum;
2079 avr_qw_not(&tmp, *b);
2080 avr_qw_add(&sum, *a, tmp);
2082 tmp.u64[HI_IDX] = 0;
2083 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2084 avr_qw_add(r, sum, tmp);
2085 #endif
2088 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2090 #ifdef CONFIG_INT128
2091 r->u128 = (~a->u128 < ~b->u128) ||
2092 (a->u128 + ~b->u128 == (__uint128_t)-1);
2093 #else
2094 int carry = (avr_qw_cmpu(*a, *b) > 0);
2095 if (!carry) {
2096 ppc_avr_t tmp;
2097 avr_qw_not(&tmp, *b);
2098 avr_qw_add(&tmp, *a, tmp);
2099 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2101 r->u64[HI_IDX] = 0;
2102 r->u64[LO_IDX] = carry;
2103 #endif
2106 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2108 #ifdef CONFIG_INT128
2109 r->u128 =
2110 (~a->u128 < ~b->u128) ||
2111 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2112 #else
2113 int carry_in = c->u64[LO_IDX] & 1;
2114 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2115 if (!carry_out && carry_in) {
2116 ppc_avr_t tmp;
2117 avr_qw_not(&tmp, *b);
2118 avr_qw_add(&tmp, *a, tmp);
2119 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2122 r->u64[HI_IDX] = 0;
2123 r->u64[LO_IDX] = carry_out;
2124 #endif
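/* bcdadd/bcdsub operate on signed packed decimal (BCD) values: 31 digits
 * plus a sign nibble in the low nibble of the least significant byte,
 * with the sign codes defined below.
 */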
2127 #define BCD_PLUS_PREF_1 0xC
2128 #define BCD_PLUS_PREF_2 0xF
2129 #define BCD_PLUS_ALT_1 0xA
2130 #define BCD_NEG_PREF 0xD
2131 #define BCD_NEG_ALT 0xB
2132 #define BCD_PLUS_ALT_2 0xE
2134 #if defined(HOST_WORDS_BIGENDIAN)
2135 #define BCD_DIG_BYTE(n) (15 - (n/2))
2136 #else
2137 #define BCD_DIG_BYTE(n) (n/2)
2138 #endif
2140 static int bcd_get_sgn(ppc_avr_t *bcd)
2142 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2143 case BCD_PLUS_PREF_1:
2144 case BCD_PLUS_PREF_2:
2145 case BCD_PLUS_ALT_1:
2146 case BCD_PLUS_ALT_2:
2148 return 1;
2151 case BCD_NEG_PREF:
2152 case BCD_NEG_ALT:
2154 return -1;
2157 default:
2159 return 0;
2164 static int bcd_preferred_sgn(int sgn, int ps)
2166 if (sgn >= 0) {
2167 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2168 } else {
2169 return BCD_NEG_PREF;
2173 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2175 uint8_t result;
2176 if (n & 1) {
2177 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2178 } else {
2179 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2182 if (unlikely(result > 9)) {
2183 *invalid = true;
2185 return result;
2188 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2190 if (n & 1) {
2191 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2192 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2193 } else {
2194 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2195 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2199 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2201 int i;
2202 int invalid = 0;
2203 for (i = 31; i > 0; i--) {
2204 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2205 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2206 if (unlikely(invalid)) {
2207 return 0; /* doesn't matter */
2208 } else if (dig_a > dig_b) {
2209 return 1;
2210 } else if (dig_a < dig_b) {
2211 return -1;
2215 return 0;
2218 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2219 int *overflow)
2221 int carry = 0;
2222 int i;
2223 int is_zero = 1;
2224 for (i = 1; i <= 31; i++) {
2225 uint8_t digit = bcd_get_digit(a, i, invalid) +
2226 bcd_get_digit(b, i, invalid) + carry;
2227 is_zero &= (digit == 0);
2228 if (digit > 9) {
2229 carry = 1;
2230 digit -= 10;
2231 } else {
2232 carry = 0;
2235 bcd_put_digit(t, digit, i);
2237 if (unlikely(*invalid)) {
2238 return -1;
2242 *overflow = carry;
2243 return is_zero;
2246 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2247 int *overflow)
2249 int carry = 0;
2250 int i;
2251 int is_zero = 1;
2252 for (i = 1; i <= 31; i++) {
2253 uint8_t digit = bcd_get_digit(a, i, invalid) -
2254 bcd_get_digit(b, i, invalid) + carry;
2255 is_zero &= (digit == 0);
2256 if (digit & 0x80) {
2257 carry = -1;
2258 digit += 10;
2259 } else {
2260 carry = 0;
2263 bcd_put_digit(t, digit, i);
2265 if (unlikely(*invalid)) {
2266 return -1;
2270 *overflow = carry;
2271 return is_zero;
2274 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2277 int sgna = bcd_get_sgn(a);
2278 int sgnb = bcd_get_sgn(b);
2279 int invalid = (sgna == 0) || (sgnb == 0);
2280 int overflow = 0;
2281 int zero = 0;
2282 uint32_t cr = 0;
2283 ppc_avr_t result = { .u64 = { 0, 0 } };
2285 if (!invalid) {
2286 if (sgna == sgnb) {
2287 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2288 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2289 cr = (sgna > 0) ? 4 : 8;
2290 } else if (bcd_cmp_mag(a, b) > 0) {
2291 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2292 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2293 cr = (sgna > 0) ? 4 : 8;
2294 } else {
2295 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2296 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2297 cr = (sgnb > 0) ? 4 : 8;
2301 if (unlikely(invalid)) {
2302 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2303 cr = 1;
2304 } else if (overflow) {
2305 cr |= 1;
2306 } else if (zero) {
2307 cr = 2;
2310 *r = result;
2312 return cr;
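/* The return value is the 4-bit CR field written by bcdadd./bcdsub.:
 * 0x8 (LT) for a negative result, 0x4 (GT) for a positive result, 0x2 (EQ)
 * for zero and 0x1 (SO) for overflow or an invalid operand.  Overflow keeps
 * the previously computed sign bit and ORs in SO, while an invalid encoding
 * forces the target register to all ones and returns SO alone. */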
2315 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2317 ppc_avr_t bcopy = *b;
2318 int sgnb = bcd_get_sgn(b);
2319 if (sgnb < 0) {
2320 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2321 } else if (sgnb > 0) {
2322 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2324 /* else invalid ... defer to bcdadd code for proper handling */
2326 return helper_bcdadd(r, a, &bcopy, ps);
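/* bcdsub. is implemented as bcdadd. on a copy of b with the sign nibble
 * flipped to the opposite preferred code; an unrecognised sign code is left
 * alone so that helper_bcdadd() still sees it and takes the invalid path. */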
2329 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2331 int i;
2332 VECTOR_FOR_INORDER_I(i, u8) {
2333 r->u8[i] = AES_sbox[a->u8[i]];
2337 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2339 int i;
2341 VECTOR_FOR_INORDER_I(i, u32) {
2342 r->AVRW(i) = b->AVRW(i) ^
2343 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2344 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2345 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2346 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
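/* vcipher is one forward AES round.  Each AES_Te* table combines SubBytes
 * with one column of MixColumns and AES_shifts supplies the ShiftRows byte
 * selection, so the expression above amounts to
 * MixColumns(ShiftRows(SubBytes(state))) ^ roundkey, with the round key
 * taken from b.  vcipherlast below uses the plain S-box table AES_Te4
 * instead, i.e. the final round without MixColumns. */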
2350 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2352 int i;
2354 VECTOR_FOR_INORDER_I(i, u8) {
2355 r->AVRB(i) = b->AVRB(i) ^ (AES_Te4[a->AVRB(AES_shifts[i])] & 0xFF);
2359 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2361 /* This differs from what is written in ISA V2.07. The RTL is */
2362 /* incorrect and will be fixed in V2.07B. */
2363 int i;
2364 ppc_avr_t tmp;
2366 VECTOR_FOR_INORDER_I(i, u8) {
2367 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2370 VECTOR_FOR_INORDER_I(i, u32) {
2371 r->AVRW(i) =
2372 AES_imc[tmp.AVRB(4*i + 0)][0] ^
2373 AES_imc[tmp.AVRB(4*i + 1)][1] ^
2374 AES_imc[tmp.AVRB(4*i + 2)][2] ^
2375 AES_imc[tmp.AVRB(4*i + 3)][3];
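/* vncipher as implemented here applies InvShiftRows (AES_ishifts) and
 * InvSubBytes (AES_isbox) to a, XORs in the round key from b, and only then
 * runs the result through InvMixColumns via the AES_imc table; per the
 * comment above, this is the ordering the corrected (V2.07B) definition is
 * expected to specify.  vncipherlast below is the final inverse round,
 * without InvMixColumns. */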
2379 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2381 int i;
2383 VECTOR_FOR_INORDER_I(i, u8) {
2384 r->AVRB(i) = b->AVRB(i) ^ (AES_Td4[a->AVRB(AES_ishifts[i])] & 0xFF);
2388 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
2389 #if defined(HOST_WORDS_BIGENDIAN)
2390 #define EL_IDX(i) (i)
2391 #else
2392 #define EL_IDX(i) (3 - (i))
2393 #endif
2395 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2397 int st = (st_six & 0x10) != 0;
2398 int six = st_six & 0xF;
2399 int i;
2401 VECTOR_FOR_INORDER_I(i, u32) {
2402 if (st == 0) {
2403 if ((six & (0x8 >> i)) == 0) {
2404 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2405 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2406 (a->u32[EL_IDX(i)] >> 3);
2407 } else { /* six.bit[i] == 1 */
2408 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2409 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2410 (a->u32[EL_IDX(i)] >> 10);
2412 } else { /* st == 1 */
2413 if ((six & (0x8 >> i)) == 0) {
2414 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2415 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2416 ROTRu32(a->u32[EL_IDX(i)], 22);
2417 } else { /* six.bit[i] == 1 */
2418 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2419 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2420 ROTRu32(a->u32[EL_IDX(i)], 25);
2426 #undef ROTRu32
2427 #undef EL_IDX
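/* vshasigmaw evaluates the SHA-256 sigma functions element-wise: st (bit 4
 * of the immediate) selects between the lower-case message-schedule
 * functions and the upper-case compression functions, and each bit of 'six'
 * picks sigma0 vs sigma1 for the corresponding word.  The rotate/shift
 * amounts above should match FIPS 180-4:
 *   sigma0(x) = ROTR(x,7)  ^ ROTR(x,18) ^ (x >> 3)
 *   sigma1(x) = ROTR(x,17) ^ ROTR(x,19) ^ (x >> 10)
 *   Sigma0(x) = ROTR(x,2)  ^ ROTR(x,13) ^ ROTR(x,22)
 *   Sigma1(x) = ROTR(x,6)  ^ ROTR(x,11) ^ ROTR(x,25) */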
2429 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
2430 #if defined(HOST_WORDS_BIGENDIAN)
2431 #define EL_IDX(i) (i)
2432 #else
2433 #define EL_IDX(i) (1 - (i))
2434 #endif
2436 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2438 int st = (st_six & 0x10) != 0;
2439 int six = st_six & 0xF;
2440 int i;
2442 VECTOR_FOR_INORDER_I(i, u64) {
2443 if (st == 0) {
2444 if ((six & (0x8 >> (2*i))) == 0) {
2445 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2446 ROTRu64(a->u64[EL_IDX(i)], 8) ^
2447 (a->u64[EL_IDX(i)] >> 7);
2448 } else { /* six.bit[2*i] == 1 */
2449 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2450 ROTRu64(a->u64[EL_IDX(i)], 61) ^
2451 (a->u64[EL_IDX(i)] >> 6);
2453 } else { /* st == 1 */
2454 if ((six & (0x8 >> (2*i))) == 0) {
2455 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2456 ROTRu64(a->u64[EL_IDX(i)], 34) ^
2457 ROTRu64(a->u64[EL_IDX(i)], 39);
2458 } else { /* six.bit[2*i] == 1 */
2459 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2460 ROTRu64(a->u64[EL_IDX(i)], 18) ^
2461 ROTRu64(a->u64[EL_IDX(i)], 41);
2467 #undef ROTRu64
2468 #undef EL_IDX
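/* vshasigmad is the SHA-512 counterpart on doubleword elements, again
 * following the FIPS 180-4 constants (sigma0: rotates 1/8 and shift 7,
 * sigma1: 19/61 and shift 6, Sigma0: 28/34/39, Sigma1: 14/18/41).  Only two
 * of the four selector bits are consulted (masks 0x8 and 0x2), one per
 * doubleword element. */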
2470 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2472 int i;
2473 VECTOR_FOR_INORDER_I(i, u8) {
2474 int indexA = c->u8[i] >> 4;
2475 int indexB = c->u8[i] & 0xF;
2476 #if defined(HOST_WORDS_BIGENDIAN)
2477 r->u8[i] = a->u8[indexA] ^ b->u8[indexB];
2478 #else
2479 r->u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
2480 #endif
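/* vpermxor treats each byte of c as two 4-bit selectors: the high nibble
 * picks a byte of a, the low nibble picks a byte of b, and the two selected
 * bytes are XORed.  The selectors use big-endian element numbering, hence
 * the 15 - index mirroring on little-endian hosts. */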
2484 #undef VECTOR_FOR_INORDER_I
2485 #undef HI_IDX
2486 #undef LO_IDX
2488 /*****************************************************************************/
2489 /* SPE extension helpers */
2490 /* Use a table to make this quicker */
2491 static const uint8_t hbrev[16] = {
2492 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
2493 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
2496 static inline uint8_t byte_reverse(uint8_t val)
2498 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
2501 static inline uint32_t word_reverse(uint32_t val)
2503 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
2504 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
2507 #define MASKBITS 16 /* implementation-dependent width; 16 is an arbitrary placeholder */
2508 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2510 uint32_t a, b, d, mask;
2512 mask = UINT32_MAX >> (32 - MASKBITS);
2513 a = arg1 & mask;
2514 b = arg2 & mask;
2515 d = word_reverse(1 + word_reverse(a | ~b));
2516 return (arg1 & ~mask) | (d & b);
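/* brinc is the SPE bit-reversed increment used for FFT addressing: the low
 * bits of arg1 selected by the mask in arg2 are bit-reversed, incremented by
 * one and reversed back, while arg1's bits above the fixed MASKBITS-wide
 * window are kept unchanged.  Illustrative example with a 3-bit mask in arg2
 * (0b111): arg1 = 0b010 reverses to 0b010, +1 gives 0b011, and reversing
 * back yields 0b110 - the next address in bit-reversed order. */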
2519 uint32_t helper_cntlsw32(uint32_t val)
2521 if (val & 0x80000000) {
2522 return clz32(~val);
2523 } else {
2524 return clz32(val);
2528 uint32_t helper_cntlzw32(uint32_t val)
2530 return clz32(val);
2533 /* 440 specific */
2534 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2535 target_ulong low, uint32_t update_Rc)
2537 target_ulong mask;
2538 int i;
2540 i = 1;
2541 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2542 if ((high & mask) == 0) {
2543 if (update_Rc) {
2544 env->crf[0] = 0x4;
2546 goto done;
2548 i++;
2550 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2551 if ((low & mask) == 0) {
2552 if (update_Rc) {
2553 env->crf[0] = 0x8;
2555 goto done;
2557 i++;
2559 if (update_Rc) {
2560 env->crf[0] = 0x2;
2562 done:
2563 env->xer = (env->xer & ~0x7F) | i;
2564 if (update_Rc) {
2565 env->crf[0] |= xer_so;
2567 return i;
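/* As coded, dlmzb scans the eight bytes formed by high:low from the most
 * significant byte of 'high' downwards and returns the 1-based position of
 * the leftmost zero byte, i.e. the string length including its terminator
 * (9 when no zero byte is present).  The count is also written to the low
 * seven bits of XER, and with Rc set CR0 reports where the terminator was
 * found: 0x4 in 'high', 0x8 in 'low', 0x2 if there is none, always ORed with
 * the summary-overflow bit. */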