target-ppc/int_helper.c
/*
 *  PowerPC integer and vector emulation helpers for QEMU.
 *
 *  Copyright (c) 2003-2007 Jocelyn Mayer
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "cpu.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "qemu/aes.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */
#if defined(TARGET_PPC64)

uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
{
    int64_t th;
    uint64_t tl;

    muls64(&tl, (uint64_t *)&th, arg1, arg2);
    /* If th != 0 && th != -1, then we had an overflow:
     * (uint64_t)(th + 1) <= 1 holds exactly when th is 0 or -1. */
    if (likely((uint64_t)(th + 1) <= 1)) {
        env->ov = 0;
    } else {
        env->so = env->ov = 1;
    }
    return (int64_t)tl;
}
#endif
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}
target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}
108 #if defined(TARGET_PPC64)
110 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
112 uint64_t rt = 0;
113 int overflow = 0;
115 overflow = divu128(&rt, &ra, rb);
117 if (unlikely(overflow)) {
118 rt = 0; /* Undefined */
121 if (oe) {
122 if (unlikely(overflow)) {
123 env->so = env->ov = 1;
124 } else {
125 env->ov = 0;
129 return rt;
132 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
134 int64_t rt = 0;
135 int64_t ra = (int64_t)rau;
136 int64_t rb = (int64_t)rbu;
137 int overflow = divs128(&rt, &ra, rb);
139 if (unlikely(overflow)) {
140 rt = 0; /* Undefined */
143 if (oe) {
145 if (unlikely(overflow)) {
146 env->so = env->ov = 1;
147 } else {
148 env->ov = 0;
152 return rt;
155 #endif
158 target_ulong helper_cntlzw(target_ulong t)
160 return clz32(t);
163 #if defined(TARGET_PPC64)
164 target_ulong helper_cntlzd(target_ulong t)
166 return clz64(t);
168 #endif
170 #if defined(TARGET_PPC64)
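/* bpermd (Bit Permute Doubleword): each of the eight low-order bytes of rs
 * selects a bit number; if that number is below 64, the corresponding bit of
 * rb (numbered from the most-significant end) is copied into bit i of the
 * result, otherwise the result bit is 0. */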
172 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
174 int i;
175 uint64_t ra = 0;
177 for (i = 0; i < 8; i++) {
178 int index = (rs >> (i*8)) & 0xFF;
179 if (index < 64) {
180 if (rb & (1ull << (63-index))) {
181 ra |= 1 << i;
185 return ra;
188 #endif
190 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
192 target_ulong mask = 0xff;
193 target_ulong ra = 0;
194 int i;
196 for (i = 0; i < sizeof(target_ulong); i++) {
197 if ((rs & mask) == (rb & mask)) {
198 ra |= mask;
200 mask <<= 8;
202 return ra;
205 /* shift right arithmetic helper */
206 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
207 target_ulong shift)
209 int32_t ret;
211 if (likely(!(shift & 0x20))) {
212 if (likely((uint32_t)shift != 0)) {
213 shift &= 0x1f;
214 ret = (int32_t)value >> shift;
215 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
216 env->ca = 0;
217 } else {
218 env->ca = 1;
220 } else {
221 ret = (int32_t)value;
222 env->ca = 0;
224 } else {
225 ret = (int32_t)value >> 31;
226 env->ca = (ret != 0);
228 return (target_long)ret;
231 #if defined(TARGET_PPC64)
232 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
233 target_ulong shift)
235 int64_t ret;
237 if (likely(!(shift & 0x40))) {
238 if (likely((uint64_t)shift != 0)) {
239 shift &= 0x3f;
240 ret = (int64_t)value >> shift;
241 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
242 env->ca = 0;
243 } else {
244 env->ca = 1;
246 } else {
247 ret = (int64_t)value;
248 env->ca = 0;
250 } else {
251 ret = (int64_t)value >> 63;
252 env->ca = (ret != 0);
254 return ret;
256 #endif
258 #if defined(TARGET_PPC64)
259 target_ulong helper_popcntb(target_ulong val)
261 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
262 0x5555555555555555ULL);
263 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
264 0x3333333333333333ULL);
265 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
266 0x0f0f0f0f0f0f0f0fULL);
267 return val;
270 target_ulong helper_popcntw(target_ulong val)
272 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
273 0x5555555555555555ULL);
274 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
275 0x3333333333333333ULL);
276 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
277 0x0f0f0f0f0f0f0f0fULL);
278 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
279 0x00ff00ff00ff00ffULL);
280 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
281 0x0000ffff0000ffffULL);
282 return val;
285 target_ulong helper_popcntd(target_ulong val)
287 return ctpop64(val);
289 #else
290 target_ulong helper_popcntb(target_ulong val)
292 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
293 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
294 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
295 return val;
298 target_ulong helper_popcntw(target_ulong val)
300 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
301 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
302 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
303 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
304 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
305 return val;
307 #endif
309 /*****************************************************************************/
310 /* PowerPC 601 specific instructions (POWER bridge) */
311 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
313 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
315 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
316 (int32_t)arg2 == 0) {
317 env->spr[SPR_MQ] = 0;
318 return INT32_MIN;
319 } else {
320 env->spr[SPR_MQ] = tmp % arg2;
321 return tmp / (int32_t)arg2;
325 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
326 target_ulong arg2)
328 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
330 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
331 (int32_t)arg2 == 0) {
332 env->so = env->ov = 1;
333 env->spr[SPR_MQ] = 0;
334 return INT32_MIN;
335 } else {
336 env->spr[SPR_MQ] = tmp % arg2;
337 tmp /= (int32_t)arg2;
338 if ((int32_t)tmp != tmp) {
339 env->so = env->ov = 1;
340 } else {
341 env->ov = 0;
343 return tmp;
347 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
348 target_ulong arg2)
350 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
351 (int32_t)arg2 == 0) {
352 env->spr[SPR_MQ] = 0;
353 return INT32_MIN;
354 } else {
355 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
356 return (int32_t)arg1 / (int32_t)arg2;
360 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
361 target_ulong arg2)
363 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
364 (int32_t)arg2 == 0) {
365 env->so = env->ov = 1;
366 env->spr[SPR_MQ] = 0;
367 return INT32_MIN;
368 } else {
369 env->ov = 0;
370 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
371 return (int32_t)arg1 / (int32_t)arg2;
/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/* Extremely decomposed:
 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif
395 /*****************************************************************************/
396 /* Altivec extension helpers */
397 #if defined(HOST_WORDS_BIGENDIAN)
398 #define HI_IDX 0
399 #define LO_IDX 1
400 #define AVRB(i) u8[i]
401 #define AVRW(i) u32[i]
402 #else
403 #define HI_IDX 1
404 #define LO_IDX 0
405 #define AVRB(i) u8[15-(i)]
406 #define AVRW(i) u32[3-(i)]
407 #endif
409 #if defined(HOST_WORDS_BIGENDIAN)
410 #define VECTOR_FOR_INORDER_I(index, element) \
411 for (index = 0; index < ARRAY_SIZE(r->element); index++)
412 #else
413 #define VECTOR_FOR_INORDER_I(index, element) \
414 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
415 #endif
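/* VECTOR_FOR_INORDER_I() walks the host array indices so that the vector
 * elements are visited in PowerPC (big-endian) element order, element 0
 * first, whatever the host byte order. */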
417 /* Saturating arithmetic helpers. */
418 #define SATCVT(from, to, from_type, to_type, min, max) \
419 static inline to_type cvt##from##to(from_type x, int *sat) \
421 to_type r; \
423 if (x < (from_type)min) { \
424 r = min; \
425 *sat = 1; \
426 } else if (x > (from_type)max) { \
427 r = max; \
428 *sat = 1; \
429 } else { \
430 r = x; \
432 return r; \
434 #define SATCVTU(from, to, from_type, to_type, min, max) \
435 static inline to_type cvt##from##to(from_type x, int *sat) \
437 to_type r; \
439 if (x > (from_type)max) { \
440 r = max; \
441 *sat = 1; \
442 } else { \
443 r = x; \
445 return r; \
447 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
448 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
449 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
451 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
452 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
453 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
454 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
455 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
456 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
457 #undef SATCVT
458 #undef SATCVTU
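/* The generated cvt##from##to() converters clamp a wider intermediate into
 * the destination range and record saturation; e.g. cvtsdsw() maps an
 * int64_t below INT32_MIN to INT32_MIN, above INT32_MAX to INT32_MAX, and
 * sets *sat = 1 whenever clamping occurred. */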
460 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
462 int i, j = (sh & 0xf);
464 VECTOR_FOR_INORDER_I(i, u8) {
465 r->u8[i] = j++;
469 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
471 int i, j = 0x10 - (sh & 0xf);
473 VECTOR_FOR_INORDER_I(i, u8) {
474 r->u8[i] = j++;
478 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
480 #if defined(HOST_WORDS_BIGENDIAN)
481 env->vscr = r->u32[3];
482 #else
483 env->vscr = r->u32[0];
484 #endif
485 set_flush_to_zero(vscr_nj, &env->vec_status);
488 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
490 int i;
492 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
493 r->u32[i] = ~a->u32[i] < b->u32[i];
497 #define VARITH_DO(name, op, element) \
498 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
500 int i; \
502 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
503 r->element[i] = a->element[i] op b->element[i]; \
506 #define VARITH(suffix, element) \
507 VARITH_DO(add##suffix, +, element) \
508 VARITH_DO(sub##suffix, -, element)
509 VARITH(ubm, u8)
510 VARITH(uhm, u16)
511 VARITH(uwm, u32)
512 VARITH(udm, u64)
513 VARITH_DO(muluwm, *, u32)
514 #undef VARITH_DO
515 #undef VARITH
517 #define VARITHFP(suffix, func) \
518 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
519 ppc_avr_t *b) \
521 int i; \
523 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
524 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
527 VARITHFP(addfp, float32_add)
528 VARITHFP(subfp, float32_sub)
529 VARITHFP(minfp, float32_min)
530 VARITHFP(maxfp, float32_max)
531 #undef VARITHFP
533 #define VARITHFPFMA(suffix, type) \
534 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
535 ppc_avr_t *b, ppc_avr_t *c) \
537 int i; \
538 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
539 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
540 type, &env->vec_status); \
543 VARITHFPFMA(maddfp, 0);
544 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
545 #undef VARITHFPFMA
547 #define VARITHSAT_CASE(type, op, cvt, element) \
549 type result = (type)a->element[i] op (type)b->element[i]; \
550 r->element[i] = cvt(result, &sat); \
553 #define VARITHSAT_DO(name, op, optype, cvt, element) \
554 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
555 ppc_avr_t *b) \
557 int sat = 0; \
558 int i; \
560 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
561 switch (sizeof(r->element[0])) { \
562 case 1: \
563 VARITHSAT_CASE(optype, op, cvt, element); \
564 break; \
565 case 2: \
566 VARITHSAT_CASE(optype, op, cvt, element); \
567 break; \
568 case 4: \
569 VARITHSAT_CASE(optype, op, cvt, element); \
570 break; \
573 if (sat) { \
574 env->vscr |= (1 << VSCR_SAT); \
577 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
578 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
579 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
580 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
581 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
582 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
583 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
584 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
585 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
586 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
587 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
588 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
589 #undef VARITHSAT_CASE
590 #undef VARITHSAT_DO
591 #undef VARITHSAT_SIGNED
592 #undef VARITHSAT_UNSIGNED
594 #define VAVG_DO(name, element, etype) \
595 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
597 int i; \
599 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
600 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
601 r->element[i] = x >> 1; \
605 #define VAVG(type, signed_element, signed_type, unsigned_element, \
606 unsigned_type) \
607 VAVG_DO(avgs##type, signed_element, signed_type) \
608 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
609 VAVG(b, s8, int16_t, u8, uint16_t)
610 VAVG(h, s16, int32_t, u16, uint32_t)
611 VAVG(w, s32, int64_t, u32, uint64_t)
612 #undef VAVG_DO
613 #undef VAVG
615 #define VCF(suffix, cvt, element) \
616 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
617 ppc_avr_t *b, uint32_t uim) \
619 int i; \
621 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
622 float32 t = cvt(b->element[i], &env->vec_status); \
623 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
626 VCF(ux, uint32_to_float32, u32)
627 VCF(sx, int32_to_float32, s32)
628 #undef VCF
630 #define VCMP_DO(suffix, compare, element, record) \
631 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
632 ppc_avr_t *a, ppc_avr_t *b) \
634 uint64_t ones = (uint64_t)-1; \
635 uint64_t all = ones; \
636 uint64_t none = 0; \
637 int i; \
639 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
640 uint64_t result = (a->element[i] compare b->element[i] ? \
641 ones : 0x0); \
642 switch (sizeof(a->element[0])) { \
643 case 8: \
644 r->u64[i] = result; \
645 break; \
646 case 4: \
647 r->u32[i] = result; \
648 break; \
649 case 2: \
650 r->u16[i] = result; \
651 break; \
652 case 1: \
653 r->u8[i] = result; \
654 break; \
656 all &= result; \
657 none |= result; \
659 if (record) { \
660 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
663 #define VCMP(suffix, compare, element) \
664 VCMP_DO(suffix, compare, element, 0) \
665 VCMP_DO(suffix##_dot, compare, element, 1)
666 VCMP(equb, ==, u8)
667 VCMP(equh, ==, u16)
668 VCMP(equw, ==, u32)
669 VCMP(equd, ==, u64)
670 VCMP(gtub, >, u8)
671 VCMP(gtuh, >, u16)
672 VCMP(gtuw, >, u32)
673 VCMP(gtud, >, u64)
674 VCMP(gtsb, >, s8)
675 VCMP(gtsh, >, s16)
676 VCMP(gtsw, >, s32)
677 VCMP(gtsd, >, s64)
678 #undef VCMP_DO
679 #undef VCMP
681 #define VCMPFP_DO(suffix, compare, order, record) \
682 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
683 ppc_avr_t *a, ppc_avr_t *b) \
685 uint32_t ones = (uint32_t)-1; \
686 uint32_t all = ones; \
687 uint32_t none = 0; \
688 int i; \
690 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
691 uint32_t result; \
692 int rel = float32_compare_quiet(a->f[i], b->f[i], \
693 &env->vec_status); \
694 if (rel == float_relation_unordered) { \
695 result = 0; \
696 } else if (rel compare order) { \
697 result = ones; \
698 } else { \
699 result = 0; \
701 r->u32[i] = result; \
702 all &= result; \
703 none |= result; \
705 if (record) { \
706 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
709 #define VCMPFP(suffix, compare, order) \
710 VCMPFP_DO(suffix, compare, order, 0) \
711 VCMPFP_DO(suffix##_dot, compare, order, 1)
712 VCMPFP(eqfp, ==, float_relation_equal)
713 VCMPFP(gefp, !=, float_relation_less)
714 VCMPFP(gtfp, ==, float_relation_greater)
715 #undef VCMPFP_DO
716 #undef VCMPFP
718 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
719 ppc_avr_t *a, ppc_avr_t *b, int record)
721 int i;
722 int all_in = 0;
724 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
725 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
726 if (le_rel == float_relation_unordered) {
727 r->u32[i] = 0xc0000000;
728 /* ALL_IN does not need to be updated here. */
729 } else {
730 float32 bneg = float32_chs(b->f[i]);
731 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
732 int le = le_rel != float_relation_greater;
733 int ge = ge_rel != float_relation_less;
735 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
736 all_in |= (!le | !ge);
739 if (record) {
740 env->crf[6] = (all_in == 0) << 1;
744 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
746 vcmpbfp_internal(env, r, a, b, 0);
749 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
750 ppc_avr_t *b)
752 vcmpbfp_internal(env, r, a, b, 1);
755 #define VCT(suffix, satcvt, element) \
756 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
757 ppc_avr_t *b, uint32_t uim) \
759 int i; \
760 int sat = 0; \
761 float_status s = env->vec_status; \
763 set_float_rounding_mode(float_round_to_zero, &s); \
764 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
765 if (float32_is_any_nan(b->f[i])) { \
766 r->element[i] = 0; \
767 } else { \
768 float64 t = float32_to_float64(b->f[i], &s); \
769 int64_t j; \
771 t = float64_scalbn(t, uim, &s); \
772 j = float64_to_int64(t, &s); \
773 r->element[i] = satcvt(j, &sat); \
776 if (sat) { \
777 env->vscr |= (1 << VSCR_SAT); \
780 VCT(uxs, cvtsduw, u32)
781 VCT(sxs, cvtsdsw, s32)
782 #undef VCT
784 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
785 ppc_avr_t *b, ppc_avr_t *c)
787 int sat = 0;
788 int i;
790 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
791 int32_t prod = a->s16[i] * b->s16[i];
792 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
794 r->s16[i] = cvtswsh(t, &sat);
797 if (sat) {
798 env->vscr |= (1 << VSCR_SAT);
802 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
803 ppc_avr_t *b, ppc_avr_t *c)
805 int sat = 0;
806 int i;
808 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
809 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
810 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
811 r->s16[i] = cvtswsh(t, &sat);
814 if (sat) {
815 env->vscr |= (1 << VSCR_SAT);
819 #define VMINMAX_DO(name, compare, element) \
820 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
822 int i; \
824 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
825 if (a->element[i] compare b->element[i]) { \
826 r->element[i] = b->element[i]; \
827 } else { \
828 r->element[i] = a->element[i]; \
832 #define VMINMAX(suffix, element) \
833 VMINMAX_DO(min##suffix, >, element) \
834 VMINMAX_DO(max##suffix, <, element)
835 VMINMAX(sb, s8)
836 VMINMAX(sh, s16)
837 VMINMAX(sw, s32)
838 VMINMAX(sd, s64)
839 VMINMAX(ub, u8)
840 VMINMAX(uh, u16)
841 VMINMAX(uw, u32)
842 VMINMAX(ud, u64)
843 #undef VMINMAX_DO
844 #undef VMINMAX
846 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
848 int i;
850 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
851 int32_t prod = a->s16[i] * b->s16[i];
852 r->s16[i] = (int16_t) (prod + c->s16[i]);
856 #define VMRG_DO(name, element, highp) \
857 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
859 ppc_avr_t result; \
860 int i; \
861 size_t n_elems = ARRAY_SIZE(r->element); \
863 for (i = 0; i < n_elems / 2; i++) { \
864 if (highp) { \
865 result.element[i*2+HI_IDX] = a->element[i]; \
866 result.element[i*2+LO_IDX] = b->element[i]; \
867 } else { \
868 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
869 b->element[n_elems - i - 1]; \
870 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
871 a->element[n_elems - i - 1]; \
874 *r = result; \
876 #if defined(HOST_WORDS_BIGENDIAN)
877 #define MRGHI 0
878 #define MRGLO 1
879 #else
880 #define MRGHI 1
881 #define MRGLO 0
882 #endif
883 #define VMRG(suffix, element) \
884 VMRG_DO(mrgl##suffix, element, MRGHI) \
885 VMRG_DO(mrgh##suffix, element, MRGLO)
886 VMRG(b, u8)
887 VMRG(h, u16)
888 VMRG(w, u32)
889 #undef VMRG_DO
890 #undef VMRG
891 #undef MRGHI
892 #undef MRGLO
894 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
895 ppc_avr_t *b, ppc_avr_t *c)
897 int32_t prod[16];
898 int i;
900 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
901 prod[i] = (int32_t)a->s8[i] * b->u8[i];
904 VECTOR_FOR_INORDER_I(i, s32) {
905 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
906 prod[4 * i + 2] + prod[4 * i + 3];
910 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
911 ppc_avr_t *b, ppc_avr_t *c)
913 int32_t prod[8];
914 int i;
916 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
917 prod[i] = a->s16[i] * b->s16[i];
920 VECTOR_FOR_INORDER_I(i, s32) {
921 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
925 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
926 ppc_avr_t *b, ppc_avr_t *c)
928 int32_t prod[8];
929 int i;
930 int sat = 0;
932 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
933 prod[i] = (int32_t)a->s16[i] * b->s16[i];
936 VECTOR_FOR_INORDER_I(i, s32) {
937 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
939 r->u32[i] = cvtsdsw(t, &sat);
942 if (sat) {
943 env->vscr |= (1 << VSCR_SAT);
947 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
948 ppc_avr_t *b, ppc_avr_t *c)
950 uint16_t prod[16];
951 int i;
953 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
954 prod[i] = a->u8[i] * b->u8[i];
957 VECTOR_FOR_INORDER_I(i, u32) {
958 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
959 prod[4 * i + 2] + prod[4 * i + 3];
963 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
964 ppc_avr_t *b, ppc_avr_t *c)
966 uint32_t prod[8];
967 int i;
969 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
970 prod[i] = a->u16[i] * b->u16[i];
973 VECTOR_FOR_INORDER_I(i, u32) {
974 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
978 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
979 ppc_avr_t *b, ppc_avr_t *c)
981 uint32_t prod[8];
982 int i;
983 int sat = 0;
985 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
986 prod[i] = a->u16[i] * b->u16[i];
989 VECTOR_FOR_INORDER_I(i, s32) {
990 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
992 r->u32[i] = cvtuduw(t, &sat);
995 if (sat) {
996 env->vscr |= (1 << VSCR_SAT);
1000 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
1001 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1003 int i; \
1005 VECTOR_FOR_INORDER_I(i, prod_element) { \
1006 if (evenp) { \
1007 r->prod_element[i] = \
1008 (cast)a->mul_element[i * 2 + HI_IDX] * \
1009 (cast)b->mul_element[i * 2 + HI_IDX]; \
1010 } else { \
1011 r->prod_element[i] = \
1012 (cast)a->mul_element[i * 2 + LO_IDX] * \
1013 (cast)b->mul_element[i * 2 + LO_IDX]; \
1017 #define VMUL(suffix, mul_element, prod_element, cast) \
1018 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1019 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1020 VMUL(sb, s8, s16, int16_t)
1021 VMUL(sh, s16, s32, int32_t)
1022 VMUL(sw, s32, s64, int64_t)
1023 VMUL(ub, u8, u16, uint16_t)
1024 VMUL(uh, u16, u32, uint32_t)
1025 VMUL(uw, u32, u64, uint64_t)
1026 #undef VMUL_DO
1027 #undef VMUL
1029 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1030 ppc_avr_t *c)
1032 ppc_avr_t result;
1033 int i;
1035 VECTOR_FOR_INORDER_I(i, u8) {
1036 int s = c->u8[i] & 0x1f;
1037 #if defined(HOST_WORDS_BIGENDIAN)
1038 int index = s & 0xf;
1039 #else
1040 int index = 15 - (s & 0xf);
1041 #endif
1043 if (s & 0x10) {
1044 result.u8[i] = b->u8[index];
1045 } else {
1046 result.u8[i] = a->u8[index];
1049 *r = result;
1052 #if defined(HOST_WORDS_BIGENDIAN)
1053 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1054 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1055 #else
1056 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1057 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1058 #endif
1060 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1062 int i;
1063 uint64_t perm = 0;
1065 VECTOR_FOR_INORDER_I(i, u8) {
1066 int index = VBPERMQ_INDEX(b, i);
1068 if (index < 128) {
1069 uint64_t mask = (1ull << (63-(index & 0x3F)));
1070 if (a->u64[VBPERMQ_DW(index)] & mask) {
1071 perm |= (0x8000 >> i);
1076 r->u64[HI_IDX] = perm;
1077 r->u64[LO_IDX] = 0;
1080 #undef VBPERMQ_INDEX
1081 #undef VBPERMQ_DW
1083 static const uint64_t VGBBD_MASKS[256] = {
1084 0x0000000000000000ull, /* 00 */
1085 0x0000000000000080ull, /* 01 */
1086 0x0000000000008000ull, /* 02 */
1087 0x0000000000008080ull, /* 03 */
1088 0x0000000000800000ull, /* 04 */
1089 0x0000000000800080ull, /* 05 */
1090 0x0000000000808000ull, /* 06 */
1091 0x0000000000808080ull, /* 07 */
1092 0x0000000080000000ull, /* 08 */
1093 0x0000000080000080ull, /* 09 */
1094 0x0000000080008000ull, /* 0A */
1095 0x0000000080008080ull, /* 0B */
1096 0x0000000080800000ull, /* 0C */
1097 0x0000000080800080ull, /* 0D */
1098 0x0000000080808000ull, /* 0E */
1099 0x0000000080808080ull, /* 0F */
1100 0x0000008000000000ull, /* 10 */
1101 0x0000008000000080ull, /* 11 */
1102 0x0000008000008000ull, /* 12 */
1103 0x0000008000008080ull, /* 13 */
1104 0x0000008000800000ull, /* 14 */
1105 0x0000008000800080ull, /* 15 */
1106 0x0000008000808000ull, /* 16 */
1107 0x0000008000808080ull, /* 17 */
1108 0x0000008080000000ull, /* 18 */
1109 0x0000008080000080ull, /* 19 */
1110 0x0000008080008000ull, /* 1A */
1111 0x0000008080008080ull, /* 1B */
1112 0x0000008080800000ull, /* 1C */
1113 0x0000008080800080ull, /* 1D */
1114 0x0000008080808000ull, /* 1E */
1115 0x0000008080808080ull, /* 1F */
1116 0x0000800000000000ull, /* 20 */
1117 0x0000800000000080ull, /* 21 */
1118 0x0000800000008000ull, /* 22 */
1119 0x0000800000008080ull, /* 23 */
1120 0x0000800000800000ull, /* 24 */
1121 0x0000800000800080ull, /* 25 */
1122 0x0000800000808000ull, /* 26 */
1123 0x0000800000808080ull, /* 27 */
1124 0x0000800080000000ull, /* 28 */
1125 0x0000800080000080ull, /* 29 */
1126 0x0000800080008000ull, /* 2A */
1127 0x0000800080008080ull, /* 2B */
1128 0x0000800080800000ull, /* 2C */
1129 0x0000800080800080ull, /* 2D */
1130 0x0000800080808000ull, /* 2E */
1131 0x0000800080808080ull, /* 2F */
1132 0x0000808000000000ull, /* 30 */
1133 0x0000808000000080ull, /* 31 */
1134 0x0000808000008000ull, /* 32 */
1135 0x0000808000008080ull, /* 33 */
1136 0x0000808000800000ull, /* 34 */
1137 0x0000808000800080ull, /* 35 */
1138 0x0000808000808000ull, /* 36 */
1139 0x0000808000808080ull, /* 37 */
1140 0x0000808080000000ull, /* 38 */
1141 0x0000808080000080ull, /* 39 */
1142 0x0000808080008000ull, /* 3A */
1143 0x0000808080008080ull, /* 3B */
1144 0x0000808080800000ull, /* 3C */
1145 0x0000808080800080ull, /* 3D */
1146 0x0000808080808000ull, /* 3E */
1147 0x0000808080808080ull, /* 3F */
1148 0x0080000000000000ull, /* 40 */
1149 0x0080000000000080ull, /* 41 */
1150 0x0080000000008000ull, /* 42 */
1151 0x0080000000008080ull, /* 43 */
1152 0x0080000000800000ull, /* 44 */
1153 0x0080000000800080ull, /* 45 */
1154 0x0080000000808000ull, /* 46 */
1155 0x0080000000808080ull, /* 47 */
1156 0x0080000080000000ull, /* 48 */
1157 0x0080000080000080ull, /* 49 */
1158 0x0080000080008000ull, /* 4A */
1159 0x0080000080008080ull, /* 4B */
1160 0x0080000080800000ull, /* 4C */
1161 0x0080000080800080ull, /* 4D */
1162 0x0080000080808000ull, /* 4E */
1163 0x0080000080808080ull, /* 4F */
1164 0x0080008000000000ull, /* 50 */
1165 0x0080008000000080ull, /* 51 */
1166 0x0080008000008000ull, /* 52 */
1167 0x0080008000008080ull, /* 53 */
1168 0x0080008000800000ull, /* 54 */
1169 0x0080008000800080ull, /* 55 */
1170 0x0080008000808000ull, /* 56 */
1171 0x0080008000808080ull, /* 57 */
1172 0x0080008080000000ull, /* 58 */
1173 0x0080008080000080ull, /* 59 */
1174 0x0080008080008000ull, /* 5A */
1175 0x0080008080008080ull, /* 5B */
1176 0x0080008080800000ull, /* 5C */
1177 0x0080008080800080ull, /* 5D */
1178 0x0080008080808000ull, /* 5E */
1179 0x0080008080808080ull, /* 5F */
1180 0x0080800000000000ull, /* 60 */
1181 0x0080800000000080ull, /* 61 */
1182 0x0080800000008000ull, /* 62 */
1183 0x0080800000008080ull, /* 63 */
1184 0x0080800000800000ull, /* 64 */
1185 0x0080800000800080ull, /* 65 */
1186 0x0080800000808000ull, /* 66 */
1187 0x0080800000808080ull, /* 67 */
1188 0x0080800080000000ull, /* 68 */
1189 0x0080800080000080ull, /* 69 */
1190 0x0080800080008000ull, /* 6A */
1191 0x0080800080008080ull, /* 6B */
1192 0x0080800080800000ull, /* 6C */
1193 0x0080800080800080ull, /* 6D */
1194 0x0080800080808000ull, /* 6E */
1195 0x0080800080808080ull, /* 6F */
1196 0x0080808000000000ull, /* 70 */
1197 0x0080808000000080ull, /* 71 */
1198 0x0080808000008000ull, /* 72 */
1199 0x0080808000008080ull, /* 73 */
1200 0x0080808000800000ull, /* 74 */
1201 0x0080808000800080ull, /* 75 */
1202 0x0080808000808000ull, /* 76 */
1203 0x0080808000808080ull, /* 77 */
1204 0x0080808080000000ull, /* 78 */
1205 0x0080808080000080ull, /* 79 */
1206 0x0080808080008000ull, /* 7A */
1207 0x0080808080008080ull, /* 7B */
1208 0x0080808080800000ull, /* 7C */
1209 0x0080808080800080ull, /* 7D */
1210 0x0080808080808000ull, /* 7E */
1211 0x0080808080808080ull, /* 7F */
1212 0x8000000000000000ull, /* 80 */
1213 0x8000000000000080ull, /* 81 */
1214 0x8000000000008000ull, /* 82 */
1215 0x8000000000008080ull, /* 83 */
1216 0x8000000000800000ull, /* 84 */
1217 0x8000000000800080ull, /* 85 */
1218 0x8000000000808000ull, /* 86 */
1219 0x8000000000808080ull, /* 87 */
1220 0x8000000080000000ull, /* 88 */
1221 0x8000000080000080ull, /* 89 */
1222 0x8000000080008000ull, /* 8A */
1223 0x8000000080008080ull, /* 8B */
1224 0x8000000080800000ull, /* 8C */
1225 0x8000000080800080ull, /* 8D */
1226 0x8000000080808000ull, /* 8E */
1227 0x8000000080808080ull, /* 8F */
1228 0x8000008000000000ull, /* 90 */
1229 0x8000008000000080ull, /* 91 */
1230 0x8000008000008000ull, /* 92 */
1231 0x8000008000008080ull, /* 93 */
1232 0x8000008000800000ull, /* 94 */
1233 0x8000008000800080ull, /* 95 */
1234 0x8000008000808000ull, /* 96 */
1235 0x8000008000808080ull, /* 97 */
1236 0x8000008080000000ull, /* 98 */
1237 0x8000008080000080ull, /* 99 */
1238 0x8000008080008000ull, /* 9A */
1239 0x8000008080008080ull, /* 9B */
1240 0x8000008080800000ull, /* 9C */
1241 0x8000008080800080ull, /* 9D */
1242 0x8000008080808000ull, /* 9E */
1243 0x8000008080808080ull, /* 9F */
1244 0x8000800000000000ull, /* A0 */
1245 0x8000800000000080ull, /* A1 */
1246 0x8000800000008000ull, /* A2 */
1247 0x8000800000008080ull, /* A3 */
1248 0x8000800000800000ull, /* A4 */
1249 0x8000800000800080ull, /* A5 */
1250 0x8000800000808000ull, /* A6 */
1251 0x8000800000808080ull, /* A7 */
1252 0x8000800080000000ull, /* A8 */
1253 0x8000800080000080ull, /* A9 */
1254 0x8000800080008000ull, /* AA */
1255 0x8000800080008080ull, /* AB */
1256 0x8000800080800000ull, /* AC */
1257 0x8000800080800080ull, /* AD */
1258 0x8000800080808000ull, /* AE */
1259 0x8000800080808080ull, /* AF */
1260 0x8000808000000000ull, /* B0 */
1261 0x8000808000000080ull, /* B1 */
1262 0x8000808000008000ull, /* B2 */
1263 0x8000808000008080ull, /* B3 */
1264 0x8000808000800000ull, /* B4 */
1265 0x8000808000800080ull, /* B5 */
1266 0x8000808000808000ull, /* B6 */
1267 0x8000808000808080ull, /* B7 */
1268 0x8000808080000000ull, /* B8 */
1269 0x8000808080000080ull, /* B9 */
1270 0x8000808080008000ull, /* BA */
1271 0x8000808080008080ull, /* BB */
1272 0x8000808080800000ull, /* BC */
1273 0x8000808080800080ull, /* BD */
1274 0x8000808080808000ull, /* BE */
1275 0x8000808080808080ull, /* BF */
1276 0x8080000000000000ull, /* C0 */
1277 0x8080000000000080ull, /* C1 */
1278 0x8080000000008000ull, /* C2 */
1279 0x8080000000008080ull, /* C3 */
1280 0x8080000000800000ull, /* C4 */
1281 0x8080000000800080ull, /* C5 */
1282 0x8080000000808000ull, /* C6 */
1283 0x8080000000808080ull, /* C7 */
1284 0x8080000080000000ull, /* C8 */
1285 0x8080000080000080ull, /* C9 */
1286 0x8080000080008000ull, /* CA */
1287 0x8080000080008080ull, /* CB */
1288 0x8080000080800000ull, /* CC */
1289 0x8080000080800080ull, /* CD */
1290 0x8080000080808000ull, /* CE */
1291 0x8080000080808080ull, /* CF */
1292 0x8080008000000000ull, /* D0 */
1293 0x8080008000000080ull, /* D1 */
1294 0x8080008000008000ull, /* D2 */
1295 0x8080008000008080ull, /* D3 */
1296 0x8080008000800000ull, /* D4 */
1297 0x8080008000800080ull, /* D5 */
1298 0x8080008000808000ull, /* D6 */
1299 0x8080008000808080ull, /* D7 */
1300 0x8080008080000000ull, /* D8 */
1301 0x8080008080000080ull, /* D9 */
1302 0x8080008080008000ull, /* DA */
1303 0x8080008080008080ull, /* DB */
1304 0x8080008080800000ull, /* DC */
1305 0x8080008080800080ull, /* DD */
1306 0x8080008080808000ull, /* DE */
1307 0x8080008080808080ull, /* DF */
1308 0x8080800000000000ull, /* E0 */
1309 0x8080800000000080ull, /* E1 */
1310 0x8080800000008000ull, /* E2 */
1311 0x8080800000008080ull, /* E3 */
1312 0x8080800000800000ull, /* E4 */
1313 0x8080800000800080ull, /* E5 */
1314 0x8080800000808000ull, /* E6 */
1315 0x8080800000808080ull, /* E7 */
1316 0x8080800080000000ull, /* E8 */
1317 0x8080800080000080ull, /* E9 */
1318 0x8080800080008000ull, /* EA */
1319 0x8080800080008080ull, /* EB */
1320 0x8080800080800000ull, /* EC */
1321 0x8080800080800080ull, /* ED */
1322 0x8080800080808000ull, /* EE */
1323 0x8080800080808080ull, /* EF */
1324 0x8080808000000000ull, /* F0 */
1325 0x8080808000000080ull, /* F1 */
1326 0x8080808000008000ull, /* F2 */
1327 0x8080808000008080ull, /* F3 */
1328 0x8080808000800000ull, /* F4 */
1329 0x8080808000800080ull, /* F5 */
1330 0x8080808000808000ull, /* F6 */
1331 0x8080808000808080ull, /* F7 */
1332 0x8080808080000000ull, /* F8 */
1333 0x8080808080000080ull, /* F9 */
1334 0x8080808080008000ull, /* FA */
1335 0x8080808080008080ull, /* FB */
1336 0x8080808080800000ull, /* FC */
1337 0x8080808080800080ull, /* FD */
1338 0x8080808080808000ull, /* FE */
1339 0x8080808080808080ull, /* FF */
};

void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    uint64_t t[2] = { 0, 0 };

    VECTOR_FOR_INORDER_I(i, u8) {
#if defined(HOST_WORDS_BIGENDIAN)
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
#else
        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (7 - (i & 7));
#endif
    }

    r->u64[0] = t[0];
    r->u64[1] = t[1];
}
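/* Note on helper_vgbbd() above: each VGBBD_MASKS[] entry spreads the eight
 * bits of the index byte across the most-significant bit of each byte of a
 * doubleword. Shifting that pattern right by the source byte's position
 * within its doubleword and OR-ing the results transposes the 8x8 bit
 * matrix, which is the effect of vgbbd (Vector Gather Bits by Bytes by
 * Doubleword). */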
1359 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1360 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1362 int i, j; \
1363 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1365 VECTOR_FOR_INORDER_I(i, srcfld) { \
1366 prod[i] = 0; \
1367 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1368 if (a->srcfld[i] & (1ull<<j)) { \
1369 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1374 VECTOR_FOR_INORDER_I(i, trgfld) { \
1375 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1379 PMSUM(vpmsumb, u8, u16, uint16_t)
1380 PMSUM(vpmsumh, u16, u32, uint32_t)
1381 PMSUM(vpmsumw, u32, u64, uint64_t)
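/* The vpmsum* helpers implement carry-less (GF(2) polynomial) multiplication:
 * each conditional XOR of a shifted operand is one step of a polynomial
 * multiply, and the even/odd pair of products is XOR-summed into the wider
 * result element. vpmsumd below does the same for 64x64->128-bit products,
 * using __uint128_t when available and a two-word shift/XOR fallback
 * otherwise. */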
1383 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1386 #ifdef CONFIG_INT128
1387 int i, j;
1388 __uint128_t prod[2];
1390 VECTOR_FOR_INORDER_I(i, u64) {
1391 prod[i] = 0;
1392 for (j = 0; j < 64; j++) {
1393 if (a->u64[i] & (1ull<<j)) {
1394 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1399 r->u128 = prod[0] ^ prod[1];
1401 #else
1402 int i, j;
1403 ppc_avr_t prod[2];
1405 VECTOR_FOR_INORDER_I(i, u64) {
1406 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1407 for (j = 0; j < 64; j++) {
1408 if (a->u64[i] & (1ull<<j)) {
1409 ppc_avr_t bshift;
1410 if (j == 0) {
1411 bshift.u64[HI_IDX] = 0;
1412 bshift.u64[LO_IDX] = b->u64[i];
1413 } else {
1414 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1415 bshift.u64[LO_IDX] = b->u64[i] << j;
1417 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1418 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1423 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1424 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1425 #endif
1429 #if defined(HOST_WORDS_BIGENDIAN)
1430 #define PKBIG 1
1431 #else
1432 #define PKBIG 0
1433 #endif
1434 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1436 int i, j;
1437 ppc_avr_t result;
1438 #if defined(HOST_WORDS_BIGENDIAN)
1439 const ppc_avr_t *x[2] = { a, b };
1440 #else
1441 const ppc_avr_t *x[2] = { b, a };
1442 #endif
1444 VECTOR_FOR_INORDER_I(i, u64) {
1445 VECTOR_FOR_INORDER_I(j, u32) {
1446 uint32_t e = x[i]->u32[j];
1448 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1449 ((e >> 6) & 0x3e0) |
1450 ((e >> 3) & 0x1f));
1453 *r = result;
1456 #define VPK(suffix, from, to, cvt, dosat) \
1457 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1458 ppc_avr_t *a, ppc_avr_t *b) \
1460 int i; \
1461 int sat = 0; \
1462 ppc_avr_t result; \
1463 ppc_avr_t *a0 = PKBIG ? a : b; \
1464 ppc_avr_t *a1 = PKBIG ? b : a; \
1466 VECTOR_FOR_INORDER_I(i, from) { \
1467 result.to[i] = cvt(a0->from[i], &sat); \
1468 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1470 *r = result; \
1471 if (dosat && sat) { \
1472 env->vscr |= (1 << VSCR_SAT); \
1475 #define I(x, y) (x)
1476 VPK(shss, s16, s8, cvtshsb, 1)
1477 VPK(shus, s16, u8, cvtshub, 1)
1478 VPK(swss, s32, s16, cvtswsh, 1)
1479 VPK(swus, s32, u16, cvtswuh, 1)
1480 VPK(sdss, s64, s32, cvtsdsw, 1)
1481 VPK(sdus, s64, u32, cvtsduw, 1)
1482 VPK(uhus, u16, u8, cvtuhub, 1)
1483 VPK(uwus, u32, u16, cvtuwuh, 1)
1484 VPK(udus, u64, u32, cvtuduw, 1)
1485 VPK(uhum, u16, u8, I, 0)
1486 VPK(uwum, u32, u16, I, 0)
1487 VPK(udum, u64, u32, I, 0)
1488 #undef I
1489 #undef VPK
1490 #undef PKBIG
1492 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1494 int i;
1496 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1497 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1501 #define VRFI(suffix, rounding) \
1502 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1503 ppc_avr_t *b) \
1505 int i; \
1506 float_status s = env->vec_status; \
1508 set_float_rounding_mode(rounding, &s); \
1509 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1510 r->f[i] = float32_round_to_int (b->f[i], &s); \
1513 VRFI(n, float_round_nearest_even)
1514 VRFI(m, float_round_down)
1515 VRFI(p, float_round_up)
1516 VRFI(z, float_round_to_zero)
1517 #undef VRFI
1519 #define VROTATE(suffix, element, mask) \
1520 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1522 int i; \
1524 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1525 unsigned int shift = b->element[i] & mask; \
1526 r->element[i] = (a->element[i] << shift) | \
1527 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1530 VROTATE(b, u8, 0x7)
1531 VROTATE(h, u16, 0xF)
1532 VROTATE(w, u32, 0x1F)
1533 VROTATE(d, u64, 0x3F)
1534 #undef VROTATE
1536 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1538 int i;
1540 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1541 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1543 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1547 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1548 ppc_avr_t *c)
1550 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1551 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1554 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1556 int i;
1558 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1559 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1563 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1565 int i;
1567 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1568 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1572 #if defined(HOST_WORDS_BIGENDIAN)
1573 #define LEFT 0
1574 #define RIGHT 1
1575 #else
1576 #define LEFT 1
1577 #define RIGHT 0
1578 #endif
1579 /* The specification says that the results are undefined if all of the
1580 * shift counts are not identical. We check to make sure that they are
1581 * to conform to what real hardware appears to do. */
1582 #define VSHIFT(suffix, leftp) \
1583 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1585 int shift = b->u8[LO_IDX*15] & 0x7; \
1586 int doit = 1; \
1587 int i; \
1589 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1590 doit = doit && ((b->u8[i] & 0x7) == shift); \
1592 if (doit) { \
1593 if (shift == 0) { \
1594 *r = *a; \
1595 } else if (leftp) { \
1596 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1598 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1599 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1600 } else { \
1601 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1603 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1604 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1608 VSHIFT(l, LEFT)
1609 VSHIFT(r, RIGHT)
1610 #undef VSHIFT
1611 #undef LEFT
1612 #undef RIGHT
1614 #define VSL(suffix, element, mask) \
1615 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1617 int i; \
1619 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1620 unsigned int shift = b->element[i] & mask; \
1622 r->element[i] = a->element[i] << shift; \
1625 VSL(b, u8, 0x7)
1626 VSL(h, u16, 0x0F)
1627 VSL(w, u32, 0x1F)
1628 VSL(d, u64, 0x3F)
1629 #undef VSL
1631 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1633 int sh = shift & 0xf;
1634 int i;
1635 ppc_avr_t result;
1637 #if defined(HOST_WORDS_BIGENDIAN)
1638 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1639 int index = sh + i;
1640 if (index > 0xf) {
1641 result.u8[i] = b->u8[index - 0x10];
1642 } else {
1643 result.u8[i] = a->u8[index];
1646 #else
1647 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1648 int index = (16 - sh) + i;
1649 if (index > 0xf) {
1650 result.u8[i] = a->u8[index - 0x10];
1651 } else {
1652 result.u8[i] = b->u8[index];
1655 #endif
1656 *r = result;
1659 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1661 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1663 #if defined(HOST_WORDS_BIGENDIAN)
1664 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1665 memset(&r->u8[16-sh], 0, sh);
1666 #else
1667 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1668 memset(&r->u8[0], 0, sh);
1669 #endif
1672 /* Experimental testing shows that hardware masks the immediate. */
1673 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1674 #if defined(HOST_WORDS_BIGENDIAN)
1675 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1676 #else
1677 #define SPLAT_ELEMENT(element) \
1678 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1679 #endif
1680 #define VSPLT(suffix, element) \
1681 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1683 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1684 int i; \
1686 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1687 r->element[i] = s; \
1690 VSPLT(b, u8)
1691 VSPLT(h, u16)
1692 VSPLT(w, u32)
1693 #undef VSPLT
1694 #undef SPLAT_ELEMENT
1695 #undef _SPLAT_MASKED
1697 #define VSPLTI(suffix, element, splat_type) \
1698 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1700 splat_type x = (int8_t)(splat << 3) >> 3; \
1701 int i; \
1703 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1704 r->element[i] = x; \
1707 VSPLTI(b, s8, int8_t)
1708 VSPLTI(h, s16, int16_t)
1709 VSPLTI(w, s32, int32_t)
1710 #undef VSPLTI
1712 #define VSR(suffix, element, mask) \
1713 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1715 int i; \
1717 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1718 unsigned int shift = b->element[i] & mask; \
1719 r->element[i] = a->element[i] >> shift; \
1722 VSR(ab, s8, 0x7)
1723 VSR(ah, s16, 0xF)
1724 VSR(aw, s32, 0x1F)
1725 VSR(ad, s64, 0x3F)
1726 VSR(b, u8, 0x7)
1727 VSR(h, u16, 0xF)
1728 VSR(w, u32, 0x1F)
1729 VSR(d, u64, 0x3F)
1730 #undef VSR
1732 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1734 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1736 #if defined(HOST_WORDS_BIGENDIAN)
1737 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1738 memset(&r->u8[0], 0, sh);
1739 #else
1740 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1741 memset(&r->u8[16 - sh], 0, sh);
1742 #endif
1745 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1747 int i;
1749 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1750 r->u32[i] = a->u32[i] >= b->u32[i];
1754 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1756 int64_t t;
1757 int i, upper;
1758 ppc_avr_t result;
1759 int sat = 0;
1761 #if defined(HOST_WORDS_BIGENDIAN)
1762 upper = ARRAY_SIZE(r->s32)-1;
1763 #else
1764 upper = 0;
1765 #endif
1766 t = (int64_t)b->s32[upper];
1767 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1768 t += a->s32[i];
1769 result.s32[i] = 0;
1771 result.s32[upper] = cvtsdsw(t, &sat);
1772 *r = result;
1774 if (sat) {
1775 env->vscr |= (1 << VSCR_SAT);
1779 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1781 int i, j, upper;
1782 ppc_avr_t result;
1783 int sat = 0;
1785 #if defined(HOST_WORDS_BIGENDIAN)
1786 upper = 1;
1787 #else
1788 upper = 0;
1789 #endif
1790 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1791 int64_t t = (int64_t)b->s32[upper + i * 2];
1793 result.u64[i] = 0;
1794 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1795 t += a->s32[2 * i + j];
1797 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
1800 *r = result;
1801 if (sat) {
1802 env->vscr |= (1 << VSCR_SAT);
1806 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1808 int i, j;
1809 int sat = 0;
1811 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1812 int64_t t = (int64_t)b->s32[i];
1814 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1815 t += a->s8[4 * i + j];
1817 r->s32[i] = cvtsdsw(t, &sat);
1820 if (sat) {
1821 env->vscr |= (1 << VSCR_SAT);
1825 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1827 int sat = 0;
1828 int i;
1830 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1831 int64_t t = (int64_t)b->s32[i];
1833 t += a->s16[2 * i] + a->s16[2 * i + 1];
1834 r->s32[i] = cvtsdsw(t, &sat);
1837 if (sat) {
1838 env->vscr |= (1 << VSCR_SAT);
1842 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1844 int i, j;
1845 int sat = 0;
1847 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1848 uint64_t t = (uint64_t)b->u32[i];
1850 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1851 t += a->u8[4 * i + j];
1853 r->u32[i] = cvtuduw(t, &sat);
1856 if (sat) {
1857 env->vscr |= (1 << VSCR_SAT);
1861 #if defined(HOST_WORDS_BIGENDIAN)
1862 #define UPKHI 1
1863 #define UPKLO 0
1864 #else
1865 #define UPKHI 0
1866 #define UPKLO 1
1867 #endif
1868 #define VUPKPX(suffix, hi) \
1869 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1871 int i; \
1872 ppc_avr_t result; \
1874 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1875 uint16_t e = b->u16[hi ? i : i+4]; \
1876 uint8_t a = (e >> 15) ? 0xff : 0; \
1877 uint8_t r = (e >> 10) & 0x1f; \
1878 uint8_t g = (e >> 5) & 0x1f; \
1879 uint8_t b = e & 0x1f; \
1881 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1883 *r = result; \
1885 VUPKPX(lpx, UPKLO)
1886 VUPKPX(hpx, UPKHI)
1887 #undef VUPKPX
1889 #define VUPK(suffix, unpacked, packee, hi) \
1890 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1892 int i; \
1893 ppc_avr_t result; \
1895 if (hi) { \
1896 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1897 result.unpacked[i] = b->packee[i]; \
1899 } else { \
1900 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1901 i++) { \
1902 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1905 *r = result; \
1907 VUPK(hsb, s16, s8, UPKHI)
1908 VUPK(hsh, s32, s16, UPKHI)
1909 VUPK(hsw, s64, s32, UPKHI)
1910 VUPK(lsb, s16, s8, UPKLO)
1911 VUPK(lsh, s32, s16, UPKLO)
1912 VUPK(lsw, s64, s32, UPKLO)
1913 #undef VUPK
1914 #undef UPKHI
1915 #undef UPKLO
1917 #define VGENERIC_DO(name, element) \
1918 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1920 int i; \
1922 VECTOR_FOR_INORDER_I(i, element) { \
1923 r->element[i] = name(b->element[i]); \
1927 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1928 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1929 #define clzw(v) clz32((v))
1930 #define clzd(v) clz64((v))
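/* clzb()/clzh() shift the value into the top byte/halfword of a 32-bit word
 * so that clz32() only counts leading zeros inside the narrow element; the
 * "(v) ? ... : width" guard handles a zero input, for which the shifted
 * clz32() would otherwise return 32. */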
1932 VGENERIC_DO(clzb, u8)
1933 VGENERIC_DO(clzh, u16)
1934 VGENERIC_DO(clzw, u32)
1935 VGENERIC_DO(clzd, u64)
1937 #undef clzb
1938 #undef clzh
1939 #undef clzw
1940 #undef clzd
1942 #define popcntb(v) ctpop8(v)
1943 #define popcnth(v) ctpop16(v)
1944 #define popcntw(v) ctpop32(v)
1945 #define popcntd(v) ctpop64(v)
1947 VGENERIC_DO(popcntb, u8)
1948 VGENERIC_DO(popcnth, u16)
1949 VGENERIC_DO(popcntw, u32)
1950 VGENERIC_DO(popcntd, u64)
1952 #undef popcntb
1953 #undef popcnth
1954 #undef popcntw
1955 #undef popcntd
1957 #undef VGENERIC_DO
1959 #if defined(HOST_WORDS_BIGENDIAN)
1960 #define QW_ONE { .u64 = { 0, 1 } }
1961 #else
1962 #define QW_ONE { .u64 = { 1, 0 } }
1963 #endif
1965 #ifndef CONFIG_INT128
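/* Without a compiler-provided 128-bit integer type, the avr_qw_* helpers
 * below emulate quadword compare, add and add-with-carry-out on the two
 * 64-bit halves of a ppc_avr_t. */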
1967 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1969 t->u64[0] = ~a.u64[0];
1970 t->u64[1] = ~a.u64[1];
1973 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1975 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
1976 return -1;
1977 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
1978 return 1;
1979 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
1980 return -1;
1981 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
1982 return 1;
1983 } else {
1984 return 0;
1988 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1990 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1991 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1992 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1995 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1997 ppc_avr_t not_a;
1998 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1999 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2000 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2001 avr_qw_not(&not_a, a);
2002 return avr_qw_cmpu(not_a, b) < 0;
2005 #endif
2007 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2009 #ifdef CONFIG_INT128
2010 r->u128 = a->u128 + b->u128;
2011 #else
2012 avr_qw_add(r, *a, *b);
2013 #endif
2016 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2018 #ifdef CONFIG_INT128
2019 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2020 #else
2022 if (c->u64[LO_IDX] & 1) {
2023 ppc_avr_t tmp;
2025 tmp.u64[HI_IDX] = 0;
2026 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2027 avr_qw_add(&tmp, *a, tmp);
2028 avr_qw_add(r, tmp, *b);
2029 } else {
2030 avr_qw_add(r, *a, *b);
2032 #endif
2035 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2037 #ifdef CONFIG_INT128
2038 r->u128 = (~a->u128 < b->u128);
2039 #else
2040 ppc_avr_t not_a;
2042 avr_qw_not(&not_a, *a);
2044 r->u64[HI_IDX] = 0;
2045 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2046 #endif
2049 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2051 #ifdef CONFIG_INT128
2052 int carry_out = (~a->u128 < b->u128);
2053 if (!carry_out && (c->u128 & 1)) {
2054 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2055 ((a->u128 != 0) || (b->u128 != 0));
2057 r->u128 = carry_out;
2058 #else
2060 int carry_in = c->u64[LO_IDX] & 1;
2061 int carry_out = 0;
2062 ppc_avr_t tmp;
2064 carry_out = avr_qw_addc(&tmp, *a, *b);
2066 if (!carry_out && carry_in) {
2067 ppc_avr_t one = QW_ONE;
2068 carry_out = avr_qw_addc(&tmp, tmp, one);
2070 r->u64[HI_IDX] = 0;
2071 r->u64[LO_IDX] = carry_out;
2072 #endif
2075 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2077 #ifdef CONFIG_INT128
2078 r->u128 = a->u128 - b->u128;
2079 #else
2080 ppc_avr_t tmp;
2081 ppc_avr_t one = QW_ONE;
2083 avr_qw_not(&tmp, *b);
2084 avr_qw_add(&tmp, *a, tmp);
2085 avr_qw_add(r, tmp, one);
2086 #endif
2089 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2091 #ifdef CONFIG_INT128
2092 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2093 #else
2094 ppc_avr_t tmp, sum;
2096 avr_qw_not(&tmp, *b);
2097 avr_qw_add(&sum, *a, tmp);
2099 tmp.u64[HI_IDX] = 0;
2100 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2101 avr_qw_add(r, sum, tmp);
2102 #endif
2105 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2107 #ifdef CONFIG_INT128
2108 r->u128 = (~a->u128 < ~b->u128) ||
2109 (a->u128 + ~b->u128 == (__uint128_t)-1);
2110 #else
2111 int carry = (avr_qw_cmpu(*a, *b) > 0);
2112 if (!carry) {
2113 ppc_avr_t tmp;
2114 avr_qw_not(&tmp, *b);
2115 avr_qw_add(&tmp, *a, tmp);
2116 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2118 r->u64[HI_IDX] = 0;
2119 r->u64[LO_IDX] = carry;
2120 #endif
2123 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2125 #ifdef CONFIG_INT128
2126 r->u128 =
2127 (~a->u128 < ~b->u128) ||
2128 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2129 #else
2130 int carry_in = c->u64[LO_IDX] & 1;
2131 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2132 if (!carry_out && carry_in) {
2133 ppc_avr_t tmp;
2134 avr_qw_not(&tmp, *b);
2135 avr_qw_add(&tmp, *a, tmp);
2136 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2139 r->u64[HI_IDX] = 0;
2140 r->u64[LO_IDX] = carry_out;
2141 #endif
2144 #define BCD_PLUS_PREF_1 0xC
2145 #define BCD_PLUS_PREF_2 0xF
2146 #define BCD_PLUS_ALT_1 0xA
2147 #define BCD_NEG_PREF 0xD
2148 #define BCD_NEG_ALT 0xB
2149 #define BCD_PLUS_ALT_2 0xE
2151 #if defined(HOST_WORDS_BIGENDIAN)
2152 #define BCD_DIG_BYTE(n) (15 - (n/2))
2153 #else
2154 #define BCD_DIG_BYTE(n) (n/2)
2155 #endif
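/* Signed-decimal operands hold 31 BCD digits plus a sign code: digit 0 is
 * the sign nibble (decoded by bcd_get_sgn()), digit 1 the least-significant
 * decimal digit and digit 31 the most significant. BCD_DIG_BYTE() maps a
 * digit number to the byte holding it, independent of host byte order. */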
2157 static int bcd_get_sgn(ppc_avr_t *bcd)
2159 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2160 case BCD_PLUS_PREF_1:
2161 case BCD_PLUS_PREF_2:
2162 case BCD_PLUS_ALT_1:
2163 case BCD_PLUS_ALT_2:
2165 return 1;
2168 case BCD_NEG_PREF:
2169 case BCD_NEG_ALT:
2171 return -1;
2174 default:
2176 return 0;
2181 static int bcd_preferred_sgn(int sgn, int ps)
2183 if (sgn >= 0) {
2184 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2185 } else {
2186 return BCD_NEG_PREF;
2190 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2192 uint8_t result;
2193 if (n & 1) {
2194 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2195 } else {
2196 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2199 if (unlikely(result > 9)) {
2200 *invalid = true;
2202 return result;
2205 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2207 if (n & 1) {
2208 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2209 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2210 } else {
2211 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2212 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2216 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2218 int i;
2219 int invalid = 0;
2220 for (i = 31; i > 0; i--) {
2221 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2222 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2223 if (unlikely(invalid)) {
2224 return 0; /* doesn't matter */
2225 } else if (dig_a > dig_b) {
2226 return 1;
2227 } else if (dig_a < dig_b) {
2228 return -1;
2232 return 0;
2235 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2236 int *overflow)
2238 int carry = 0;
2239 int i;
2240 int is_zero = 1;
2241 for (i = 1; i <= 31; i++) {
2242 uint8_t digit = bcd_get_digit(a, i, invalid) +
2243 bcd_get_digit(b, i, invalid) + carry;
2244 is_zero &= (digit == 0);
2245 if (digit > 9) {
2246 carry = 1;
2247 digit -= 10;
2248 } else {
2249 carry = 0;
2252 bcd_put_digit(t, digit, i);
2254 if (unlikely(*invalid)) {
2255 return -1;
2259 *overflow = carry;
2260 return is_zero;
2263 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2264 int *overflow)
2266 int carry = 0;
2267 int i;
2268 int is_zero = 1;
2269 for (i = 1; i <= 31; i++) {
2270 uint8_t digit = bcd_get_digit(a, i, invalid) -
2271 bcd_get_digit(b, i, invalid) + carry;
2272 is_zero &= (digit == 0);
2273 if (digit & 0x80) {
2274 carry = -1;
2275 digit += 10;
2276 } else {
2277 carry = 0;
2280 bcd_put_digit(t, digit, i);
2282 if (unlikely(*invalid)) {
2283 return -1;
2287 *overflow = carry;
2288 return is_zero;
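/*
 * bcdadd adds two signed packed decimal values.  Same-sign operands have
 * their magnitudes added; otherwise the smaller magnitude is subtracted
 * from the larger and the result takes the sign of the larger.  The
 * returned CR nibble uses the usual encoding: 0x8 negative, 0x4
 * positive, 0x2 zero, with 0x1 or'd in on overflow or set alone when
 * either operand has an invalid sign code (in which case r is set to
 * all ones).
 */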
2291 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2294 int sgna = bcd_get_sgn(a);
2295 int sgnb = bcd_get_sgn(b);
2296 int invalid = (sgna == 0) || (sgnb == 0);
2297 int overflow = 0;
2298 int zero = 0;
2299 uint32_t cr = 0;
2300 ppc_avr_t result = { .u64 = { 0, 0 } };
2302 if (!invalid) {
2303 if (sgna == sgnb) {
2304 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2305 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2306 cr = (sgna > 0) ? 4 : 8;
2307 } else if (bcd_cmp_mag(a, b) > 0) {
2308 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2309 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2310 cr = (sgna > 0) ? 4 : 8;
2311 } else {
2312 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2313 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2314 cr = (sgnb > 0) ? 4 : 8;
2318 if (unlikely(invalid)) {
2319 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2320 cr = 1;
2321 } else if (overflow) {
2322 cr |= 1;
2323 } else if (zero) {
2324 cr = 2;
2327 *r = result;
2329 return cr;
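/*
 * bcdsub is implemented by flipping the sign nibble of b to the opposite
 * preferred code and reusing the bcdadd logic above.
 */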
2332 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2334 ppc_avr_t bcopy = *b;
2335 int sgnb = bcd_get_sgn(b);
2336 if (sgnb < 0) {
2337 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2338 } else if (sgnb > 0) {
2339 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2341 /* else invalid ... defer to bcdadd code for proper handling */
2343 return helper_bcdadd(r, a, &bcopy, ps);
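/*
 * AES acceleration helpers.  vsbox applies the forward S-box to every
 * byte.  vcipher performs one full encryption round (SubBytes, ShiftRows
 * and MixColumns folded into the AES_Te* lookup tables) followed by the
 * XOR with the round key in b; vcipherlast is the final round, which
 * skips MixColumns.  vncipher/vncipherlast are the corresponding decrypt
 * rounds, built from the inverse S-box and the AES_imc inverse
 * MixColumns table.
 */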
2346 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2348 int i;
2349 VECTOR_FOR_INORDER_I(i, u8) {
2350 r->u8[i] = AES_sbox[a->u8[i]];
2354 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2356 int i;
2358 VECTOR_FOR_INORDER_I(i, u32) {
2359 r->AVRW(i) = b->AVRW(i) ^
2360 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2361 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2362 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2363 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2367 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2369 int i;
2371 VECTOR_FOR_INORDER_I(i, u8) {
2372 r->AVRB(i) = b->AVRB(i) ^ (AES_Te4[a->AVRB(AES_shifts[i])] & 0xFF);
2376 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2378 /* This differs from what is written in ISA V2.07. The RTL is */
2379 /* incorrect and will be fixed in V2.07B. */
2380 int i;
2381 ppc_avr_t tmp;
2383 VECTOR_FOR_INORDER_I(i, u8) {
2384 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2387 VECTOR_FOR_INORDER_I(i, u32) {
2388 r->AVRW(i) =
2389 AES_imc[tmp.AVRB(4*i + 0)][0] ^
2390 AES_imc[tmp.AVRB(4*i + 1)][1] ^
2391 AES_imc[tmp.AVRB(4*i + 2)][2] ^
2392 AES_imc[tmp.AVRB(4*i + 3)][3];
2396 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2398 int i;
2400 VECTOR_FOR_INORDER_I(i, u8) {
2401 r->AVRB(i) = b->AVRB(i) ^ (AES_Td4[a->AVRB(AES_ishifts[i])] & 0xFF);
2405 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
2406 #if defined(HOST_WORDS_BIGENDIAN)
2407 #define EL_IDX(i) (i)
2408 #else
2409 #define EL_IDX(i) (3 - (i))
2410 #endif
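/*
 * vshasigmaw computes the SHA-256 sigma functions on each word element.
 * st (bit 4 of st_six) selects the lower-case sigmas (st == 0, used for
 * message schedule expansion) or the upper-case sigmas (st == 1, used in
 * the compression function); the per-element bits of six pick sigma0 or
 * sigma1.  The rotation and shift amounts below are the standard SHA-256
 * constants.
 */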
2412 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2414 int st = (st_six & 0x10) != 0;
2415 int six = st_six & 0xF;
2416 int i;
2418 VECTOR_FOR_INORDER_I(i, u32) {
2419 if (st == 0) {
2420 if ((six & (0x8 >> i)) == 0) {
2421 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2422 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2423 (a->u32[EL_IDX(i)] >> 3);
2424 } else { /* six.bit[i] == 1 */
2425 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2426 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2427 (a->u32[EL_IDX(i)] >> 10);
2429 } else { /* st == 1 */
2430 if ((six & (0x8 >> i)) == 0) {
2431 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2432 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2433 ROTRu32(a->u32[EL_IDX(i)], 22);
2434 } else { /* six.bit[i] == 1 */
2435 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2436 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2437 ROTRu32(a->u32[EL_IDX(i)], 25);
2443 #undef ROTRu32
2444 #undef EL_IDX
2446 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
2447 #if defined(HOST_WORDS_BIGENDIAN)
2448 #define EL_IDX(i) (i)
2449 #else
2450 #define EL_IDX(i) (1 - (i))
2451 #endif
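/*
 * vshasigmad is the SHA-512 counterpart of vshasigmaw, operating on the
 * two doubleword elements; every other bit of six is consulted, one per
 * doubleword.  The constants are the standard SHA-512 sigma rotation and
 * shift amounts.
 */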
2453 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2455 int st = (st_six & 0x10) != 0;
2456 int six = st_six & 0xF;
2457 int i;
2459 VECTOR_FOR_INORDER_I(i, u64) {
2460 if (st == 0) {
2461 if ((six & (0x8 >> (2*i))) == 0) {
2462 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2463 ROTRu64(a->u64[EL_IDX(i)], 8) ^
2464 (a->u64[EL_IDX(i)] >> 7);
2465 } else { /* six.bit[2*i] == 1 */
2466 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2467 ROTRu64(a->u64[EL_IDX(i)], 61) ^
2468 (a->u64[EL_IDX(i)] >> 6);
2470 } else { /* st == 1 */
2471 if ((six & (0x8 >> (2*i))) == 0) {
2472 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2473 ROTRu64(a->u64[EL_IDX(i)], 34) ^
2474 ROTRu64(a->u64[EL_IDX(i)], 39);
2475 } else { /* six.bit[2*i] == 1 */
2476 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2477 ROTRu64(a->u64[EL_IDX(i)], 18) ^
2478 ROTRu64(a->u64[EL_IDX(i)], 41);
2484 #undef ROTRu64
2485 #undef EL_IDX
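/*
 * vpermxor: each result byte is the XOR of one byte selected from a by
 * the high nibble of the corresponding byte of c and one byte selected
 * from b by the low nibble.  The selectors index elements in big-endian
 * order, hence the 15 - index adjustment on little-endian hosts.
 */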
2487 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2489 int i;
2490 VECTOR_FOR_INORDER_I(i, u8) {
2491 int indexA = c->u8[i] >> 4;
2492 int indexB = c->u8[i] & 0xF;
2493 #if defined(HOST_WORDS_BIGENDIAN)
2494 r->u8[i] = a->u8[indexA] ^ b->u8[indexB];
2495 #else
2496 r->u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
2497 #endif
2501 #undef VECTOR_FOR_INORDER_I
2502 #undef HI_IDX
2503 #undef LO_IDX
2505 /*****************************************************************************/
2506 /* SPE extension helpers */
2507 /* Use a table to make this quicker */
2508 static const uint8_t hbrev[16] = {
2509 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
2510 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
2513 static inline uint8_t byte_reverse(uint8_t val)
2515 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
2518 static inline uint32_t word_reverse(uint32_t val)
2520 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
2521 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
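/*
 * brinc performs the SPE bit-reversed increment used for FFT-style
 * addressing: the low MASKBITS bits of arg1 are incremented in
 * bit-reversed order within the window selected by arg2, which is what
 * the rev(1 + rev(a | ~b)) expression below computes.
 */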
2524 #define MASKBITS 16 /* Arbitrary value, to be fixed: the width is implementation dependent */
2525 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2527 uint32_t a, b, d, mask;
2529 mask = UINT32_MAX >> (32 - MASKBITS);
2530 a = arg1 & mask;
2531 b = arg2 & mask;
2532 d = word_reverse(1 + word_reverse(a | ~b));
2533 return (arg1 & ~mask) | (d & b);
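/* cntlsw32 returns the number of leading bits that match the sign bit. */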
2536 uint32_t helper_cntlsw32(uint32_t val)
2538 if (val & 0x80000000) {
2539 return clz32(~val);
2540 } else {
2541 return clz32(val);
2545 uint32_t helper_cntlzw32(uint32_t val)
2547 return clz32(val);
2550 /* 440 specific */
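/*
 * dlmzb scans the eight bytes of high:low, most significant byte first,
 * for the first zero byte.  The running byte count is written to the low
 * seven bits of XER and returned; when update_Rc is set, CR0 records
 * whether the zero byte was found in high (0x4), in low (0x8) or not at
 * all (0x2), with the SO bit copied in.
 */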
2551 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2552 target_ulong low, uint32_t update_Rc)
2554 target_ulong mask;
2555 int i;
2557 i = 1;
2558 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2559 if ((high & mask) == 0) {
2560 if (update_Rc) {
2561 env->crf[0] = 0x4;
2563 goto done;
2565 i++;
2567 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2568 if ((low & mask) == 0) {
2569 if (update_Rc) {
2570 env->crf[0] = 0x8;
2572 goto done;
2574 i++;
2576 if (update_Rc) {
2577 env->crf[0] = 0x2;
2579 done:
2580 env->xer = (env->xer & ~0x7F) | i;
2581 if (update_Rc) {
2582 env->crf[0] |= xer_so;
2584 return i;