vmdk: Fix converting to streamOptimized
[qemu/ar7.git] / target-ppc / int_helper.c
blob27b0258d3132355eb9aa57f47c1fd71ff3ede510
/*
 *  PowerPC integer and vector emulation helpers for QEMU.
 *
 *  Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "qemu/host-utils.h"
22 #include "exec/helper-proto.h"
23 #include "crypto/aes.h"
25 #include "helper_regs.h"
26 /*****************************************************************************/
27 /* Fixed point operations helpers */
29 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
30 uint32_t oe)
32 uint64_t rt = 0;
33 int overflow = 0;
35 uint64_t dividend = (uint64_t)ra << 32;
36 uint64_t divisor = (uint32_t)rb;
38 if (unlikely(divisor == 0)) {
39 overflow = 1;
40 } else {
41 rt = dividend / divisor;
42 overflow = rt > UINT32_MAX;
45 if (unlikely(overflow)) {
46 rt = 0; /* Undefined */
49 if (oe) {
50 if (unlikely(overflow)) {
51 env->so = env->ov = 1;
52 } else {
53 env->ov = 0;
57 return (target_ulong)rt;
60 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
61 uint32_t oe)
63 int64_t rt = 0;
64 int overflow = 0;
66 int64_t dividend = (int64_t)ra << 32;
67 int64_t divisor = (int64_t)((int32_t)rb);
69 if (unlikely((divisor == 0) ||
70 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
71 overflow = 1;
72 } else {
73 rt = dividend / divisor;
74 overflow = rt != (int32_t)rt;
77 if (unlikely(overflow)) {
78 rt = 0; /* Undefined */
81 if (oe) {
82 if (unlikely(overflow)) {
83 env->so = env->ov = 1;
84 } else {
85 env->ov = 0;
89 return (target_ulong)rt;
92 #if defined(TARGET_PPC64)
94 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
96 uint64_t rt = 0;
97 int overflow = 0;
99 overflow = divu128(&rt, &ra, rb);
101 if (unlikely(overflow)) {
102 rt = 0; /* Undefined */
105 if (oe) {
106 if (unlikely(overflow)) {
107 env->so = env->ov = 1;
108 } else {
109 env->ov = 0;
113 return rt;
116 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
118 int64_t rt = 0;
119 int64_t ra = (int64_t)rau;
120 int64_t rb = (int64_t)rbu;
121 int overflow = divs128(&rt, &ra, rb);
123 if (unlikely(overflow)) {
124 rt = 0; /* Undefined */
127 if (oe) {
129 if (unlikely(overflow)) {
130 env->so = env->ov = 1;
131 } else {
132 env->ov = 0;
136 return rt;
139 #endif
142 target_ulong helper_cntlzw(target_ulong t)
144 return clz32(t);
147 #if defined(TARGET_PPC64)
148 target_ulong helper_cntlzd(target_ulong t)
150 return clz64(t);
152 #endif
154 #if defined(TARGET_PPC64)
/*
 * Bit Permute Doubleword.
 *
 * Each of the eight bytes of rs is a bit selector.  For byte i (counted
 * from the least significant byte), a selector below 64 picks bit
 * (63 - selector) of rb; if that bit is set, bit i of the result is set.
 * Selectors of 64 or more contribute a zero bit.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t gathered = 0;
    int byte_no;

    for (byte_no = 0; byte_no < 8; byte_no++) {
        const int selector = (rs >> (byte_no * 8)) & 0xFF;

        if (selector >= 64) {
            continue;
        }
        if ((rb >> (63 - selector)) & 1) {
            gathered |= 1 << byte_no;
        }
    }
    return gathered;
}
172 #endif
174 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
176 target_ulong mask = 0xff;
177 target_ulong ra = 0;
178 int i;
180 for (i = 0; i < sizeof(target_ulong); i++) {
181 if ((rs & mask) == (rb & mask)) {
182 ra |= mask;
184 mask <<= 8;
186 return ra;
189 /* shift right arithmetic helper */
190 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
191 target_ulong shift)
193 int32_t ret;
195 if (likely(!(shift & 0x20))) {
196 if (likely((uint32_t)shift != 0)) {
197 shift &= 0x1f;
198 ret = (int32_t)value >> shift;
199 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
200 env->ca = 0;
201 } else {
202 env->ca = 1;
204 } else {
205 ret = (int32_t)value;
206 env->ca = 0;
208 } else {
209 ret = (int32_t)value >> 31;
210 env->ca = (ret != 0);
212 return (target_long)ret;
215 #if defined(TARGET_PPC64)
216 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
217 target_ulong shift)
219 int64_t ret;
221 if (likely(!(shift & 0x40))) {
222 if (likely((uint64_t)shift != 0)) {
223 shift &= 0x3f;
224 ret = (int64_t)value >> shift;
225 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
226 env->ca = 0;
227 } else {
228 env->ca = 1;
230 } else {
231 ret = (int64_t)value;
232 env->ca = 0;
234 } else {
235 ret = (int64_t)value >> 63;
236 env->ca = (ret != 0);
238 return ret;
240 #endif
242 #if defined(TARGET_PPC64)
243 target_ulong helper_popcntb(target_ulong val)
245 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
246 0x5555555555555555ULL);
247 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
248 0x3333333333333333ULL);
249 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
250 0x0f0f0f0f0f0f0f0fULL);
251 return val;
254 target_ulong helper_popcntw(target_ulong val)
256 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
257 0x5555555555555555ULL);
258 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
259 0x3333333333333333ULL);
260 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
261 0x0f0f0f0f0f0f0f0fULL);
262 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
263 0x00ff00ff00ff00ffULL);
264 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
265 0x0000ffff0000ffffULL);
266 return val;
269 target_ulong helper_popcntd(target_ulong val)
271 return ctpop64(val);
273 #else
274 target_ulong helper_popcntb(target_ulong val)
276 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
277 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
278 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
279 return val;
282 target_ulong helper_popcntw(target_ulong val)
284 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
285 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
286 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
287 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
288 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
289 return val;
291 #endif
293 /*****************************************************************************/
294 /* PowerPC 601 specific instructions (POWER bridge) */
295 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
297 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
299 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
300 (int32_t)arg2 == 0) {
301 env->spr[SPR_MQ] = 0;
302 return INT32_MIN;
303 } else {
304 env->spr[SPR_MQ] = tmp % arg2;
305 return tmp / (int32_t)arg2;
309 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
310 target_ulong arg2)
312 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
314 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
315 (int32_t)arg2 == 0) {
316 env->so = env->ov = 1;
317 env->spr[SPR_MQ] = 0;
318 return INT32_MIN;
319 } else {
320 env->spr[SPR_MQ] = tmp % arg2;
321 tmp /= (int32_t)arg2;
322 if ((int32_t)tmp != tmp) {
323 env->so = env->ov = 1;
324 } else {
325 env->ov = 0;
327 return tmp;
331 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
332 target_ulong arg2)
334 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
335 (int32_t)arg2 == 0) {
336 env->spr[SPR_MQ] = 0;
337 return INT32_MIN;
338 } else {
339 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
340 return (int32_t)arg1 / (int32_t)arg2;
344 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
345 target_ulong arg2)
347 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
348 (int32_t)arg2 == 0) {
349 env->so = env->ov = 1;
350 env->spr[SPR_MQ] = 0;
351 return INT32_MIN;
352 } else {
353 env->ov = 0;
354 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
355 return (int32_t)arg1 / (int32_t)arg2;
359 /*****************************************************************************/
360 /* 602 specific instructions */
361 /* mfrom is the most crazy instruction ever seen, imho ! */
362 /* Real implementation uses a ROM table. Do the same */
/* Extremely decomposed:
 *   return 256 * log10(pow(10, -arg / 256) + 1.0) + 0.5
 */
367 #if !defined(CONFIG_USER_ONLY)
368 target_ulong helper_602_mfrom(target_ulong arg)
370 if (likely(arg < 602)) {
371 #include "mfrom_table.c"
372 return mfrom_ROM_table[arg];
373 } else {
374 return 0;
377 #endif
379 /*****************************************************************************/
380 /* Altivec extension helpers */
/*
 * Host-endianness dependent accessors for ppc_avr_t.
 *
 *  HI_IDX / LO_IDX        index of the high / low half of a pair
 *  AVRB(i) / AVRW(i)      byte / word member selected by index i
 *  VECTOR_FOR_INORDER_I   loop header walking the elements of r->element,
 *                         ascending on big-endian hosts and descending on
 *                         little-endian ones; expects a variable named r
 *                         in scope
 */
#if defined(HOST_WORDS_BIGENDIAN)

#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)

#else

#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15-(i)]
#define AVRW(i) u32[3-(i)]
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)

#endif
/*
 * Saturating conversion helpers.
 *
 * SATCVT(from, to, ...) defines cvt<from><to>(x, sat), which clamps x to
 * [min, max], narrows it to to_type and stores 1 in *sat when clamping
 * happened (*sat is left untouched otherwise).  SATCVTU is the variant
 * for unsigned sources, which only checks the upper bound.
 */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x < (from_type)min) {                               \
            *sat = 1;                                           \
            return min;                                         \
        }                                                       \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        if (x > (from_type)max) {                               \
            *sat = 1;                                           \
            return max;                                         \
        }                                                       \
        return x;                                               \
    }

/* signed -> narrower signed */
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

/* unsigned -> narrower unsigned */
SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)

/* signed -> narrower unsigned */
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
444 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
446 int i, j = (sh & 0xf);
448 VECTOR_FOR_INORDER_I(i, u8) {
449 r->u8[i] = j++;
453 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
455 int i, j = 0x10 - (sh & 0xf);
457 VECTOR_FOR_INORDER_I(i, u8) {
458 r->u8[i] = j++;
462 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
464 #if defined(HOST_WORDS_BIGENDIAN)
465 env->vscr = r->u32[3];
466 #else
467 env->vscr = r->u32[0];
468 #endif
469 set_flush_to_zero(vscr_nj, &env->vec_status);
472 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
474 int i;
476 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
477 r->u32[i] = ~a->u32[i] < b->u32[i];
481 #define VARITH_DO(name, op, element) \
482 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
484 int i; \
486 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
487 r->element[i] = a->element[i] op b->element[i]; \
490 #define VARITH(suffix, element) \
491 VARITH_DO(add##suffix, +, element) \
492 VARITH_DO(sub##suffix, -, element)
493 VARITH(ubm, u8)
494 VARITH(uhm, u16)
495 VARITH(uwm, u32)
496 VARITH(udm, u64)
497 VARITH_DO(muluwm, *, u32)
498 #undef VARITH_DO
499 #undef VARITH
501 #define VARITHFP(suffix, func) \
502 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
503 ppc_avr_t *b) \
505 int i; \
507 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
508 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
511 VARITHFP(addfp, float32_add)
512 VARITHFP(subfp, float32_sub)
513 VARITHFP(minfp, float32_min)
514 VARITHFP(maxfp, float32_max)
515 #undef VARITHFP
517 #define VARITHFPFMA(suffix, type) \
518 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
519 ppc_avr_t *b, ppc_avr_t *c) \
521 int i; \
522 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
523 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
524 type, &env->vec_status); \
527 VARITHFPFMA(maddfp, 0);
528 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
529 #undef VARITHFPFMA
531 #define VARITHSAT_CASE(type, op, cvt, element) \
533 type result = (type)a->element[i] op (type)b->element[i]; \
534 r->element[i] = cvt(result, &sat); \
537 #define VARITHSAT_DO(name, op, optype, cvt, element) \
538 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
539 ppc_avr_t *b) \
541 int sat = 0; \
542 int i; \
544 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
545 switch (sizeof(r->element[0])) { \
546 case 1: \
547 VARITHSAT_CASE(optype, op, cvt, element); \
548 break; \
549 case 2: \
550 VARITHSAT_CASE(optype, op, cvt, element); \
551 break; \
552 case 4: \
553 VARITHSAT_CASE(optype, op, cvt, element); \
554 break; \
557 if (sat) { \
558 env->vscr |= (1 << VSCR_SAT); \
561 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
562 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
563 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
564 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
565 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
566 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
567 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
568 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
569 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
570 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
571 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
572 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
573 #undef VARITHSAT_CASE
574 #undef VARITHSAT_DO
575 #undef VARITHSAT_SIGNED
576 #undef VARITHSAT_UNSIGNED
578 #define VAVG_DO(name, element, etype) \
579 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
581 int i; \
583 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
584 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
585 r->element[i] = x >> 1; \
589 #define VAVG(type, signed_element, signed_type, unsigned_element, \
590 unsigned_type) \
591 VAVG_DO(avgs##type, signed_element, signed_type) \
592 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
593 VAVG(b, s8, int16_t, u8, uint16_t)
594 VAVG(h, s16, int32_t, u16, uint32_t)
595 VAVG(w, s32, int64_t, u32, uint64_t)
596 #undef VAVG_DO
597 #undef VAVG
599 #define VCF(suffix, cvt, element) \
600 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
601 ppc_avr_t *b, uint32_t uim) \
603 int i; \
605 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
606 float32 t = cvt(b->element[i], &env->vec_status); \
607 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
610 VCF(ux, uint32_to_float32, u32)
611 VCF(sx, int32_to_float32, s32)
612 #undef VCF
614 #define VCMP_DO(suffix, compare, element, record) \
615 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
616 ppc_avr_t *a, ppc_avr_t *b) \
618 uint64_t ones = (uint64_t)-1; \
619 uint64_t all = ones; \
620 uint64_t none = 0; \
621 int i; \
623 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
624 uint64_t result = (a->element[i] compare b->element[i] ? \
625 ones : 0x0); \
626 switch (sizeof(a->element[0])) { \
627 case 8: \
628 r->u64[i] = result; \
629 break; \
630 case 4: \
631 r->u32[i] = result; \
632 break; \
633 case 2: \
634 r->u16[i] = result; \
635 break; \
636 case 1: \
637 r->u8[i] = result; \
638 break; \
640 all &= result; \
641 none |= result; \
643 if (record) { \
644 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
647 #define VCMP(suffix, compare, element) \
648 VCMP_DO(suffix, compare, element, 0) \
649 VCMP_DO(suffix##_dot, compare, element, 1)
650 VCMP(equb, ==, u8)
651 VCMP(equh, ==, u16)
652 VCMP(equw, ==, u32)
653 VCMP(equd, ==, u64)
654 VCMP(gtub, >, u8)
655 VCMP(gtuh, >, u16)
656 VCMP(gtuw, >, u32)
657 VCMP(gtud, >, u64)
658 VCMP(gtsb, >, s8)
659 VCMP(gtsh, >, s16)
660 VCMP(gtsw, >, s32)
661 VCMP(gtsd, >, s64)
662 #undef VCMP_DO
663 #undef VCMP
665 #define VCMPFP_DO(suffix, compare, order, record) \
666 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
667 ppc_avr_t *a, ppc_avr_t *b) \
669 uint32_t ones = (uint32_t)-1; \
670 uint32_t all = ones; \
671 uint32_t none = 0; \
672 int i; \
674 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
675 uint32_t result; \
676 int rel = float32_compare_quiet(a->f[i], b->f[i], \
677 &env->vec_status); \
678 if (rel == float_relation_unordered) { \
679 result = 0; \
680 } else if (rel compare order) { \
681 result = ones; \
682 } else { \
683 result = 0; \
685 r->u32[i] = result; \
686 all &= result; \
687 none |= result; \
689 if (record) { \
690 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
693 #define VCMPFP(suffix, compare, order) \
694 VCMPFP_DO(suffix, compare, order, 0) \
695 VCMPFP_DO(suffix##_dot, compare, order, 1)
696 VCMPFP(eqfp, ==, float_relation_equal)
697 VCMPFP(gefp, !=, float_relation_less)
698 VCMPFP(gtfp, ==, float_relation_greater)
699 #undef VCMPFP_DO
700 #undef VCMPFP
702 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
703 ppc_avr_t *a, ppc_avr_t *b, int record)
705 int i;
706 int all_in = 0;
708 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
709 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
710 if (le_rel == float_relation_unordered) {
711 r->u32[i] = 0xc0000000;
712 all_in = 1;
713 } else {
714 float32 bneg = float32_chs(b->f[i]);
715 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
716 int le = le_rel != float_relation_greater;
717 int ge = ge_rel != float_relation_less;
719 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
720 all_in |= (!le | !ge);
723 if (record) {
724 env->crf[6] = (all_in == 0) << 1;
728 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
730 vcmpbfp_internal(env, r, a, b, 0);
733 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
734 ppc_avr_t *b)
736 vcmpbfp_internal(env, r, a, b, 1);
739 #define VCT(suffix, satcvt, element) \
740 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
741 ppc_avr_t *b, uint32_t uim) \
743 int i; \
744 int sat = 0; \
745 float_status s = env->vec_status; \
747 set_float_rounding_mode(float_round_to_zero, &s); \
748 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
749 if (float32_is_any_nan(b->f[i])) { \
750 r->element[i] = 0; \
751 } else { \
752 float64 t = float32_to_float64(b->f[i], &s); \
753 int64_t j; \
755 t = float64_scalbn(t, uim, &s); \
756 j = float64_to_int64(t, &s); \
757 r->element[i] = satcvt(j, &sat); \
760 if (sat) { \
761 env->vscr |= (1 << VSCR_SAT); \
764 VCT(uxs, cvtsduw, u32)
765 VCT(sxs, cvtsdsw, s32)
766 #undef VCT
768 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
769 ppc_avr_t *b, ppc_avr_t *c)
771 int sat = 0;
772 int i;
774 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
775 int32_t prod = a->s16[i] * b->s16[i];
776 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
778 r->s16[i] = cvtswsh(t, &sat);
781 if (sat) {
782 env->vscr |= (1 << VSCR_SAT);
786 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
787 ppc_avr_t *b, ppc_avr_t *c)
789 int sat = 0;
790 int i;
792 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
793 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
794 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
795 r->s16[i] = cvtswsh(t, &sat);
798 if (sat) {
799 env->vscr |= (1 << VSCR_SAT);
803 #define VMINMAX_DO(name, compare, element) \
804 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
806 int i; \
808 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
809 if (a->element[i] compare b->element[i]) { \
810 r->element[i] = b->element[i]; \
811 } else { \
812 r->element[i] = a->element[i]; \
816 #define VMINMAX(suffix, element) \
817 VMINMAX_DO(min##suffix, >, element) \
818 VMINMAX_DO(max##suffix, <, element)
819 VMINMAX(sb, s8)
820 VMINMAX(sh, s16)
821 VMINMAX(sw, s32)
822 VMINMAX(sd, s64)
823 VMINMAX(ub, u8)
824 VMINMAX(uh, u16)
825 VMINMAX(uw, u32)
826 VMINMAX(ud, u64)
827 #undef VMINMAX_DO
828 #undef VMINMAX
830 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
832 int i;
834 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
835 int32_t prod = a->s16[i] * b->s16[i];
836 r->s16[i] = (int16_t) (prod + c->s16[i]);
840 #define VMRG_DO(name, element, highp) \
841 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
843 ppc_avr_t result; \
844 int i; \
845 size_t n_elems = ARRAY_SIZE(r->element); \
847 for (i = 0; i < n_elems / 2; i++) { \
848 if (highp) { \
849 result.element[i*2+HI_IDX] = a->element[i]; \
850 result.element[i*2+LO_IDX] = b->element[i]; \
851 } else { \
852 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
853 b->element[n_elems - i - 1]; \
854 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
855 a->element[n_elems - i - 1]; \
858 *r = result; \
860 #if defined(HOST_WORDS_BIGENDIAN)
861 #define MRGHI 0
862 #define MRGLO 1
863 #else
864 #define MRGHI 1
865 #define MRGLO 0
866 #endif
867 #define VMRG(suffix, element) \
868 VMRG_DO(mrgl##suffix, element, MRGHI) \
869 VMRG_DO(mrgh##suffix, element, MRGLO)
870 VMRG(b, u8)
871 VMRG(h, u16)
872 VMRG(w, u32)
873 #undef VMRG_DO
874 #undef VMRG
875 #undef MRGHI
876 #undef MRGLO
878 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
879 ppc_avr_t *b, ppc_avr_t *c)
881 int32_t prod[16];
882 int i;
884 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
885 prod[i] = (int32_t)a->s8[i] * b->u8[i];
888 VECTOR_FOR_INORDER_I(i, s32) {
889 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
890 prod[4 * i + 2] + prod[4 * i + 3];
894 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
895 ppc_avr_t *b, ppc_avr_t *c)
897 int32_t prod[8];
898 int i;
900 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
901 prod[i] = a->s16[i] * b->s16[i];
904 VECTOR_FOR_INORDER_I(i, s32) {
905 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
909 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
910 ppc_avr_t *b, ppc_avr_t *c)
912 int32_t prod[8];
913 int i;
914 int sat = 0;
916 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
917 prod[i] = (int32_t)a->s16[i] * b->s16[i];
920 VECTOR_FOR_INORDER_I(i, s32) {
921 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
923 r->u32[i] = cvtsdsw(t, &sat);
926 if (sat) {
927 env->vscr |= (1 << VSCR_SAT);
931 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
932 ppc_avr_t *b, ppc_avr_t *c)
934 uint16_t prod[16];
935 int i;
937 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
938 prod[i] = a->u8[i] * b->u8[i];
941 VECTOR_FOR_INORDER_I(i, u32) {
942 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
943 prod[4 * i + 2] + prod[4 * i + 3];
947 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
948 ppc_avr_t *b, ppc_avr_t *c)
950 uint32_t prod[8];
951 int i;
953 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
954 prod[i] = a->u16[i] * b->u16[i];
957 VECTOR_FOR_INORDER_I(i, u32) {
958 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
962 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
963 ppc_avr_t *b, ppc_avr_t *c)
965 uint32_t prod[8];
966 int i;
967 int sat = 0;
969 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
970 prod[i] = a->u16[i] * b->u16[i];
973 VECTOR_FOR_INORDER_I(i, s32) {
974 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
976 r->u32[i] = cvtuduw(t, &sat);
979 if (sat) {
980 env->vscr |= (1 << VSCR_SAT);
984 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
985 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
987 int i; \
989 VECTOR_FOR_INORDER_I(i, prod_element) { \
990 if (evenp) { \
991 r->prod_element[i] = \
992 (cast)a->mul_element[i * 2 + HI_IDX] * \
993 (cast)b->mul_element[i * 2 + HI_IDX]; \
994 } else { \
995 r->prod_element[i] = \
996 (cast)a->mul_element[i * 2 + LO_IDX] * \
997 (cast)b->mul_element[i * 2 + LO_IDX]; \
1001 #define VMUL(suffix, mul_element, prod_element, cast) \
1002 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1003 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1004 VMUL(sb, s8, s16, int16_t)
1005 VMUL(sh, s16, s32, int32_t)
1006 VMUL(sw, s32, s64, int64_t)
1007 VMUL(ub, u8, u16, uint16_t)
1008 VMUL(uh, u16, u32, uint32_t)
1009 VMUL(uw, u32, u64, uint64_t)
1010 #undef VMUL_DO
1011 #undef VMUL
1013 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1014 ppc_avr_t *c)
1016 ppc_avr_t result;
1017 int i;
1019 VECTOR_FOR_INORDER_I(i, u8) {
1020 int s = c->u8[i] & 0x1f;
1021 #if defined(HOST_WORDS_BIGENDIAN)
1022 int index = s & 0xf;
1023 #else
1024 int index = 15 - (s & 0xf);
1025 #endif
1027 if (s & 0x10) {
1028 result.u8[i] = b->u8[index];
1029 } else {
1030 result.u8[i] = a->u8[index];
1033 *r = result;
1036 #if defined(HOST_WORDS_BIGENDIAN)
1037 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1038 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1039 #else
1040 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1041 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1042 #endif
1044 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1046 int i;
1047 uint64_t perm = 0;
1049 VECTOR_FOR_INORDER_I(i, u8) {
1050 int index = VBPERMQ_INDEX(b, i);
1052 if (index < 128) {
1053 uint64_t mask = (1ull << (63-(index & 0x3F)));
1054 if (a->u64[VBPERMQ_DW(index)] & mask) {
1055 perm |= (0x8000 >> i);
1060 r->u64[HI_IDX] = perm;
1061 r->u64[LO_IDX] = 0;
1064 #undef VBPERMQ_INDEX
1065 #undef VBPERMQ_DW
1067 static const uint64_t VGBBD_MASKS[256] = {
1068 0x0000000000000000ull, /* 00 */
1069 0x0000000000000080ull, /* 01 */
1070 0x0000000000008000ull, /* 02 */
1071 0x0000000000008080ull, /* 03 */
1072 0x0000000000800000ull, /* 04 */
1073 0x0000000000800080ull, /* 05 */
1074 0x0000000000808000ull, /* 06 */
1075 0x0000000000808080ull, /* 07 */
1076 0x0000000080000000ull, /* 08 */
1077 0x0000000080000080ull, /* 09 */
1078 0x0000000080008000ull, /* 0A */
1079 0x0000000080008080ull, /* 0B */
1080 0x0000000080800000ull, /* 0C */
1081 0x0000000080800080ull, /* 0D */
1082 0x0000000080808000ull, /* 0E */
1083 0x0000000080808080ull, /* 0F */
1084 0x0000008000000000ull, /* 10 */
1085 0x0000008000000080ull, /* 11 */
1086 0x0000008000008000ull, /* 12 */
1087 0x0000008000008080ull, /* 13 */
1088 0x0000008000800000ull, /* 14 */
1089 0x0000008000800080ull, /* 15 */
1090 0x0000008000808000ull, /* 16 */
1091 0x0000008000808080ull, /* 17 */
1092 0x0000008080000000ull, /* 18 */
1093 0x0000008080000080ull, /* 19 */
1094 0x0000008080008000ull, /* 1A */
1095 0x0000008080008080ull, /* 1B */
1096 0x0000008080800000ull, /* 1C */
1097 0x0000008080800080ull, /* 1D */
1098 0x0000008080808000ull, /* 1E */
1099 0x0000008080808080ull, /* 1F */
1100 0x0000800000000000ull, /* 20 */
1101 0x0000800000000080ull, /* 21 */
1102 0x0000800000008000ull, /* 22 */
1103 0x0000800000008080ull, /* 23 */
1104 0x0000800000800000ull, /* 24 */
1105 0x0000800000800080ull, /* 25 */
1106 0x0000800000808000ull, /* 26 */
1107 0x0000800000808080ull, /* 27 */
1108 0x0000800080000000ull, /* 28 */
1109 0x0000800080000080ull, /* 29 */
1110 0x0000800080008000ull, /* 2A */
1111 0x0000800080008080ull, /* 2B */
1112 0x0000800080800000ull, /* 2C */
1113 0x0000800080800080ull, /* 2D */
1114 0x0000800080808000ull, /* 2E */
1115 0x0000800080808080ull, /* 2F */
1116 0x0000808000000000ull, /* 30 */
1117 0x0000808000000080ull, /* 31 */
1118 0x0000808000008000ull, /* 32 */
1119 0x0000808000008080ull, /* 33 */
1120 0x0000808000800000ull, /* 34 */
1121 0x0000808000800080ull, /* 35 */
1122 0x0000808000808000ull, /* 36 */
1123 0x0000808000808080ull, /* 37 */
1124 0x0000808080000000ull, /* 38 */
1125 0x0000808080000080ull, /* 39 */
1126 0x0000808080008000ull, /* 3A */
1127 0x0000808080008080ull, /* 3B */
1128 0x0000808080800000ull, /* 3C */
1129 0x0000808080800080ull, /* 3D */
1130 0x0000808080808000ull, /* 3E */
1131 0x0000808080808080ull, /* 3F */
1132 0x0080000000000000ull, /* 40 */
1133 0x0080000000000080ull, /* 41 */
1134 0x0080000000008000ull, /* 42 */
1135 0x0080000000008080ull, /* 43 */
1136 0x0080000000800000ull, /* 44 */
1137 0x0080000000800080ull, /* 45 */
1138 0x0080000000808000ull, /* 46 */
1139 0x0080000000808080ull, /* 47 */
1140 0x0080000080000000ull, /* 48 */
1141 0x0080000080000080ull, /* 49 */
1142 0x0080000080008000ull, /* 4A */
1143 0x0080000080008080ull, /* 4B */
1144 0x0080000080800000ull, /* 4C */
1145 0x0080000080800080ull, /* 4D */
1146 0x0080000080808000ull, /* 4E */
1147 0x0080000080808080ull, /* 4F */
1148 0x0080008000000000ull, /* 50 */
1149 0x0080008000000080ull, /* 51 */
1150 0x0080008000008000ull, /* 52 */
1151 0x0080008000008080ull, /* 53 */
1152 0x0080008000800000ull, /* 54 */
1153 0x0080008000800080ull, /* 55 */
1154 0x0080008000808000ull, /* 56 */
1155 0x0080008000808080ull, /* 57 */
1156 0x0080008080000000ull, /* 58 */
1157 0x0080008080000080ull, /* 59 */
1158 0x0080008080008000ull, /* 5A */
1159 0x0080008080008080ull, /* 5B */
1160 0x0080008080800000ull, /* 5C */
1161 0x0080008080800080ull, /* 5D */
1162 0x0080008080808000ull, /* 5E */
1163 0x0080008080808080ull, /* 5F */
1164 0x0080800000000000ull, /* 60 */
1165 0x0080800000000080ull, /* 61 */
1166 0x0080800000008000ull, /* 62 */
1167 0x0080800000008080ull, /* 63 */
1168 0x0080800000800000ull, /* 64 */
1169 0x0080800000800080ull, /* 65 */
1170 0x0080800000808000ull, /* 66 */
1171 0x0080800000808080ull, /* 67 */
1172 0x0080800080000000ull, /* 68 */
1173 0x0080800080000080ull, /* 69 */
1174 0x0080800080008000ull, /* 6A */
1175 0x0080800080008080ull, /* 6B */
1176 0x0080800080800000ull, /* 6C */
1177 0x0080800080800080ull, /* 6D */
1178 0x0080800080808000ull, /* 6E */
1179 0x0080800080808080ull, /* 6F */
1180 0x0080808000000000ull, /* 70 */
1181 0x0080808000000080ull, /* 71 */
1182 0x0080808000008000ull, /* 72 */
1183 0x0080808000008080ull, /* 73 */
1184 0x0080808000800000ull, /* 74 */
1185 0x0080808000800080ull, /* 75 */
1186 0x0080808000808000ull, /* 76 */
1187 0x0080808000808080ull, /* 77 */
1188 0x0080808080000000ull, /* 78 */
1189 0x0080808080000080ull, /* 79 */
1190 0x0080808080008000ull, /* 7A */
1191 0x0080808080008080ull, /* 7B */
1192 0x0080808080800000ull, /* 7C */
1193 0x0080808080800080ull, /* 7D */
1194 0x0080808080808000ull, /* 7E */
1195 0x0080808080808080ull, /* 7F */
1196 0x8000000000000000ull, /* 80 */
1197 0x8000000000000080ull, /* 81 */
1198 0x8000000000008000ull, /* 82 */
1199 0x8000000000008080ull, /* 83 */
1200 0x8000000000800000ull, /* 84 */
1201 0x8000000000800080ull, /* 85 */
1202 0x8000000000808000ull, /* 86 */
1203 0x8000000000808080ull, /* 87 */
1204 0x8000000080000000ull, /* 88 */
1205 0x8000000080000080ull, /* 89 */
1206 0x8000000080008000ull, /* 8A */
1207 0x8000000080008080ull, /* 8B */
1208 0x8000000080800000ull, /* 8C */
1209 0x8000000080800080ull, /* 8D */
1210 0x8000000080808000ull, /* 8E */
1211 0x8000000080808080ull, /* 8F */
1212 0x8000008000000000ull, /* 90 */
1213 0x8000008000000080ull, /* 91 */
1214 0x8000008000008000ull, /* 92 */
1215 0x8000008000008080ull, /* 93 */
1216 0x8000008000800000ull, /* 94 */
1217 0x8000008000800080ull, /* 95 */
1218 0x8000008000808000ull, /* 96 */
1219 0x8000008000808080ull, /* 97 */
1220 0x8000008080000000ull, /* 98 */
1221 0x8000008080000080ull, /* 99 */
1222 0x8000008080008000ull, /* 9A */
1223 0x8000008080008080ull, /* 9B */
1224 0x8000008080800000ull, /* 9C */
1225 0x8000008080800080ull, /* 9D */
1226 0x8000008080808000ull, /* 9E */
1227 0x8000008080808080ull, /* 9F */
1228 0x8000800000000000ull, /* A0 */
1229 0x8000800000000080ull, /* A1 */
1230 0x8000800000008000ull, /* A2 */
1231 0x8000800000008080ull, /* A3 */
1232 0x8000800000800000ull, /* A4 */
1233 0x8000800000800080ull, /* A5 */
1234 0x8000800000808000ull, /* A6 */
1235 0x8000800000808080ull, /* A7 */
1236 0x8000800080000000ull, /* A8 */
1237 0x8000800080000080ull, /* A9 */
1238 0x8000800080008000ull, /* AA */
1239 0x8000800080008080ull, /* AB */
1240 0x8000800080800000ull, /* AC */
1241 0x8000800080800080ull, /* AD */
1242 0x8000800080808000ull, /* AE */
1243 0x8000800080808080ull, /* AF */
1244 0x8000808000000000ull, /* B0 */
1245 0x8000808000000080ull, /* B1 */
1246 0x8000808000008000ull, /* B2 */
1247 0x8000808000008080ull, /* B3 */
1248 0x8000808000800000ull, /* B4 */
1249 0x8000808000800080ull, /* B5 */
1250 0x8000808000808000ull, /* B6 */
1251 0x8000808000808080ull, /* B7 */
1252 0x8000808080000000ull, /* B8 */
1253 0x8000808080000080ull, /* B9 */
1254 0x8000808080008000ull, /* BA */
1255 0x8000808080008080ull, /* BB */
1256 0x8000808080800000ull, /* BC */
1257 0x8000808080800080ull, /* BD */
1258 0x8000808080808000ull, /* BE */
1259 0x8000808080808080ull, /* BF */
1260 0x8080000000000000ull, /* C0 */
1261 0x8080000000000080ull, /* C1 */
1262 0x8080000000008000ull, /* C2 */
1263 0x8080000000008080ull, /* C3 */
1264 0x8080000000800000ull, /* C4 */
1265 0x8080000000800080ull, /* C5 */
1266 0x8080000000808000ull, /* C6 */
1267 0x8080000000808080ull, /* C7 */
1268 0x8080000080000000ull, /* C8 */
1269 0x8080000080000080ull, /* C9 */
1270 0x8080000080008000ull, /* CA */
1271 0x8080000080008080ull, /* CB */
1272 0x8080000080800000ull, /* CC */
1273 0x8080000080800080ull, /* CD */
1274 0x8080000080808000ull, /* CE */
1275 0x8080000080808080ull, /* CF */
1276 0x8080008000000000ull, /* D0 */
1277 0x8080008000000080ull, /* D1 */
1278 0x8080008000008000ull, /* D2 */
1279 0x8080008000008080ull, /* D3 */
1280 0x8080008000800000ull, /* D4 */
1281 0x8080008000800080ull, /* D5 */
1282 0x8080008000808000ull, /* D6 */
1283 0x8080008000808080ull, /* D7 */
1284 0x8080008080000000ull, /* D8 */
1285 0x8080008080000080ull, /* D9 */
1286 0x8080008080008000ull, /* DA */
1287 0x8080008080008080ull, /* DB */
1288 0x8080008080800000ull, /* DC */
1289 0x8080008080800080ull, /* DD */
1290 0x8080008080808000ull, /* DE */
1291 0x8080008080808080ull, /* DF */
1292 0x8080800000000000ull, /* E0 */
1293 0x8080800000000080ull, /* E1 */
1294 0x8080800000008000ull, /* E2 */
1295 0x8080800000008080ull, /* E3 */
1296 0x8080800000800000ull, /* E4 */
1297 0x8080800000800080ull, /* E5 */
1298 0x8080800000808000ull, /* E6 */
1299 0x8080800000808080ull, /* E7 */
1300 0x8080800080000000ull, /* E8 */
1301 0x8080800080000080ull, /* E9 */
1302 0x8080800080008000ull, /* EA */
1303 0x8080800080008080ull, /* EB */
1304 0x8080800080800000ull, /* EC */
1305 0x8080800080800080ull, /* ED */
1306 0x8080800080808000ull, /* EE */
1307 0x8080800080808080ull, /* EF */
1308 0x8080808000000000ull, /* F0 */
1309 0x8080808000000080ull, /* F1 */
1310 0x8080808000008000ull, /* F2 */
1311 0x8080808000008080ull, /* F3 */
1312 0x8080808000800000ull, /* F4 */
1313 0x8080808000800080ull, /* F5 */
1314 0x8080808000808000ull, /* F6 */
1315 0x8080808000808080ull, /* F7 */
1316 0x8080808080000000ull, /* F8 */
1317 0x8080808080000080ull, /* F9 */
1318 0x8080808080008000ull, /* FA */
1319 0x8080808080008080ull, /* FB */
1320 0x8080808080800000ull, /* FC */
1321 0x8080808080800080ull, /* FD */
1322 0x8080808080808000ull, /* FE */
1323 0x8080808080808080ull, /* FF */
1326 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1328 int i;
1329 uint64_t t[2] = { 0, 0 };
1331 VECTOR_FOR_INORDER_I(i, u8) {
1332 #if defined(HOST_WORDS_BIGENDIAN)
1333 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1334 #else
1335 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1336 #endif
1339 r->u64[0] = t[0];
1340 r->u64[1] = t[1];
1343 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1344 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1346 int i, j; \
1347 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1349 VECTOR_FOR_INORDER_I(i, srcfld) { \
1350 prod[i] = 0; \
1351 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1352 if (a->srcfld[i] & (1ull<<j)) { \
1353 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1358 VECTOR_FOR_INORDER_I(i, trgfld) { \
1359 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1363 PMSUM(vpmsumb, u8, u16, uint16_t)
1364 PMSUM(vpmsumh, u16, u32, uint32_t)
1365 PMSUM(vpmsumw, u32, u64, uint64_t)
1367 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1370 #ifdef CONFIG_INT128
1371 int i, j;
1372 __uint128_t prod[2];
1374 VECTOR_FOR_INORDER_I(i, u64) {
1375 prod[i] = 0;
1376 for (j = 0; j < 64; j++) {
1377 if (a->u64[i] & (1ull<<j)) {
1378 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1383 r->u128 = prod[0] ^ prod[1];
1385 #else
1386 int i, j;
1387 ppc_avr_t prod[2];
1389 VECTOR_FOR_INORDER_I(i, u64) {
1390 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1391 for (j = 0; j < 64; j++) {
1392 if (a->u64[i] & (1ull<<j)) {
1393 ppc_avr_t bshift;
1394 if (j == 0) {
1395 bshift.u64[HI_IDX] = 0;
1396 bshift.u64[LO_IDX] = b->u64[i];
1397 } else {
1398 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1399 bshift.u64[LO_IDX] = b->u64[i] << j;
1401 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1402 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1407 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1408 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1409 #endif
1413 #if defined(HOST_WORDS_BIGENDIAN)
1414 #define PKBIG 1
1415 #else
1416 #define PKBIG 0
1417 #endif
1418 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1420 int i, j;
1421 ppc_avr_t result;
1422 #if defined(HOST_WORDS_BIGENDIAN)
1423 const ppc_avr_t *x[2] = { a, b };
1424 #else
1425 const ppc_avr_t *x[2] = { b, a };
1426 #endif
1428 VECTOR_FOR_INORDER_I(i, u64) {
1429 VECTOR_FOR_INORDER_I(j, u32) {
1430 uint32_t e = x[i]->u32[j];
1432 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1433 ((e >> 6) & 0x3e0) |
1434 ((e >> 3) & 0x1f));
1437 *r = result;
1440 #define VPK(suffix, from, to, cvt, dosat) \
1441 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1442 ppc_avr_t *a, ppc_avr_t *b) \
1444 int i; \
1445 int sat = 0; \
1446 ppc_avr_t result; \
1447 ppc_avr_t *a0 = PKBIG ? a : b; \
1448 ppc_avr_t *a1 = PKBIG ? b : a; \
1450 VECTOR_FOR_INORDER_I(i, from) { \
1451 result.to[i] = cvt(a0->from[i], &sat); \
1452 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1454 *r = result; \
1455 if (dosat && sat) { \
1456 env->vscr |= (1 << VSCR_SAT); \
1459 #define I(x, y) (x)
1460 VPK(shss, s16, s8, cvtshsb, 1)
1461 VPK(shus, s16, u8, cvtshub, 1)
1462 VPK(swss, s32, s16, cvtswsh, 1)
1463 VPK(swus, s32, u16, cvtswuh, 1)
1464 VPK(sdss, s64, s32, cvtsdsw, 1)
1465 VPK(sdus, s64, u32, cvtsduw, 1)
1466 VPK(uhus, u16, u8, cvtuhub, 1)
1467 VPK(uwus, u32, u16, cvtuwuh, 1)
1468 VPK(udus, u64, u32, cvtuduw, 1)
1469 VPK(uhum, u16, u8, I, 0)
1470 VPK(uwum, u32, u16, I, 0)
1471 VPK(udum, u64, u32, I, 0)
1472 #undef I
1473 #undef VPK
1474 #undef PKBIG
1476 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1478 int i;
1480 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1481 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1485 #define VRFI(suffix, rounding) \
1486 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1487 ppc_avr_t *b) \
1489 int i; \
1490 float_status s = env->vec_status; \
1492 set_float_rounding_mode(rounding, &s); \
1493 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1494 r->f[i] = float32_round_to_int (b->f[i], &s); \
1497 VRFI(n, float_round_nearest_even)
1498 VRFI(m, float_round_down)
1499 VRFI(p, float_round_up)
1500 VRFI(z, float_round_to_zero)
1501 #undef VRFI
1503 #define VROTATE(suffix, element, mask) \
1504 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1506 int i; \
1508 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1509 unsigned int shift = b->element[i] & mask; \
1510 r->element[i] = (a->element[i] << shift) | \
1511 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1514 VROTATE(b, u8, 0x7)
1515 VROTATE(h, u16, 0xF)
1516 VROTATE(w, u32, 0x1F)
1517 VROTATE(d, u64, 0x3F)
1518 #undef VROTATE
1520 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1522 int i;
1524 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1525 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1527 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1531 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1532 ppc_avr_t *c)
1534 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1535 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1538 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1540 int i;
1542 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1543 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1547 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1549 int i;
1551 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1552 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1556 /* The specification says that the results are undefined if all of the
1557 * shift counts are not identical. We check to make sure that they are
1558 * to conform to what real hardware appears to do. */
1559 #define VSHIFT(suffix, leftp) \
1560 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1562 int shift = b->u8[LO_IDX*15] & 0x7; \
1563 int doit = 1; \
1564 int i; \
1566 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1567 doit = doit && ((b->u8[i] & 0x7) == shift); \
1569 if (doit) { \
1570 if (shift == 0) { \
1571 *r = *a; \
1572 } else if (leftp) { \
1573 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1575 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1576 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1577 } else { \
1578 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1580 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1581 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1585 VSHIFT(l, 1)
1586 VSHIFT(r, 0)
1587 #undef VSHIFT
1589 #define VSL(suffix, element, mask) \
1590 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1592 int i; \
1594 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1595 unsigned int shift = b->element[i] & mask; \
1597 r->element[i] = a->element[i] << shift; \
1600 VSL(b, u8, 0x7)
1601 VSL(h, u16, 0x0F)
1602 VSL(w, u32, 0x1F)
1603 VSL(d, u64, 0x3F)
1604 #undef VSL
1606 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1608 int sh = shift & 0xf;
1609 int i;
1610 ppc_avr_t result;
1612 #if defined(HOST_WORDS_BIGENDIAN)
1613 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1614 int index = sh + i;
1615 if (index > 0xf) {
1616 result.u8[i] = b->u8[index - 0x10];
1617 } else {
1618 result.u8[i] = a->u8[index];
1621 #else
1622 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1623 int index = (16 - sh) + i;
1624 if (index > 0xf) {
1625 result.u8[i] = a->u8[index - 0x10];
1626 } else {
1627 result.u8[i] = b->u8[index];
1630 #endif
1631 *r = result;
1634 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1636 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1638 #if defined(HOST_WORDS_BIGENDIAN)
1639 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1640 memset(&r->u8[16-sh], 0, sh);
1641 #else
1642 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1643 memset(&r->u8[0], 0, sh);
1644 #endif
1647 /* Experimental testing shows that hardware masks the immediate. */
1648 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1649 #if defined(HOST_WORDS_BIGENDIAN)
1650 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1651 #else
1652 #define SPLAT_ELEMENT(element) \
1653 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1654 #endif
1655 #define VSPLT(suffix, element) \
1656 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1658 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1659 int i; \
1661 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1662 r->element[i] = s; \
1665 VSPLT(b, u8)
1666 VSPLT(h, u16)
1667 VSPLT(w, u32)
1668 #undef VSPLT
1669 #undef SPLAT_ELEMENT
1670 #undef _SPLAT_MASKED
1672 #define VSPLTI(suffix, element, splat_type) \
1673 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1675 splat_type x = (int8_t)(splat << 3) >> 3; \
1676 int i; \
1678 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1679 r->element[i] = x; \
1682 VSPLTI(b, s8, int8_t)
1683 VSPLTI(h, s16, int16_t)
1684 VSPLTI(w, s32, int32_t)
1685 #undef VSPLTI
1687 #define VSR(suffix, element, mask) \
1688 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1690 int i; \
1692 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1693 unsigned int shift = b->element[i] & mask; \
1694 r->element[i] = a->element[i] >> shift; \
1697 VSR(ab, s8, 0x7)
1698 VSR(ah, s16, 0xF)
1699 VSR(aw, s32, 0x1F)
1700 VSR(ad, s64, 0x3F)
1701 VSR(b, u8, 0x7)
1702 VSR(h, u16, 0xF)
1703 VSR(w, u32, 0x1F)
1704 VSR(d, u64, 0x3F)
1705 #undef VSR
1707 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1709 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1711 #if defined(HOST_WORDS_BIGENDIAN)
1712 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1713 memset(&r->u8[0], 0, sh);
1714 #else
1715 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1716 memset(&r->u8[16 - sh], 0, sh);
1717 #endif
1720 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1722 int i;
1724 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1725 r->u32[i] = a->u32[i] >= b->u32[i];
1729 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1731 int64_t t;
1732 int i, upper;
1733 ppc_avr_t result;
1734 int sat = 0;
1736 #if defined(HOST_WORDS_BIGENDIAN)
1737 upper = ARRAY_SIZE(r->s32)-1;
1738 #else
1739 upper = 0;
1740 #endif
1741 t = (int64_t)b->s32[upper];
1742 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1743 t += a->s32[i];
1744 result.s32[i] = 0;
1746 result.s32[upper] = cvtsdsw(t, &sat);
1747 *r = result;
1749 if (sat) {
1750 env->vscr |= (1 << VSCR_SAT);
1754 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1756 int i, j, upper;
1757 ppc_avr_t result;
1758 int sat = 0;
1760 #if defined(HOST_WORDS_BIGENDIAN)
1761 upper = 1;
1762 #else
1763 upper = 0;
1764 #endif
1765 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1766 int64_t t = (int64_t)b->s32[upper + i * 2];
1768 result.u64[i] = 0;
1769 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1770 t += a->s32[2 * i + j];
1772 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
1775 *r = result;
1776 if (sat) {
1777 env->vscr |= (1 << VSCR_SAT);
1781 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1783 int i, j;
1784 int sat = 0;
1786 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1787 int64_t t = (int64_t)b->s32[i];
1789 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1790 t += a->s8[4 * i + j];
1792 r->s32[i] = cvtsdsw(t, &sat);
1795 if (sat) {
1796 env->vscr |= (1 << VSCR_SAT);
1800 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1802 int sat = 0;
1803 int i;
1805 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1806 int64_t t = (int64_t)b->s32[i];
1808 t += a->s16[2 * i] + a->s16[2 * i + 1];
1809 r->s32[i] = cvtsdsw(t, &sat);
1812 if (sat) {
1813 env->vscr |= (1 << VSCR_SAT);
1817 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1819 int i, j;
1820 int sat = 0;
1822 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1823 uint64_t t = (uint64_t)b->u32[i];
1825 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1826 t += a->u8[4 * i + j];
1828 r->u32[i] = cvtuduw(t, &sat);
1831 if (sat) {
1832 env->vscr |= (1 << VSCR_SAT);
1836 #if defined(HOST_WORDS_BIGENDIAN)
1837 #define UPKHI 1
1838 #define UPKLO 0
1839 #else
1840 #define UPKHI 0
1841 #define UPKLO 1
1842 #endif
1843 #define VUPKPX(suffix, hi) \
1844 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1846 int i; \
1847 ppc_avr_t result; \
1849 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1850 uint16_t e = b->u16[hi ? i : i+4]; \
1851 uint8_t a = (e >> 15) ? 0xff : 0; \
1852 uint8_t r = (e >> 10) & 0x1f; \
1853 uint8_t g = (e >> 5) & 0x1f; \
1854 uint8_t b = e & 0x1f; \
1856 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1858 *r = result; \
1860 VUPKPX(lpx, UPKLO)
1861 VUPKPX(hpx, UPKHI)
1862 #undef VUPKPX
1864 #define VUPK(suffix, unpacked, packee, hi) \
1865 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1867 int i; \
1868 ppc_avr_t result; \
1870 if (hi) { \
1871 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1872 result.unpacked[i] = b->packee[i]; \
1874 } else { \
1875 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1876 i++) { \
1877 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1880 *r = result; \
1882 VUPK(hsb, s16, s8, UPKHI)
1883 VUPK(hsh, s32, s16, UPKHI)
1884 VUPK(hsw, s64, s32, UPKHI)
1885 VUPK(lsb, s16, s8, UPKLO)
1886 VUPK(lsh, s32, s16, UPKLO)
1887 VUPK(lsw, s64, s32, UPKLO)
1888 #undef VUPK
1889 #undef UPKHI
1890 #undef UPKLO
1892 #define VGENERIC_DO(name, element) \
1893 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1895 int i; \
1897 VECTOR_FOR_INORDER_I(i, element) { \
1898 r->element[i] = name(b->element[i]); \
1902 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1903 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1904 #define clzw(v) clz32((v))
1905 #define clzd(v) clz64((v))
1907 VGENERIC_DO(clzb, u8)
1908 VGENERIC_DO(clzh, u16)
1909 VGENERIC_DO(clzw, u32)
1910 VGENERIC_DO(clzd, u64)
1912 #undef clzb
1913 #undef clzh
1914 #undef clzw
1915 #undef clzd
1917 #define popcntb(v) ctpop8(v)
1918 #define popcnth(v) ctpop16(v)
1919 #define popcntw(v) ctpop32(v)
1920 #define popcntd(v) ctpop64(v)
1922 VGENERIC_DO(popcntb, u8)
1923 VGENERIC_DO(popcnth, u16)
1924 VGENERIC_DO(popcntw, u32)
1925 VGENERIC_DO(popcntd, u64)
1927 #undef popcntb
1928 #undef popcnth
1929 #undef popcntw
1930 #undef popcntd
1932 #undef VGENERIC_DO
1934 #if defined(HOST_WORDS_BIGENDIAN)
1935 #define QW_ONE { .u64 = { 0, 1 } }
1936 #else
1937 #define QW_ONE { .u64 = { 1, 0 } }
1938 #endif
1940 #ifndef CONFIG_INT128
1942 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1944 t->u64[0] = ~a.u64[0];
1945 t->u64[1] = ~a.u64[1];
1948 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1950 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
1951 return -1;
1952 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
1953 return 1;
1954 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
1955 return -1;
1956 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
1957 return 1;
1958 } else {
1959 return 0;
1963 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1965 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1966 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1967 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1970 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1972 ppc_avr_t not_a;
1973 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1974 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1975 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1976 avr_qw_not(&not_a, a);
1977 return avr_qw_cmpu(not_a, b) < 0;
1980 #endif
1982 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1984 #ifdef CONFIG_INT128
1985 r->u128 = a->u128 + b->u128;
1986 #else
1987 avr_qw_add(r, *a, *b);
1988 #endif
1991 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1993 #ifdef CONFIG_INT128
1994 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1995 #else
1997 if (c->u64[LO_IDX] & 1) {
1998 ppc_avr_t tmp;
2000 tmp.u64[HI_IDX] = 0;
2001 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2002 avr_qw_add(&tmp, *a, tmp);
2003 avr_qw_add(r, tmp, *b);
2004 } else {
2005 avr_qw_add(r, *a, *b);
2007 #endif
2010 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2012 #ifdef CONFIG_INT128
2013 r->u128 = (~a->u128 < b->u128);
2014 #else
2015 ppc_avr_t not_a;
2017 avr_qw_not(&not_a, *a);
2019 r->u64[HI_IDX] = 0;
2020 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2021 #endif
2024 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2026 #ifdef CONFIG_INT128
2027 int carry_out = (~a->u128 < b->u128);
2028 if (!carry_out && (c->u128 & 1)) {
2029 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2030 ((a->u128 != 0) || (b->u128 != 0));
2032 r->u128 = carry_out;
2033 #else
2035 int carry_in = c->u64[LO_IDX] & 1;
2036 int carry_out = 0;
2037 ppc_avr_t tmp;
2039 carry_out = avr_qw_addc(&tmp, *a, *b);
2041 if (!carry_out && carry_in) {
2042 ppc_avr_t one = QW_ONE;
2043 carry_out = avr_qw_addc(&tmp, tmp, one);
2045 r->u64[HI_IDX] = 0;
2046 r->u64[LO_IDX] = carry_out;
2047 #endif
2050 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2052 #ifdef CONFIG_INT128
2053 r->u128 = a->u128 - b->u128;
2054 #else
2055 ppc_avr_t tmp;
2056 ppc_avr_t one = QW_ONE;
2058 avr_qw_not(&tmp, *b);
2059 avr_qw_add(&tmp, *a, tmp);
2060 avr_qw_add(r, tmp, one);
2061 #endif
2064 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2066 #ifdef CONFIG_INT128
2067 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2068 #else
2069 ppc_avr_t tmp, sum;
2071 avr_qw_not(&tmp, *b);
2072 avr_qw_add(&sum, *a, tmp);
2074 tmp.u64[HI_IDX] = 0;
2075 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2076 avr_qw_add(r, sum, tmp);
2077 #endif
2080 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2082 #ifdef CONFIG_INT128
2083 r->u128 = (~a->u128 < ~b->u128) ||
2084 (a->u128 + ~b->u128 == (__uint128_t)-1);
2085 #else
2086 int carry = (avr_qw_cmpu(*a, *b) > 0);
2087 if (!carry) {
2088 ppc_avr_t tmp;
2089 avr_qw_not(&tmp, *b);
2090 avr_qw_add(&tmp, *a, tmp);
2091 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2093 r->u64[HI_IDX] = 0;
2094 r->u64[LO_IDX] = carry;
2095 #endif
2098 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2100 #ifdef CONFIG_INT128
2101 r->u128 =
2102 (~a->u128 < ~b->u128) ||
2103 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2104 #else
2105 int carry_in = c->u64[LO_IDX] & 1;
2106 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2107 if (!carry_out && carry_in) {
2108 ppc_avr_t tmp;
2109 avr_qw_not(&tmp, *b);
2110 avr_qw_add(&tmp, *a, tmp);
2111 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2114 r->u64[HI_IDX] = 0;
2115 r->u64[LO_IDX] = carry_out;
2116 #endif
2119 #define BCD_PLUS_PREF_1 0xC
2120 #define BCD_PLUS_PREF_2 0xF
2121 #define BCD_PLUS_ALT_1 0xA
2122 #define BCD_NEG_PREF 0xD
2123 #define BCD_NEG_ALT 0xB
2124 #define BCD_PLUS_ALT_2 0xE
2126 #if defined(HOST_WORDS_BIGENDIAN)
2127 #define BCD_DIG_BYTE(n) (15 - (n/2))
2128 #else
2129 #define BCD_DIG_BYTE(n) (n/2)
2130 #endif
2132 static int bcd_get_sgn(ppc_avr_t *bcd)
2134 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2135 case BCD_PLUS_PREF_1:
2136 case BCD_PLUS_PREF_2:
2137 case BCD_PLUS_ALT_1:
2138 case BCD_PLUS_ALT_2:
2140 return 1;
2143 case BCD_NEG_PREF:
2144 case BCD_NEG_ALT:
2146 return -1;
2149 default:
2151 return 0;
2156 static int bcd_preferred_sgn(int sgn, int ps)
2158 if (sgn >= 0) {
2159 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2160 } else {
2161 return BCD_NEG_PREF;
2165 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2167 uint8_t result;
2168 if (n & 1) {
2169 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2170 } else {
2171 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2174 if (unlikely(result > 9)) {
2175 *invalid = true;
2177 return result;
2180 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2182 if (n & 1) {
2183 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2184 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2185 } else {
2186 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2187 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2191 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2193 int i;
2194 int invalid = 0;
2195 for (i = 31; i > 0; i--) {
2196 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2197 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2198 if (unlikely(invalid)) {
2199 return 0; /* doesn't matter */
2200 } else if (dig_a > dig_b) {
2201 return 1;
2202 } else if (dig_a < dig_b) {
2203 return -1;
2207 return 0;
2210 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2211 int *overflow)
2213 int carry = 0;
2214 int i;
2215 int is_zero = 1;
2216 for (i = 1; i <= 31; i++) {
2217 uint8_t digit = bcd_get_digit(a, i, invalid) +
2218 bcd_get_digit(b, i, invalid) + carry;
2219 is_zero &= (digit == 0);
2220 if (digit > 9) {
2221 carry = 1;
2222 digit -= 10;
2223 } else {
2224 carry = 0;
2227 bcd_put_digit(t, digit, i);
2229 if (unlikely(*invalid)) {
2230 return -1;
2234 *overflow = carry;
2235 return is_zero;
2238 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2239 int *overflow)
2241 int carry = 0;
2242 int i;
2243 int is_zero = 1;
2244 for (i = 1; i <= 31; i++) {
2245 uint8_t digit = bcd_get_digit(a, i, invalid) -
2246 bcd_get_digit(b, i, invalid) + carry;
2247 is_zero &= (digit == 0);
2248 if (digit & 0x80) {
2249 carry = -1;
2250 digit += 10;
2251 } else {
2252 carry = 0;
2255 bcd_put_digit(t, digit, i);
2257 if (unlikely(*invalid)) {
2258 return -1;
2262 *overflow = carry;
2263 return is_zero;
2266 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2269 int sgna = bcd_get_sgn(a);
2270 int sgnb = bcd_get_sgn(b);
2271 int invalid = (sgna == 0) || (sgnb == 0);
2272 int overflow = 0;
2273 int zero = 0;
2274 uint32_t cr = 0;
2275 ppc_avr_t result = { .u64 = { 0, 0 } };
2277 if (!invalid) {
2278 if (sgna == sgnb) {
2279 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2280 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2281 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2282 } else if (bcd_cmp_mag(a, b) > 0) {
2283 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2284 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2285 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2286 } else {
2287 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2288 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2289 cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2293 if (unlikely(invalid)) {
2294 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2295 cr = 1 << CRF_SO;
2296 } else if (overflow) {
2297 cr |= 1 << CRF_SO;
2298 } else if (zero) {
2299 cr = 1 << CRF_EQ;
2302 *r = result;
2304 return cr;
2307 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2309 ppc_avr_t bcopy = *b;
2310 int sgnb = bcd_get_sgn(b);
2311 if (sgnb < 0) {
2312 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2313 } else if (sgnb > 0) {
2314 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2316 /* else invalid ... defer to bcdadd code for proper handling */
2318 return helper_bcdadd(r, a, &bcopy, ps);
2321 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2323 int i;
2324 VECTOR_FOR_INORDER_I(i, u8) {
2325 r->u8[i] = AES_sbox[a->u8[i]];
2329 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2331 ppc_avr_t result;
2332 int i;
2334 VECTOR_FOR_INORDER_I(i, u32) {
2335 result.AVRW(i) = b->AVRW(i) ^
2336 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2337 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2338 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2339 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2341 *r = result;
2344 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2346 ppc_avr_t result;
2347 int i;
2349 VECTOR_FOR_INORDER_I(i, u8) {
2350 result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
2352 *r = result;
2355 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2357 /* This differs from what is written in ISA V2.07. The RTL is */
2358 /* incorrect and will be fixed in V2.07B. */
2359 int i;
2360 ppc_avr_t tmp;
2362 VECTOR_FOR_INORDER_I(i, u8) {
2363 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2366 VECTOR_FOR_INORDER_I(i, u32) {
2367 r->AVRW(i) =
2368 AES_imc[tmp.AVRB(4*i + 0)][0] ^
2369 AES_imc[tmp.AVRB(4*i + 1)][1] ^
2370 AES_imc[tmp.AVRB(4*i + 2)][2] ^
2371 AES_imc[tmp.AVRB(4*i + 3)][3];
2375 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2377 ppc_avr_t result;
2378 int i;
2380 VECTOR_FOR_INORDER_I(i, u8) {
2381 result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
2383 *r = result;
2386 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
2387 #if defined(HOST_WORDS_BIGENDIAN)
2388 #define EL_IDX(i) (i)
2389 #else
2390 #define EL_IDX(i) (3 - (i))
2391 #endif
2393 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2395 int st = (st_six & 0x10) != 0;
2396 int six = st_six & 0xF;
2397 int i;
2399 VECTOR_FOR_INORDER_I(i, u32) {
2400 if (st == 0) {
2401 if ((six & (0x8 >> i)) == 0) {
2402 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2403 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2404 (a->u32[EL_IDX(i)] >> 3);
2405 } else { /* six.bit[i] == 1 */
2406 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2407 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2408 (a->u32[EL_IDX(i)] >> 10);
2410 } else { /* st == 1 */
2411 if ((six & (0x8 >> i)) == 0) {
2412 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2413 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2414 ROTRu32(a->u32[EL_IDX(i)], 22);
2415 } else { /* six.bit[i] == 1 */
2416 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2417 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2418 ROTRu32(a->u32[EL_IDX(i)], 25);
2424 #undef ROTRu32
2425 #undef EL_IDX
2427 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
2428 #if defined(HOST_WORDS_BIGENDIAN)
2429 #define EL_IDX(i) (i)
2430 #else
2431 #define EL_IDX(i) (1 - (i))
2432 #endif
2434 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2436 int st = (st_six & 0x10) != 0;
2437 int six = st_six & 0xF;
2438 int i;
2440 VECTOR_FOR_INORDER_I(i, u64) {
2441 if (st == 0) {
2442 if ((six & (0x8 >> (2*i))) == 0) {
2443 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2444 ROTRu64(a->u64[EL_IDX(i)], 8) ^
2445 (a->u64[EL_IDX(i)] >> 7);
2446 } else { /* six.bit[2*i] == 1 */
2447 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2448 ROTRu64(a->u64[EL_IDX(i)], 61) ^
2449 (a->u64[EL_IDX(i)] >> 6);
2451 } else { /* st == 1 */
2452 if ((six & (0x8 >> (2*i))) == 0) {
2453 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2454 ROTRu64(a->u64[EL_IDX(i)], 34) ^
2455 ROTRu64(a->u64[EL_IDX(i)], 39);
2456 } else { /* six.bit[2*i] == 1 */
2457 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2458 ROTRu64(a->u64[EL_IDX(i)], 18) ^
2459 ROTRu64(a->u64[EL_IDX(i)], 41);
2465 #undef ROTRu64
2466 #undef EL_IDX
2468 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2470 ppc_avr_t result;
2471 int i;
2473 VECTOR_FOR_INORDER_I(i, u8) {
2474 int indexA = c->u8[i] >> 4;
2475 int indexB = c->u8[i] & 0xF;
2476 #if defined(HOST_WORDS_BIGENDIAN)
2477 result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
2478 #else
2479 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
2480 #endif
2482 *r = result;
2485 #undef VECTOR_FOR_INORDER_I
2486 #undef HI_IDX
2487 #undef LO_IDX
/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
/* hbrev[n] is the 4-bit value n with its bit order reversed. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order of a byte: swap the nibbles and reverse each one. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/*
 * Reverse the bit order of a 32-bit word: the bytes swap places and each
 * one is bit-reversed.  Every reversed byte is widened to uint32_t before
 * it is shifted, because shifting a promoted int left by 24 overflows into
 * the sign bit for byte values of 0x80 and above.
 */
static inline uint32_t word_reverse(uint32_t val)
{
    uint32_t b3 = byte_reverse(val >> 24);
    uint32_t b2 = byte_reverse(val >> 16);
    uint32_t b1 = byte_reverse(val >> 8);
    uint32_t b0 = byte_reverse(val);

    return b3 | (b2 << 8) | (b1 << 16) | (b0 << 24);
}
2508 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
2509 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2511 uint32_t a, b, d, mask;
2513 mask = UINT32_MAX >> (32 - MASKBITS);
2514 a = arg1 & mask;
2515 b = arg2 & mask;
2516 d = word_reverse(1 + word_reverse(a | ~b));
2517 return (arg1 & ~mask) | (d & b);
/*
 * Count leading sign bits of a 32-bit value: clz32 of the value itself when
 * bit 31 is clear, and clz32 of its complement when bit 31 is set.
 */
uint32_t helper_cntlsw32(uint32_t val)
{
    const uint32_t probe = (val & 0x80000000) ? ~val : val;

    return clz32(probe);
}
/* Count leading zero bits of a 32-bit value; thin wrapper around clz32. */
uint32_t helper_cntlzw32(uint32_t val)
{
    const uint32_t leading_zeros = clz32(val);

    return leading_zeros;
}
2534 /* 440 specific */
2535 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2536 target_ulong low, uint32_t update_Rc)
2538 target_ulong mask;
2539 int i;
2541 i = 1;
2542 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2543 if ((high & mask) == 0) {
2544 if (update_Rc) {
2545 env->crf[0] = 0x4;
2547 goto done;
2549 i++;
2551 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2552 if ((low & mask) == 0) {
2553 if (update_Rc) {
2554 env->crf[0] = 0x8;
2556 goto done;
2558 i++;
2560 i = 8;
2561 if (update_Rc) {
2562 env->crf[0] = 0x2;
2564 done:
2565 env->xer = (env->xer & ~0x7F) | i;
2566 if (update_Rc) {
2567 env->crf[0] |= xer_so;
2569 return i;