virtio-net: mark VIRTIO_NET_F_GSO as legacy
[qemu.git] / target-ppc / int_helper.c
blobdca479838df4b399a6775284809bad0df8d457a3
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "exec/exec-all.h"
22 #include "qemu/host-utils.h"
23 #include "exec/helper-proto.h"
24 #include "crypto/aes.h"
26 #include "helper_regs.h"
27 /*****************************************************************************/
28 /* Fixed point operations helpers */
30 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
31 uint32_t oe)
33 uint64_t rt = 0;
34 int overflow = 0;
36 uint64_t dividend = (uint64_t)ra << 32;
37 uint64_t divisor = (uint32_t)rb;
39 if (unlikely(divisor == 0)) {
40 overflow = 1;
41 } else {
42 rt = dividend / divisor;
43 overflow = rt > UINT32_MAX;
46 if (unlikely(overflow)) {
47 rt = 0; /* Undefined */
50 if (oe) {
51 if (unlikely(overflow)) {
52 env->so = env->ov = 1;
53 } else {
54 env->ov = 0;
58 return (target_ulong)rt;
61 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
62 uint32_t oe)
64 int64_t rt = 0;
65 int overflow = 0;
67 int64_t dividend = (int64_t)ra << 32;
68 int64_t divisor = (int64_t)((int32_t)rb);
70 if (unlikely((divisor == 0) ||
71 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
72 overflow = 1;
73 } else {
74 rt = dividend / divisor;
75 overflow = rt != (int32_t)rt;
78 if (unlikely(overflow)) {
79 rt = 0; /* Undefined */
82 if (oe) {
83 if (unlikely(overflow)) {
84 env->so = env->ov = 1;
85 } else {
86 env->ov = 0;
90 return (target_ulong)rt;
93 #if defined(TARGET_PPC64)
95 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
97 uint64_t rt = 0;
98 int overflow = 0;
100 overflow = divu128(&rt, &ra, rb);
102 if (unlikely(overflow)) {
103 rt = 0; /* Undefined */
106 if (oe) {
107 if (unlikely(overflow)) {
108 env->so = env->ov = 1;
109 } else {
110 env->ov = 0;
114 return rt;
117 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
119 int64_t rt = 0;
120 int64_t ra = (int64_t)rau;
121 int64_t rb = (int64_t)rbu;
122 int overflow = divs128(&rt, &ra, rb);
124 if (unlikely(overflow)) {
125 rt = 0; /* Undefined */
128 if (oe) {
130 if (unlikely(overflow)) {
131 env->so = env->ov = 1;
132 } else {
133 env->ov = 0;
137 return rt;
140 #endif
143 target_ulong helper_cntlzw(target_ulong t)
145 return clz32(t);
148 target_ulong helper_cnttzw(target_ulong t)
150 return ctz32(t);
153 #if defined(TARGET_PPC64)
154 /* if x = 0xab, returns 0xababababababababa */
155 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
157 /* substract 1 from each byte, and with inverse, check if MSB is set at each
158 * byte.
159 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
160 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
162 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
164 /* When you XOR the pattern and there is a match, that byte will be zero */
165 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
167 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
169 return hasvalue(rb, ra) ? 1 << CRF_GT : 0;
172 #undef pattern
173 #undef haszero
174 #undef hasvalue
176 target_ulong helper_cntlzd(target_ulong t)
178 return clz64(t);
181 target_ulong helper_cnttzd(target_ulong t)
183 return ctz64(t);
186 /* Return invalid random number.
188 * FIXME: Add rng backend or other mechanism to get cryptographically suitable
189 * random number
191 target_ulong helper_darn32(void)
193 return -1;
196 target_ulong helper_darn64(void)
198 return -1;
201 #endif
203 #if defined(TARGET_PPC64)
/*
 * bpermd: bit permute doubleword.
 *
 * Each of the eight bytes of rs is a bit selector.  For byte n (counting
 * from the least significant byte), if the selector is below 64 and bit
 * (63 - selector) of rb is set, bit n of the result is set.  Selectors of
 * 64 or more contribute a zero bit.
 */
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    uint64_t perm = 0;
    int n;

    for (n = 0; n < 8; n++) {
        const int sel = (rs >> (n * 8)) & 0xFF;

        if (sel >= 64) {
            continue;
        }
        if ((rb >> (63 - sel)) & 1) {
            perm |= 1 << n;
        }
    }

    return perm;
}
221 #endif
223 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
225 target_ulong mask = 0xff;
226 target_ulong ra = 0;
227 int i;
229 for (i = 0; i < sizeof(target_ulong); i++) {
230 if ((rs & mask) == (rb & mask)) {
231 ra |= mask;
233 mask <<= 8;
235 return ra;
238 /* shift right arithmetic helper */
239 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
240 target_ulong shift)
242 int32_t ret;
244 if (likely(!(shift & 0x20))) {
245 if (likely((uint32_t)shift != 0)) {
246 shift &= 0x1f;
247 ret = (int32_t)value >> shift;
248 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
249 env->ca = 0;
250 } else {
251 env->ca = 1;
253 } else {
254 ret = (int32_t)value;
255 env->ca = 0;
257 } else {
258 ret = (int32_t)value >> 31;
259 env->ca = (ret != 0);
261 return (target_long)ret;
264 #if defined(TARGET_PPC64)
265 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
266 target_ulong shift)
268 int64_t ret;
270 if (likely(!(shift & 0x40))) {
271 if (likely((uint64_t)shift != 0)) {
272 shift &= 0x3f;
273 ret = (int64_t)value >> shift;
274 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
275 env->ca = 0;
276 } else {
277 env->ca = 1;
279 } else {
280 ret = (int64_t)value;
281 env->ca = 0;
283 } else {
284 ret = (int64_t)value >> 63;
285 env->ca = (ret != 0);
287 return ret;
289 #endif
291 #if defined(TARGET_PPC64)
292 target_ulong helper_popcntb(target_ulong val)
294 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
295 0x5555555555555555ULL);
296 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
297 0x3333333333333333ULL);
298 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
299 0x0f0f0f0f0f0f0f0fULL);
300 return val;
303 target_ulong helper_popcntw(target_ulong val)
305 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
306 0x5555555555555555ULL);
307 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
308 0x3333333333333333ULL);
309 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
310 0x0f0f0f0f0f0f0f0fULL);
311 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
312 0x00ff00ff00ff00ffULL);
313 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
314 0x0000ffff0000ffffULL);
315 return val;
318 target_ulong helper_popcntd(target_ulong val)
320 return ctpop64(val);
322 #else
323 target_ulong helper_popcntb(target_ulong val)
325 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
326 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
327 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
328 return val;
331 target_ulong helper_popcntw(target_ulong val)
333 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
334 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
335 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
336 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
337 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
338 return val;
340 #endif
342 /*****************************************************************************/
343 /* PowerPC 601 specific instructions (POWER bridge) */
344 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
346 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
348 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
349 (int32_t)arg2 == 0) {
350 env->spr[SPR_MQ] = 0;
351 return INT32_MIN;
352 } else {
353 env->spr[SPR_MQ] = tmp % arg2;
354 return tmp / (int32_t)arg2;
358 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
359 target_ulong arg2)
361 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
363 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
364 (int32_t)arg2 == 0) {
365 env->so = env->ov = 1;
366 env->spr[SPR_MQ] = 0;
367 return INT32_MIN;
368 } else {
369 env->spr[SPR_MQ] = tmp % arg2;
370 tmp /= (int32_t)arg2;
371 if ((int32_t)tmp != tmp) {
372 env->so = env->ov = 1;
373 } else {
374 env->ov = 0;
376 return tmp;
380 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
381 target_ulong arg2)
383 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
384 (int32_t)arg2 == 0) {
385 env->spr[SPR_MQ] = 0;
386 return INT32_MIN;
387 } else {
388 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
389 return (int32_t)arg1 / (int32_t)arg2;
393 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
394 target_ulong arg2)
396 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
397 (int32_t)arg2 == 0) {
398 env->so = env->ov = 1;
399 env->spr[SPR_MQ] = 0;
400 return INT32_MIN;
401 } else {
402 env->ov = 0;
403 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
404 return (int32_t)arg1 / (int32_t)arg2;
408 /*****************************************************************************/
409 /* 602 specific instructions */
410 /* mfrom is the most crazy instruction ever seen, imho ! */
411 /* Real implementation uses a ROM table. Do the same */
412 /* Extremely decomposed:
413 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
415 */
416 #if !defined(CONFIG_USER_ONLY)
417 target_ulong helper_602_mfrom(target_ulong arg)
419 if (likely(arg < 602)) {
420 #include "mfrom_table.c"
421 return mfrom_ROM_table[arg];
422 } else {
423 return 0;
426 #endif
428 /*****************************************************************************/
429 /* Altivec extension helpers */
/*
 * Host-endianness accessors for vector registers.
 * HI_IDX/LO_IDX are 0/1 on big-endian hosts and 1/0 otherwise.
 * AVRB(i)/AVRW(i) index u8[]/u32[] directly on big-endian hosts and
 * mirrored (15 - i, 3 - i) otherwise.
 */
430 #if defined(HOST_WORDS_BIGENDIAN)
431 #define HI_IDX 0
432 #define LO_IDX 1
433 #define AVRB(i) u8[i]
434 #define AVRW(i) u32[i]
435 #else
436 #define HI_IDX 1
437 #define LO_IDX 0
438 #define AVRB(i) u8[15-(i)]
439 #define AVRW(i) u32[3-(i)]
440 #endif
/*
 * Loop header over the elements of r->element: ascending host index on
 * big-endian hosts, descending otherwise.  Requires a variable named "r" in
 * scope and a signed "index" (the descending form tests index >= 0).
 */
442 #if defined(HOST_WORDS_BIGENDIAN)
443 #define VECTOR_FOR_INORDER_I(index, element) \
444 for (index = 0; index < ARRAY_SIZE(r->element); index++)
445 #else
446 #define VECTOR_FOR_INORDER_I(index, element) \
447 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
448 #endif
/*
 * SATCVT(from, to, ...) defines cvt<from><to>(x, sat), which clamps x to
 * [min, max] and returns it as to_type.  *sat is set to 1 when clamping
 * happened and is never cleared, so callers can accumulate it over a loop.
 * SATCVTU is the variant that only checks the upper bound.
 */
450 /* Saturating arithmetic helpers. */
451 #define SATCVT(from, to, from_type, to_type, min, max) \
452 static inline to_type cvt##from##to(from_type x, int *sat) \
454 to_type r; \
456 if (x < (from_type)min) { \
457 r = min; \
458 *sat = 1; \
459 } else if (x > (from_type)max) { \
460 r = max; \
461 *sat = 1; \
462 } else { \
463 r = x; \
465 return r; \
467 #define SATCVTU(from, to, from_type, to_type, min, max) \
468 static inline to_type cvt##from##to(from_type x, int *sat) \
470 to_type r; \
472 if (x > (from_type)max) { \
473 r = max; \
474 *sat = 1; \
475 } else { \
476 r = x; \
478 return r; \
480 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
481 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
482 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
484 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
485 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
486 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
487 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
488 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
489 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
490 #undef SATCVT
491 #undef SATCVTU
493 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
495 int i, j = (sh & 0xf);
497 VECTOR_FOR_INORDER_I(i, u8) {
498 r->u8[i] = j++;
502 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
504 int i, j = 0x10 - (sh & 0xf);
506 VECTOR_FOR_INORDER_I(i, u8) {
507 r->u8[i] = j++;
511 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
513 #if defined(HOST_WORDS_BIGENDIAN)
514 env->vscr = r->u32[3];
515 #else
516 env->vscr = r->u32[0];
517 #endif
518 set_flush_to_zero(vscr_nj, &env->vec_status);
521 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
523 int i;
525 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
526 r->u32[i] = ~a->u32[i] < b->u32[i];
/*
 * VARITH_DO(name, op, element) defines helper_v<name>(r, a, b), which
 * applies "op" element-wise: r[i] = a[i] op b[i].  VARITH(suffix, element)
 * instantiates the add<suffix> and sub<suffix> helpers for one element
 * type; muluwm is instantiated directly with "*" on u32.
 */
530 #define VARITH_DO(name, op, element) \
531 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
533 int i; \
535 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
536 r->element[i] = a->element[i] op b->element[i]; \
539 #define VARITH(suffix, element) \
540 VARITH_DO(add##suffix, +, element) \
541 VARITH_DO(sub##suffix, -, element)
542 VARITH(ubm, u8)
543 VARITH(uhm, u16)
544 VARITH(uwm, u32)
545 VARITH(udm, u64)
546 VARITH_DO(muluwm, *, u32)
547 #undef VARITH_DO
548 #undef VARITH
/*
 * VARITHFP(suffix, func) defines helper_v<suffix>(env, r, a, b), which
 * applies the binary float32 routine "func" element-wise using
 * env->vec_status as the float status.
 */
550 #define VARITHFP(suffix, func) \
551 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
552 ppc_avr_t *b) \
554 int i; \
556 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
557 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
560 VARITHFP(addfp, float32_add)
561 VARITHFP(subfp, float32_sub)
562 VARITHFP(minfp, float32_min)
563 VARITHFP(maxfp, float32_max)
564 #undef VARITHFP
/*
 * VARITHFPFMA(suffix, type) defines helper_v<suffix>(env, r, a, b, c),
 * which calls float32_muladd(a, c, b, type, ...) per element.  Note the
 * operand order: a and c are the first two arguments and b the third.
 * "type" carries the float_muladd_* flags (0 for maddfp).
 */
566 #define VARITHFPFMA(suffix, type) \
567 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
568 ppc_avr_t *b, ppc_avr_t *c) \
570 int i; \
571 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
572 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
573 type, &env->vec_status); \
576 VARITHFPFMA(maddfp, 0);
577 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
578 #undef VARITHFPFMA
/*
 * Saturating vector add/subtract.
 * VARITHSAT_CASE computes a[i] op b[i] in the wider type "type" and stores
 * cvt(result, &sat) into r[i]; it relies on i, a, b, r and sat from the
 * enclosing function.
 * VARITHSAT_DO defines helper_v<name>(env, r, a, b); its switch on the
 * element size expands the same VARITHSAT_CASE for sizes 1, 2 and 4, and
 * the VSCR_SAT bit of env->vscr is set if any element saturated.
 * VARITHSAT_SIGNED/UNSIGNED instantiate the adds/subs and addu/subu pairs.
 */
580 #define VARITHSAT_CASE(type, op, cvt, element) \
582 type result = (type)a->element[i] op (type)b->element[i]; \
583 r->element[i] = cvt(result, &sat); \
586 #define VARITHSAT_DO(name, op, optype, cvt, element) \
587 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
588 ppc_avr_t *b) \
590 int sat = 0; \
591 int i; \
593 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
594 switch (sizeof(r->element[0])) { \
595 case 1: \
596 VARITHSAT_CASE(optype, op, cvt, element); \
597 break; \
598 case 2: \
599 VARITHSAT_CASE(optype, op, cvt, element); \
600 break; \
601 case 4: \
602 VARITHSAT_CASE(optype, op, cvt, element); \
603 break; \
606 if (sat) { \
607 env->vscr |= (1 << VSCR_SAT); \
610 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
611 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
612 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
613 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
614 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
615 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
616 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
617 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
618 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
619 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
620 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
621 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
622 #undef VARITHSAT_CASE
623 #undef VARITHSAT_DO
624 #undef VARITHSAT_SIGNED
625 #undef VARITHSAT_UNSIGNED
/*
 * VAVG_DO(name, element, etype) defines helper_v<name>(r, a, b), the
 * rounded element-wise average (a + b + 1) >> 1 computed in the wider type
 * "etype" so the intermediate sum cannot wrap.  VAVG instantiates the
 * signed and unsigned helper for one element width.
 */
627 #define VAVG_DO(name, element, etype) \
628 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
630 int i; \
632 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
633 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
634 r->element[i] = x >> 1; \
638 #define VAVG(type, signed_element, signed_type, unsigned_element, \
639 unsigned_type) \
640 VAVG_DO(avgs##type, signed_element, signed_type) \
641 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
642 VAVG(b, s8, int16_t, u8, uint16_t)
643 VAVG(h, s16, int32_t, u16, uint32_t)
644 VAVG(w, s32, int64_t, u32, uint64_t)
645 #undef VAVG_DO
646 #undef VAVG
/*
 * VABSDU_DO(name, element) defines helper_v<name>(r, a, b), the element-wise
 * absolute difference: the smaller operand is always subtracted from the
 * larger one, so the unsigned subtraction never wraps.
 */
648 #define VABSDU_DO(name, element) \
649 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
651 int i; \
653 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
654 r->element[i] = (a->element[i] > b->element[i]) ? \
655 (a->element[i] - b->element[i]) : \
656 (b->element[i] - a->element[i]); \
660 /* VABSDU - Vector absolute difference unsigned
661 * type - instruction mnemonic suffix (b: byte, h: halfword, w: word)
662 * element - element type to access from vector
 */
664 #define VABSDU(type, element) \
665 VABSDU_DO(absdu##type, element)
666 VABSDU(b, u8)
667 VABSDU(h, u16)
668 VABSDU(w, u32)
669 #undef VABSDU_DO
670 #undef VABSDU
/*
 * VCF(suffix, cvt, element) defines helper_vcf<suffix>(env, r, b, uim):
 * each integer element of b is converted to float32 with "cvt" and then
 * scaled with float32_scalbn(t, -uim, ...), using env->vec_status.
 */
672 #define VCF(suffix, cvt, element) \
673 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
674 ppc_avr_t *b, uint32_t uim) \
676 int i; \
678 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
679 float32 t = cvt(b->element[i], &env->vec_status); \
680 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
683 VCF(ux, uint32_to_float32, u32)
684 VCF(sx, int32_to_float32, s32)
685 #undef VCF
/*
 * VCMP_DO(suffix, compare, element, record) defines
 * helper_vcmp<suffix>(env, r, a, b).  Each element of r becomes all-ones
 * when "a compare b" holds and zero otherwise; the store width is picked by
 * the switch on the element size.  "all" ANDs and "none" ORs the
 * per-element results; when record is non-zero, env->crf[6] gets bit 3 if
 * every comparison was true and bit 1 if none was.
 * VCMP instantiates the plain and the "_dot" (recording) variants.
 */
687 #define VCMP_DO(suffix, compare, element, record) \
688 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
689 ppc_avr_t *a, ppc_avr_t *b) \
691 uint64_t ones = (uint64_t)-1; \
692 uint64_t all = ones; \
693 uint64_t none = 0; \
694 int i; \
696 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
697 uint64_t result = (a->element[i] compare b->element[i] ? \
698 ones : 0x0); \
699 switch (sizeof(a->element[0])) { \
700 case 8: \
701 r->u64[i] = result; \
702 break; \
703 case 4: \
704 r->u32[i] = result; \
705 break; \
706 case 2: \
707 r->u16[i] = result; \
708 break; \
709 case 1: \
710 r->u8[i] = result; \
711 break; \
713 all &= result; \
714 none |= result; \
716 if (record) { \
717 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
720 #define VCMP(suffix, compare, element) \
721 VCMP_DO(suffix, compare, element, 0) \
722 VCMP_DO(suffix##_dot, compare, element, 1)
723 VCMP(equb, ==, u8)
724 VCMP(equh, ==, u16)
725 VCMP(equw, ==, u32)
726 VCMP(equd, ==, u64)
727 VCMP(gtub, >, u8)
728 VCMP(gtuh, >, u16)
729 VCMP(gtuw, >, u32)
730 VCMP(gtud, >, u64)
731 VCMP(gtsb, >, s8)
732 VCMP(gtsh, >, s16)
733 VCMP(gtsw, >, s32)
734 VCMP(gtsd, >, s64)
735 #undef VCMP_DO
736 #undef VCMP
/*
 * VCMPNE_DO(suffix, element, etype, cmpzero, record) defines
 * helper_vcmpne<suffix>(env, r, a, b).  Each element of r becomes all-ones
 * when a[i] != b[i]; with cmpzero set, it is also all-ones when either
 * operand element is zero.  When record is non-zero, env->crf[6] gets bit 3
 * if every element matched the condition and bit 1 if none did.
 */
738 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
739 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
740 ppc_avr_t *a, ppc_avr_t *b) \
742 etype ones = (etype)-1; \
743 etype all = ones; \
744 etype result, none = 0; \
745 int i; \
747 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
748 if (cmpzero) { \
749 result = ((a->element[i] == 0) \
750 || (b->element[i] == 0) \
751 || (a->element[i] != b->element[i]) ? \
752 ones : 0x0); \
753 } else { \
754 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
756 r->element[i] = result; \
757 all &= result; \
758 none |= result; \
760 if (record) { \
761 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
765 /* VCMPNE - Vector compare not equal (cmpzero = 1: "or zero" forms)
766 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
767 * element - element type to access from vector
 */
769 #define VCMPNE(suffix, element, etype, cmpzero) \
770 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
771 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
772 VCMPNE(zb, u8, uint8_t, 1)
773 VCMPNE(zh, u16, uint16_t, 1)
774 VCMPNE(zw, u32, uint32_t, 1)
775 VCMPNE(b, u8, uint8_t, 0)
776 VCMPNE(h, u16, uint16_t, 0)
777 VCMPNE(w, u32, uint32_t, 0)
778 #undef VCMPNE_DO
779 #undef VCMPNE
/*
 * VCMPFP_DO(suffix, compare, order, record) defines
 * helper_vcmp<suffix>(env, r, a, b) for float32 elements.  The relation
 * from float32_compare_quiet() is tested with "rel compare order"; an
 * unordered relation always yields 0.  True elements become all-ones.
 * When record is non-zero, env->crf[6] gets bit 3 if every element was true
 * and bit 1 if none was.  "gefp" is expressed as "not less" once the
 * unordered case has been excluded.
 */
781 #define VCMPFP_DO(suffix, compare, order, record) \
782 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
783 ppc_avr_t *a, ppc_avr_t *b) \
785 uint32_t ones = (uint32_t)-1; \
786 uint32_t all = ones; \
787 uint32_t none = 0; \
788 int i; \
790 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
791 uint32_t result; \
792 int rel = float32_compare_quiet(a->f[i], b->f[i], \
793 &env->vec_status); \
794 if (rel == float_relation_unordered) { \
795 result = 0; \
796 } else if (rel compare order) { \
797 result = ones; \
798 } else { \
799 result = 0; \
801 r->u32[i] = result; \
802 all &= result; \
803 none |= result; \
805 if (record) { \
806 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
809 #define VCMPFP(suffix, compare, order) \
810 VCMPFP_DO(suffix, compare, order, 0) \
811 VCMPFP_DO(suffix##_dot, compare, order, 1)
812 VCMPFP(eqfp, ==, float_relation_equal)
813 VCMPFP(gefp, !=, float_relation_less)
814 VCMPFP(gtfp, ==, float_relation_greater)
815 #undef VCMPFP_DO
816 #undef VCMPFP
818 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
819 ppc_avr_t *a, ppc_avr_t *b, int record)
821 int i;
822 int all_in = 0;
824 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
825 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
826 if (le_rel == float_relation_unordered) {
827 r->u32[i] = 0xc0000000;
828 all_in = 1;
829 } else {
830 float32 bneg = float32_chs(b->f[i]);
831 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
832 int le = le_rel != float_relation_greater;
833 int ge = ge_rel != float_relation_less;
835 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
836 all_in |= (!le | !ge);
839 if (record) {
840 env->crf[6] = (all_in == 0) << 1;
844 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
846 vcmpbfp_internal(env, r, a, b, 0);
849 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
850 ppc_avr_t *b)
852 vcmpbfp_internal(env, r, a, b, 1);
/*
 * VCT(suffix, satcvt, element) defines helper_vct<suffix>(env, r, b, uim),
 * float32 -> saturated integer conversion.  A local copy of
 * env->vec_status is switched to round-to-zero so the guest status is left
 * alone.  NaN inputs give 0; other inputs are widened to float64, scaled
 * with float64_scalbn(t, uim), converted to int64 and clamped by "satcvt".
 * The VSCR_SAT bit of env->vscr is set if any element saturated.
 */
855 #define VCT(suffix, satcvt, element) \
856 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
857 ppc_avr_t *b, uint32_t uim) \
859 int i; \
860 int sat = 0; \
861 float_status s = env->vec_status; \
863 set_float_rounding_mode(float_round_to_zero, &s); \
864 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
865 if (float32_is_any_nan(b->f[i])) { \
866 r->element[i] = 0; \
867 } else { \
868 float64 t = float32_to_float64(b->f[i], &s); \
869 int64_t j; \
871 t = float64_scalbn(t, uim, &s); \
872 j = float64_to_int64(t, &s); \
873 r->element[i] = satcvt(j, &sat); \
876 if (sat) { \
877 env->vscr |= (1 << VSCR_SAT); \
880 VCT(uxs, cvtsduw, u32)
881 VCT(sxs, cvtsdsw, s32)
882 #undef VCT
884 target_ulong helper_vclzlsbb(ppc_avr_t *r)
886 target_ulong count = 0;
887 int i;
888 VECTOR_FOR_INORDER_I(i, u8) {
889 if (r->u8[i] & 0x01) {
890 break;
892 count++;
894 return count;
897 target_ulong helper_vctzlsbb(ppc_avr_t *r)
899 target_ulong count = 0;
900 int i;
901 #if defined(HOST_WORDS_BIGENDIAN)
902 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
903 #else
904 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
905 #endif
906 if (r->u8[i] & 0x01) {
907 break;
909 count++;
911 return count;
914 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
915 ppc_avr_t *b, ppc_avr_t *c)
917 int sat = 0;
918 int i;
920 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
921 int32_t prod = a->s16[i] * b->s16[i];
922 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
924 r->s16[i] = cvtswsh(t, &sat);
927 if (sat) {
928 env->vscr |= (1 << VSCR_SAT);
932 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
933 ppc_avr_t *b, ppc_avr_t *c)
935 int sat = 0;
936 int i;
938 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
939 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
940 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
941 r->s16[i] = cvtswsh(t, &sat);
944 if (sat) {
945 env->vscr |= (1 << VSCR_SAT);
/*
 * VMINMAX_DO(name, compare, element) defines helper_v<name>(r, a, b): per
 * element, b is selected when "a compare b" holds and a otherwise.  VMINMAX
 * therefore builds min with ">" and max with "<", for each signed and
 * unsigned element width.
 */
949 #define VMINMAX_DO(name, compare, element) \
950 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
952 int i; \
954 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
955 if (a->element[i] compare b->element[i]) { \
956 r->element[i] = b->element[i]; \
957 } else { \
958 r->element[i] = a->element[i]; \
962 #define VMINMAX(suffix, element) \
963 VMINMAX_DO(min##suffix, >, element) \
964 VMINMAX_DO(max##suffix, <, element)
965 VMINMAX(sb, s8)
966 VMINMAX(sh, s16)
967 VMINMAX(sw, s32)
968 VMINMAX(sd, s64)
969 VMINMAX(ub, u8)
970 VMINMAX(uh, u16)
971 VMINMAX(uw, u32)
972 VMINMAX(ud, u64)
973 #undef VMINMAX_DO
974 #undef VMINMAX
976 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
978 int i;
980 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
981 int32_t prod = a->s16[i] * b->s16[i];
982 r->s16[i] = (int16_t) (prod + c->s16[i]);
/*
 * VMRG_DO(name, element, highp) defines helper_v<name>(r, a, b), which
 * interleaves half of the elements of a with half of the elements of b.
 * With highp set, the low-indexed half of the host arrays is used,
 * otherwise the high-indexed half.  The result is built in a temporary and
 * copied to *r at the end, so r may alias a or b.
 * MRGHI/MRGLO swap with host endianness; VMRG passes MRGHI to the mrgl
 * helpers and MRGLO to the mrgh helpers.
 */
986 #define VMRG_DO(name, element, highp) \
987 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
989 ppc_avr_t result; \
990 int i; \
991 size_t n_elems = ARRAY_SIZE(r->element); \
993 for (i = 0; i < n_elems / 2; i++) { \
994 if (highp) { \
995 result.element[i*2+HI_IDX] = a->element[i]; \
996 result.element[i*2+LO_IDX] = b->element[i]; \
997 } else { \
998 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
999 b->element[n_elems - i - 1]; \
1000 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
1001 a->element[n_elems - i - 1]; \
1004 *r = result; \
1006 #if defined(HOST_WORDS_BIGENDIAN)
1007 #define MRGHI 0
1008 #define MRGLO 1
1009 #else
1010 #define MRGHI 1
1011 #define MRGLO 0
1012 #endif
1013 #define VMRG(suffix, element) \
1014 VMRG_DO(mrgl##suffix, element, MRGHI) \
1015 VMRG_DO(mrgh##suffix, element, MRGLO)
1016 VMRG(b, u8)
1017 VMRG(h, u16)
1018 VMRG(w, u32)
1019 #undef VMRG_DO
1020 #undef VMRG
1021 #undef MRGHI
1022 #undef MRGLO
1024 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1025 ppc_avr_t *b, ppc_avr_t *c)
1027 int32_t prod[16];
1028 int i;
1030 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1031 prod[i] = (int32_t)a->s8[i] * b->u8[i];
1034 VECTOR_FOR_INORDER_I(i, s32) {
1035 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1036 prod[4 * i + 2] + prod[4 * i + 3];
1040 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1041 ppc_avr_t *b, ppc_avr_t *c)
1043 int32_t prod[8];
1044 int i;
1046 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1047 prod[i] = a->s16[i] * b->s16[i];
1050 VECTOR_FOR_INORDER_I(i, s32) {
1051 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1055 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1056 ppc_avr_t *b, ppc_avr_t *c)
1058 int32_t prod[8];
1059 int i;
1060 int sat = 0;
1062 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1063 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1066 VECTOR_FOR_INORDER_I(i, s32) {
1067 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1069 r->u32[i] = cvtsdsw(t, &sat);
1072 if (sat) {
1073 env->vscr |= (1 << VSCR_SAT);
1077 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1078 ppc_avr_t *b, ppc_avr_t *c)
1080 uint16_t prod[16];
1081 int i;
1083 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1084 prod[i] = a->u8[i] * b->u8[i];
1087 VECTOR_FOR_INORDER_I(i, u32) {
1088 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1089 prod[4 * i + 2] + prod[4 * i + 3];
1093 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1094 ppc_avr_t *b, ppc_avr_t *c)
1096 uint32_t prod[8];
1097 int i;
1099 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1100 prod[i] = a->u16[i] * b->u16[i];
1103 VECTOR_FOR_INORDER_I(i, u32) {
1104 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1108 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1109 ppc_avr_t *b, ppc_avr_t *c)
1111 uint32_t prod[8];
1112 int i;
1113 int sat = 0;
1115 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1116 prod[i] = a->u16[i] * b->u16[i];
1119 VECTOR_FOR_INORDER_I(i, s32) {
1120 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1122 r->u32[i] = cvtuduw(t, &sat);
1125 if (sat) {
1126 env->vscr |= (1 << VSCR_SAT);
/*
 * VMUL_DO(name, mul_element, prod_element, cast, evenp) defines
 * helper_v<name>(r, a, b), a widening multiply.  For each product element i
 * it multiplies the source elements at index i * 2 + HI_IDX (evenp set) or
 * i * 2 + LO_IDX (evenp clear); both operands are cast to "cast" first so
 * the product is computed at the wider width.  VMUL instantiates the
 * mule<suffix> (evenp = 1) and mulo<suffix> (evenp = 0) pair.
 */
1130 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
1131 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1133 int i; \
1135 VECTOR_FOR_INORDER_I(i, prod_element) { \
1136 if (evenp) { \
1137 r->prod_element[i] = \
1138 (cast)a->mul_element[i * 2 + HI_IDX] * \
1139 (cast)b->mul_element[i * 2 + HI_IDX]; \
1140 } else { \
1141 r->prod_element[i] = \
1142 (cast)a->mul_element[i * 2 + LO_IDX] * \
1143 (cast)b->mul_element[i * 2 + LO_IDX]; \
1147 #define VMUL(suffix, mul_element, prod_element, cast) \
1148 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1149 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1150 VMUL(sb, s8, s16, int16_t)
1151 VMUL(sh, s16, s32, int32_t)
1152 VMUL(sw, s32, s64, int64_t)
1153 VMUL(ub, u8, u16, uint16_t)
1154 VMUL(uh, u16, u32, uint32_t)
1155 VMUL(uw, u32, u64, uint64_t)
1156 #undef VMUL_DO
1157 #undef VMUL
1159 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1160 ppc_avr_t *c)
1162 ppc_avr_t result;
1163 int i;
1165 VECTOR_FOR_INORDER_I(i, u8) {
1166 int s = c->u8[i] & 0x1f;
1167 #if defined(HOST_WORDS_BIGENDIAN)
1168 int index = s & 0xf;
1169 #else
1170 int index = 15 - (s & 0xf);
1171 #endif
1173 if (s & 0x10) {
1174 result.u8[i] = b->u8[index];
1175 } else {
1176 result.u8[i] = a->u8[index];
1179 *r = result;
1182 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1183 ppc_avr_t *c)
1185 ppc_avr_t result;
1186 int i;
1188 VECTOR_FOR_INORDER_I(i, u8) {
1189 int s = c->u8[i] & 0x1f;
1190 #if defined(HOST_WORDS_BIGENDIAN)
1191 int index = 15 - (s & 0xf);
1192 #else
1193 int index = s & 0xf;
1194 #endif
1196 if (s & 0x10) {
1197 result.u8[i] = a->u8[index];
1198 } else {
1199 result.u8[i] = b->u8[index];
1202 *r = result;
/*
 * Host-endianness helpers for the bit-permute instructions.
 * VBPERMQ_INDEX(avr, i) reads byte i of avr (mirrored to 15 - i on
 * little-endian hosts).  VBPERMD_INDEX(i) maps a doubleword number to a
 * host u64 index.  VBPERMQ_DW(index) picks the host u64 holding bit
 * "index".  EXTRACT_BIT(avr, i, index) extracts one bit of doubleword i.
 * NOTE: the little-endian VBPERMD_INDEX and EXTRACT_BIT do not parenthesize
 * their arguments, so only pass plain identifiers.
 */
1205 #if defined(HOST_WORDS_BIGENDIAN)
1206 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1207 #define VBPERMD_INDEX(i) (i)
1208 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1209 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1210 #else
1211 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1212 #define VBPERMD_INDEX(i) (1 - i)
1213 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1214 #define EXTRACT_BIT(avr, i, index) \
1215 (extract64((avr)->u64[1 - i], 63 - index, 1))
1216 #endif
1218 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1220 int i, j;
1221 ppc_avr_t result = { .u64 = { 0, 0 } };
1222 VECTOR_FOR_INORDER_I(i, u64) {
1223 for (j = 0; j < 8; j++) {
1224 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1225 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1226 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1230 *r = result;
1233 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1235 int i;
1236 uint64_t perm = 0;
1238 VECTOR_FOR_INORDER_I(i, u8) {
1239 int index = VBPERMQ_INDEX(b, i);
1241 if (index < 128) {
1242 uint64_t mask = (1ull << (63-(index & 0x3F)));
1243 if (a->u64[VBPERMQ_DW(index)] & mask) {
1244 perm |= (0x8000 >> i);
1249 r->u64[HI_IDX] = perm;
1250 r->u64[LO_IDX] = 0;
1253 #undef VBPERMQ_INDEX
1254 #undef VBPERMQ_DW
/*
 * Lookup table indexed by a byte value: for every bit n (0..7) that is set
 * in the index, bit (8 * n + 7) is set in the 64-bit entry.  In other words
 * bit n of the index lands in the most significant bit of byte n of the
 * mask.  The 256 entries are generated at compile time instead of being
 * spelled out one by one.
 */
#define VGBBD_BIT(v, n) \
    ((((uint64_t)(v) >> (n)) & 1) << (8 * (n) + 7))
#define VGBBD_M1(v) \
    (VGBBD_BIT(v, 0) | VGBBD_BIT(v, 1) | VGBBD_BIT(v, 2) | VGBBD_BIT(v, 3) | \
     VGBBD_BIT(v, 4) | VGBBD_BIT(v, 5) | VGBBD_BIT(v, 6) | VGBBD_BIT(v, 7))
#define VGBBD_M4(v) \
    VGBBD_M1(v), VGBBD_M1((v) + 1), VGBBD_M1((v) + 2), VGBBD_M1((v) + 3)
#define VGBBD_M16(v) \
    VGBBD_M4(v), VGBBD_M4((v) + 4), VGBBD_M4((v) + 8), VGBBD_M4((v) + 12)
#define VGBBD_M64(v) \
    VGBBD_M16(v), VGBBD_M16((v) + 16), VGBBD_M16((v) + 32), VGBBD_M16((v) + 48)

static const uint64_t VGBBD_MASKS[256] = {
    VGBBD_M64(0x00),    /* 00 .. 3F */
    VGBBD_M64(0x40),    /* 40 .. 7F */
    VGBBD_M64(0x80),    /* 80 .. BF */
    VGBBD_M64(0xC0),    /* C0 .. FF */
};

#undef VGBBD_M64
#undef VGBBD_M16
#undef VGBBD_M4
#undef VGBBD_M1
#undef VGBBD_BIT
1515 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1517 int i;
1518 uint64_t t[2] = { 0, 0 };
1520 VECTOR_FOR_INORDER_I(i, u8) {
1521 #if defined(HOST_WORDS_BIGENDIAN)
1522 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1523 #else
1524 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1525 #endif
1528 r->u64[0] = t[0];
1529 r->u64[1] = t[1];
1532 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1533 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1535 int i, j; \
1536 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1538 VECTOR_FOR_INORDER_I(i, srcfld) { \
1539 prod[i] = 0; \
1540 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1541 if (a->srcfld[i] & (1ull<<j)) { \
1542 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1547 VECTOR_FOR_INORDER_I(i, trgfld) { \
1548 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1552 PMSUM(vpmsumb, u8, u16, uint16_t)
1553 PMSUM(vpmsumh, u16, u32, uint32_t)
1554 PMSUM(vpmsumw, u32, u64, uint64_t)
1556 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1559 #ifdef CONFIG_INT128
1560 int i, j;
1561 __uint128_t prod[2];
1563 VECTOR_FOR_INORDER_I(i, u64) {
1564 prod[i] = 0;
1565 for (j = 0; j < 64; j++) {
1566 if (a->u64[i] & (1ull<<j)) {
1567 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1572 r->u128 = prod[0] ^ prod[1];
1574 #else
1575 int i, j;
1576 ppc_avr_t prod[2];
1578 VECTOR_FOR_INORDER_I(i, u64) {
1579 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1580 for (j = 0; j < 64; j++) {
1581 if (a->u64[i] & (1ull<<j)) {
1582 ppc_avr_t bshift;
1583 if (j == 0) {
1584 bshift.u64[HI_IDX] = 0;
1585 bshift.u64[LO_IDX] = b->u64[i];
1586 } else {
1587 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1588 bshift.u64[LO_IDX] = b->u64[i] << j;
1590 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1591 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1596 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1597 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1598 #endif
1602 #if defined(HOST_WORDS_BIGENDIAN)
1603 #define PKBIG 1
1604 #else
1605 #define PKBIG 0
1606 #endif
1607 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1609 int i, j;
1610 ppc_avr_t result;
1611 #if defined(HOST_WORDS_BIGENDIAN)
1612 const ppc_avr_t *x[2] = { a, b };
1613 #else
1614 const ppc_avr_t *x[2] = { b, a };
1615 #endif
1617 VECTOR_FOR_INORDER_I(i, u64) {
1618 VECTOR_FOR_INORDER_I(j, u32) {
1619 uint32_t e = x[i]->u32[j];
1621 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1622 ((e >> 6) & 0x3e0) |
1623 ((e >> 3) & 0x1f));
1626 *r = result;
1629 #define VPK(suffix, from, to, cvt, dosat) \
1630 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1631 ppc_avr_t *a, ppc_avr_t *b) \
1633 int i; \
1634 int sat = 0; \
1635 ppc_avr_t result; \
1636 ppc_avr_t *a0 = PKBIG ? a : b; \
1637 ppc_avr_t *a1 = PKBIG ? b : a; \
1639 VECTOR_FOR_INORDER_I(i, from) { \
1640 result.to[i] = cvt(a0->from[i], &sat); \
1641 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1643 *r = result; \
1644 if (dosat && sat) { \
1645 env->vscr |= (1 << VSCR_SAT); \
1648 #define I(x, y) (x)
1649 VPK(shss, s16, s8, cvtshsb, 1)
1650 VPK(shus, s16, u8, cvtshub, 1)
1651 VPK(swss, s32, s16, cvtswsh, 1)
1652 VPK(swus, s32, u16, cvtswuh, 1)
1653 VPK(sdss, s64, s32, cvtsdsw, 1)
1654 VPK(sdus, s64, u32, cvtsduw, 1)
1655 VPK(uhus, u16, u8, cvtuhub, 1)
1656 VPK(uwus, u32, u16, cvtuwuh, 1)
1657 VPK(udus, u64, u32, cvtuduw, 1)
1658 VPK(uhum, u16, u8, I, 0)
1659 VPK(uwum, u32, u16, I, 0)
1660 VPK(udum, u64, u32, I, 0)
1661 #undef I
1662 #undef VPK
1663 #undef PKBIG
1665 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1667 int i;
1669 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1670 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1674 #define VRFI(suffix, rounding) \
1675 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1676 ppc_avr_t *b) \
1678 int i; \
1679 float_status s = env->vec_status; \
1681 set_float_rounding_mode(rounding, &s); \
1682 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1683 r->f[i] = float32_round_to_int (b->f[i], &s); \
1686 VRFI(n, float_round_nearest_even)
1687 VRFI(m, float_round_down)
1688 VRFI(p, float_round_up)
1689 VRFI(z, float_round_to_zero)
1690 #undef VRFI
1692 #define VROTATE(suffix, element, mask) \
1693 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1695 int i; \
1697 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1698 unsigned int shift = b->element[i] & mask; \
1699 r->element[i] = (a->element[i] << shift) | \
1700 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1703 VROTATE(b, u8, 0x7)
1704 VROTATE(h, u16, 0xF)
1705 VROTATE(w, u32, 0x1F)
1706 VROTATE(d, u64, 0x3F)
1707 #undef VROTATE
1709 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1711 int i;
1713 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1714 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1716 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1720 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1721 ppc_avr_t *c)
1723 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1724 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1727 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1729 int i;
1731 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1732 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1736 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1738 int i;
1740 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1741 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1745 /* The specification says that the results are undefined if all of the
1746 * shift counts are not identical. We check to make sure that they are
1747 * to conform to what real hardware appears to do. */
1748 #define VSHIFT(suffix, leftp) \
1749 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1751 int shift = b->u8[LO_IDX*15] & 0x7; \
1752 int doit = 1; \
1753 int i; \
1755 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1756 doit = doit && ((b->u8[i] & 0x7) == shift); \
1758 if (doit) { \
1759 if (shift == 0) { \
1760 *r = *a; \
1761 } else if (leftp) { \
1762 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1764 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1765 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1766 } else { \
1767 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1769 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1770 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1774 VSHIFT(l, 1)
1775 VSHIFT(r, 0)
1776 #undef VSHIFT
1778 #define VSL(suffix, element, mask) \
1779 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1781 int i; \
1783 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1784 unsigned int shift = b->element[i] & mask; \
1786 r->element[i] = a->element[i] << shift; \
1789 VSL(b, u8, 0x7)
1790 VSL(h, u16, 0x0F)
1791 VSL(w, u32, 0x1F)
1792 VSL(d, u64, 0x3F)
1793 #undef VSL
1795 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1797 int i;
1798 unsigned int shift, bytes, size;
1800 size = ARRAY_SIZE(r->u8);
1801 for (i = 0; i < size; i++) {
1802 shift = b->u8[i] & 0x7; /* extract shift value */
1803 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */
1804 (((i + 1) < size) ? a->u8[i + 1] : 0);
1805 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */
1809 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1811 int i;
1812 unsigned int shift, bytes;
1814 /* Use reverse order, as destination and source register can be same. Its
1815 * being modified in place saving temporary, reverse order will guarantee
1816 * that computed result is not fed back.
1818 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1819 shift = b->u8[i] & 0x7; /* extract shift value */
1820 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
1821 /* extract adjacent bytes */
1822 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */
1826 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1828 int sh = shift & 0xf;
1829 int i;
1830 ppc_avr_t result;
1832 #if defined(HOST_WORDS_BIGENDIAN)
1833 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1834 int index = sh + i;
1835 if (index > 0xf) {
1836 result.u8[i] = b->u8[index - 0x10];
1837 } else {
1838 result.u8[i] = a->u8[index];
1841 #else
1842 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1843 int index = (16 - sh) + i;
1844 if (index > 0xf) {
1845 result.u8[i] = a->u8[index - 0x10];
1846 } else {
1847 result.u8[i] = b->u8[index];
1850 #endif
1851 *r = result;
1854 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1856 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1858 #if defined(HOST_WORDS_BIGENDIAN)
1859 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1860 memset(&r->u8[16-sh], 0, sh);
1861 #else
1862 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1863 memset(&r->u8[0], 0, sh);
1864 #endif
1867 /* Experimental testing shows that hardware masks the immediate. */
1868 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1869 #if defined(HOST_WORDS_BIGENDIAN)
1870 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1871 #else
1872 #define SPLAT_ELEMENT(element) \
1873 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1874 #endif
1875 #define VSPLT(suffix, element) \
1876 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1878 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1879 int i; \
1881 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1882 r->element[i] = s; \
1885 VSPLT(b, u8)
1886 VSPLT(h, u16)
1887 VSPLT(w, u32)
1888 #undef VSPLT
1889 #undef SPLAT_ELEMENT
1890 #undef _SPLAT_MASKED
1891 #if defined(HOST_WORDS_BIGENDIAN)
1892 #define VINSERT(suffix, element) \
1893 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1895 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element)], \
1896 sizeof(r->element[0])); \
1898 #else
1899 #define VINSERT(suffix, element) \
1900 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1902 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1903 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1905 #endif
1906 VINSERT(b, u8)
1907 VINSERT(h, u16)
1908 VINSERT(w, u32)
1909 VINSERT(d, u64)
1910 #undef VINSERT
1911 #if defined(HOST_WORDS_BIGENDIAN)
1912 #define VEXTRACT(suffix, element) \
1913 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1915 uint32_t es = sizeof(r->element[0]); \
1916 memmove(&r->u8[8 - es], &b->u8[index], es); \
1917 memset(&r->u8[8], 0, 8); \
1918 memset(&r->u8[0], 0, 8 - es); \
1920 #else
1921 #define VEXTRACT(suffix, element) \
1922 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1924 uint32_t es = sizeof(r->element[0]); \
1925 uint32_t s = (16 - index) - es; \
1926 memmove(&r->u8[8], &b->u8[s], es); \
1927 memset(&r->u8[0], 0, 8); \
1928 memset(&r->u8[8 + es], 0, 8 - es); \
1930 #endif
1931 VEXTRACT(ub, u8)
1932 VEXTRACT(uh, u16)
1933 VEXTRACT(uw, u32)
1934 VEXTRACT(d, u64)
1935 #undef VEXTRACT
1937 #define VEXT_SIGNED(name, element, mask, cast, recast) \
1938 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1940 int i; \
1941 VECTOR_FOR_INORDER_I(i, element) { \
1942 r->element[i] = (recast)((cast)(b->element[i] & mask)); \
1945 VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
1946 VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
1947 VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
1948 VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
1949 VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
1950 #undef VEXT_SIGNED
1952 #define VNEG(name, element) \
1953 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1955 int i; \
1956 VECTOR_FOR_INORDER_I(i, element) { \
1957 r->element[i] = -b->element[i]; \
1960 VNEG(vnegw, s32)
1961 VNEG(vnegd, s64)
1962 #undef VNEG
1964 #define VSPLTI(suffix, element, splat_type) \
1965 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1967 splat_type x = (int8_t)(splat << 3) >> 3; \
1968 int i; \
1970 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1971 r->element[i] = x; \
1974 VSPLTI(b, s8, int8_t)
1975 VSPLTI(h, s16, int16_t)
1976 VSPLTI(w, s32, int32_t)
1977 #undef VSPLTI
1979 #define VSR(suffix, element, mask) \
1980 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1982 int i; \
1984 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1985 unsigned int shift = b->element[i] & mask; \
1986 r->element[i] = a->element[i] >> shift; \
1989 VSR(ab, s8, 0x7)
1990 VSR(ah, s16, 0xF)
1991 VSR(aw, s32, 0x1F)
1992 VSR(ad, s64, 0x3F)
1993 VSR(b, u8, 0x7)
1994 VSR(h, u16, 0xF)
1995 VSR(w, u32, 0x1F)
1996 VSR(d, u64, 0x3F)
1997 #undef VSR
1999 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2001 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
2003 #if defined(HOST_WORDS_BIGENDIAN)
2004 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
2005 memset(&r->u8[0], 0, sh);
2006 #else
2007 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
2008 memset(&r->u8[16 - sh], 0, sh);
2009 #endif
2012 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2014 int i;
2016 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2017 r->u32[i] = a->u32[i] >= b->u32[i];
2021 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2023 int64_t t;
2024 int i, upper;
2025 ppc_avr_t result;
2026 int sat = 0;
2028 #if defined(HOST_WORDS_BIGENDIAN)
2029 upper = ARRAY_SIZE(r->s32)-1;
2030 #else
2031 upper = 0;
2032 #endif
2033 t = (int64_t)b->s32[upper];
2034 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2035 t += a->s32[i];
2036 result.s32[i] = 0;
2038 result.s32[upper] = cvtsdsw(t, &sat);
2039 *r = result;
2041 if (sat) {
2042 env->vscr |= (1 << VSCR_SAT);
2046 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2048 int i, j, upper;
2049 ppc_avr_t result;
2050 int sat = 0;
2052 #if defined(HOST_WORDS_BIGENDIAN)
2053 upper = 1;
2054 #else
2055 upper = 0;
2056 #endif
2057 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2058 int64_t t = (int64_t)b->s32[upper + i * 2];
2060 result.u64[i] = 0;
2061 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2062 t += a->s32[2 * i + j];
2064 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
2067 *r = result;
2068 if (sat) {
2069 env->vscr |= (1 << VSCR_SAT);
2073 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2075 int i, j;
2076 int sat = 0;
2078 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2079 int64_t t = (int64_t)b->s32[i];
2081 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2082 t += a->s8[4 * i + j];
2084 r->s32[i] = cvtsdsw(t, &sat);
2087 if (sat) {
2088 env->vscr |= (1 << VSCR_SAT);
2092 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2094 int sat = 0;
2095 int i;
2097 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2098 int64_t t = (int64_t)b->s32[i];
2100 t += a->s16[2 * i] + a->s16[2 * i + 1];
2101 r->s32[i] = cvtsdsw(t, &sat);
2104 if (sat) {
2105 env->vscr |= (1 << VSCR_SAT);
2109 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2111 int i, j;
2112 int sat = 0;
2114 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2115 uint64_t t = (uint64_t)b->u32[i];
2117 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2118 t += a->u8[4 * i + j];
2120 r->u32[i] = cvtuduw(t, &sat);
2123 if (sat) {
2124 env->vscr |= (1 << VSCR_SAT);
2128 #if defined(HOST_WORDS_BIGENDIAN)
2129 #define UPKHI 1
2130 #define UPKLO 0
2131 #else
2132 #define UPKHI 0
2133 #define UPKLO 1
2134 #endif
2135 #define VUPKPX(suffix, hi) \
2136 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2138 int i; \
2139 ppc_avr_t result; \
2141 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2142 uint16_t e = b->u16[hi ? i : i+4]; \
2143 uint8_t a = (e >> 15) ? 0xff : 0; \
2144 uint8_t r = (e >> 10) & 0x1f; \
2145 uint8_t g = (e >> 5) & 0x1f; \
2146 uint8_t b = e & 0x1f; \
2148 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2150 *r = result; \
2152 VUPKPX(lpx, UPKLO)
2153 VUPKPX(hpx, UPKHI)
2154 #undef VUPKPX
2156 #define VUPK(suffix, unpacked, packee, hi) \
2157 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2159 int i; \
2160 ppc_avr_t result; \
2162 if (hi) { \
2163 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2164 result.unpacked[i] = b->packee[i]; \
2166 } else { \
2167 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2168 i++) { \
2169 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2172 *r = result; \
2174 VUPK(hsb, s16, s8, UPKHI)
2175 VUPK(hsh, s32, s16, UPKHI)
2176 VUPK(hsw, s64, s32, UPKHI)
2177 VUPK(lsb, s16, s8, UPKLO)
2178 VUPK(lsh, s32, s16, UPKLO)
2179 VUPK(lsw, s64, s32, UPKLO)
2180 #undef VUPK
2181 #undef UPKHI
2182 #undef UPKLO
2184 #define VGENERIC_DO(name, element) \
2185 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2187 int i; \
2189 VECTOR_FOR_INORDER_I(i, element) { \
2190 r->element[i] = name(b->element[i]); \
2194 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2195 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2196 #define clzw(v) clz32((v))
2197 #define clzd(v) clz64((v))
2199 VGENERIC_DO(clzb, u8)
2200 VGENERIC_DO(clzh, u16)
2201 VGENERIC_DO(clzw, u32)
2202 VGENERIC_DO(clzd, u64)
2204 #undef clzb
2205 #undef clzh
2206 #undef clzw
2207 #undef clzd
2209 #define ctzb(v) ((v) ? ctz32(v) : 8)
2210 #define ctzh(v) ((v) ? ctz32(v) : 16)
2211 #define ctzw(v) ctz32((v))
2212 #define ctzd(v) ctz64((v))
2214 VGENERIC_DO(ctzb, u8)
2215 VGENERIC_DO(ctzh, u16)
2216 VGENERIC_DO(ctzw, u32)
2217 VGENERIC_DO(ctzd, u64)
2219 #undef ctzb
2220 #undef ctzh
2221 #undef ctzw
2222 #undef ctzd
2224 #define popcntb(v) ctpop8(v)
2225 #define popcnth(v) ctpop16(v)
2226 #define popcntw(v) ctpop32(v)
2227 #define popcntd(v) ctpop64(v)
2229 VGENERIC_DO(popcntb, u8)
2230 VGENERIC_DO(popcnth, u16)
2231 VGENERIC_DO(popcntw, u32)
2232 VGENERIC_DO(popcntd, u64)
2234 #undef popcntb
2235 #undef popcnth
2236 #undef popcntw
2237 #undef popcntd
2239 #undef VGENERIC_DO
2241 #if defined(HOST_WORDS_BIGENDIAN)
2242 #define QW_ONE { .u64 = { 0, 1 } }
2243 #else
2244 #define QW_ONE { .u64 = { 1, 0 } }
2245 #endif
2247 #ifndef CONFIG_INT128
2249 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2251 t->u64[0] = ~a.u64[0];
2252 t->u64[1] = ~a.u64[1];
2255 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2257 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
2258 return -1;
2259 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
2260 return 1;
2261 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
2262 return -1;
2263 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
2264 return 1;
2265 } else {
2266 return 0;
2270 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2272 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2273 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2274 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2277 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2279 ppc_avr_t not_a;
2280 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2281 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2282 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2283 avr_qw_not(&not_a, a);
2284 return avr_qw_cmpu(not_a, b) < 0;
2287 #endif
2289 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2291 #ifdef CONFIG_INT128
2292 r->u128 = a->u128 + b->u128;
2293 #else
2294 avr_qw_add(r, *a, *b);
2295 #endif
2298 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2300 #ifdef CONFIG_INT128
2301 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2302 #else
2304 if (c->u64[LO_IDX] & 1) {
2305 ppc_avr_t tmp;
2307 tmp.u64[HI_IDX] = 0;
2308 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2309 avr_qw_add(&tmp, *a, tmp);
2310 avr_qw_add(r, tmp, *b);
2311 } else {
2312 avr_qw_add(r, *a, *b);
2314 #endif
2317 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2319 #ifdef CONFIG_INT128
2320 r->u128 = (~a->u128 < b->u128);
2321 #else
2322 ppc_avr_t not_a;
2324 avr_qw_not(&not_a, *a);
2326 r->u64[HI_IDX] = 0;
2327 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2328 #endif
2331 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2333 #ifdef CONFIG_INT128
2334 int carry_out = (~a->u128 < b->u128);
2335 if (!carry_out && (c->u128 & 1)) {
2336 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2337 ((a->u128 != 0) || (b->u128 != 0));
2339 r->u128 = carry_out;
2340 #else
2342 int carry_in = c->u64[LO_IDX] & 1;
2343 int carry_out = 0;
2344 ppc_avr_t tmp;
2346 carry_out = avr_qw_addc(&tmp, *a, *b);
2348 if (!carry_out && carry_in) {
2349 ppc_avr_t one = QW_ONE;
2350 carry_out = avr_qw_addc(&tmp, tmp, one);
2352 r->u64[HI_IDX] = 0;
2353 r->u64[LO_IDX] = carry_out;
2354 #endif
2357 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2359 #ifdef CONFIG_INT128
2360 r->u128 = a->u128 - b->u128;
2361 #else
2362 ppc_avr_t tmp;
2363 ppc_avr_t one = QW_ONE;
2365 avr_qw_not(&tmp, *b);
2366 avr_qw_add(&tmp, *a, tmp);
2367 avr_qw_add(r, tmp, one);
2368 #endif
2371 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2373 #ifdef CONFIG_INT128
2374 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2375 #else
2376 ppc_avr_t tmp, sum;
2378 avr_qw_not(&tmp, *b);
2379 avr_qw_add(&sum, *a, tmp);
2381 tmp.u64[HI_IDX] = 0;
2382 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2383 avr_qw_add(r, sum, tmp);
2384 #endif
2387 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2389 #ifdef CONFIG_INT128
2390 r->u128 = (~a->u128 < ~b->u128) ||
2391 (a->u128 + ~b->u128 == (__uint128_t)-1);
2392 #else
2393 int carry = (avr_qw_cmpu(*a, *b) > 0);
2394 if (!carry) {
2395 ppc_avr_t tmp;
2396 avr_qw_not(&tmp, *b);
2397 avr_qw_add(&tmp, *a, tmp);
2398 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2400 r->u64[HI_IDX] = 0;
2401 r->u64[LO_IDX] = carry;
2402 #endif
2405 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2407 #ifdef CONFIG_INT128
2408 r->u128 =
2409 (~a->u128 < ~b->u128) ||
2410 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2411 #else
2412 int carry_in = c->u64[LO_IDX] & 1;
2413 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2414 if (!carry_out && carry_in) {
2415 ppc_avr_t tmp;
2416 avr_qw_not(&tmp, *b);
2417 avr_qw_add(&tmp, *a, tmp);
2418 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2421 r->u64[HI_IDX] = 0;
2422 r->u64[LO_IDX] = carry_out;
2423 #endif
/* Sign nibble values recognised in a packed BCD operand: four that mean
 * "plus" and two that mean "minus". */
#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE

/*
 * Index into ppc_avr_t.u8 of the byte holding nibble n (two nibbles per
 * byte, nibble 0 being the sign).  The argument is parenthesised so that an
 * expression such as BCD_DIG_BYTE(i + 1) divides the whole argument by two.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
#else
#define BCD_DIG_BYTE(n) ((n) / 2)
#endif
2439 static int bcd_get_sgn(ppc_avr_t *bcd)
2441 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2442 case BCD_PLUS_PREF_1:
2443 case BCD_PLUS_PREF_2:
2444 case BCD_PLUS_ALT_1:
2445 case BCD_PLUS_ALT_2:
2447 return 1;
2450 case BCD_NEG_PREF:
2451 case BCD_NEG_ALT:
2453 return -1;
2456 default:
2458 return 0;
2463 static int bcd_preferred_sgn(int sgn, int ps)
2465 if (sgn >= 0) {
2466 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2467 } else {
2468 return BCD_NEG_PREF;
2472 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2474 uint8_t result;
2475 if (n & 1) {
2476 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2477 } else {
2478 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2481 if (unlikely(result > 9)) {
2482 *invalid = true;
2484 return result;
2487 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2489 if (n & 1) {
2490 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2491 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2492 } else {
2493 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2494 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2498 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2500 int i;
2501 int invalid = 0;
2502 for (i = 31; i > 0; i--) {
2503 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2504 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2505 if (unlikely(invalid)) {
2506 return 0; /* doesn't matter */
2507 } else if (dig_a > dig_b) {
2508 return 1;
2509 } else if (dig_a < dig_b) {
2510 return -1;
2514 return 0;
2517 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2518 int *overflow)
2520 int carry = 0;
2521 int i;
2522 int is_zero = 1;
2523 for (i = 1; i <= 31; i++) {
2524 uint8_t digit = bcd_get_digit(a, i, invalid) +
2525 bcd_get_digit(b, i, invalid) + carry;
2526 is_zero &= (digit == 0);
2527 if (digit > 9) {
2528 carry = 1;
2529 digit -= 10;
2530 } else {
2531 carry = 0;
2534 bcd_put_digit(t, digit, i);
2536 if (unlikely(*invalid)) {
2537 return -1;
2541 *overflow = carry;
2542 return is_zero;
2545 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2546 int *overflow)
2548 int carry = 0;
2549 int i;
2550 int is_zero = 1;
2551 for (i = 1; i <= 31; i++) {
2552 uint8_t digit = bcd_get_digit(a, i, invalid) -
2553 bcd_get_digit(b, i, invalid) + carry;
2554 is_zero &= (digit == 0);
2555 if (digit & 0x80) {
2556 carry = -1;
2557 digit += 10;
2558 } else {
2559 carry = 0;
2562 bcd_put_digit(t, digit, i);
2564 if (unlikely(*invalid)) {
2565 return -1;
2569 *overflow = carry;
2570 return is_zero;
2573 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2576 int sgna = bcd_get_sgn(a);
2577 int sgnb = bcd_get_sgn(b);
2578 int invalid = (sgna == 0) || (sgnb == 0);
2579 int overflow = 0;
2580 int zero = 0;
2581 uint32_t cr = 0;
2582 ppc_avr_t result = { .u64 = { 0, 0 } };
2584 if (!invalid) {
2585 if (sgna == sgnb) {
2586 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2587 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2588 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2589 } else if (bcd_cmp_mag(a, b) > 0) {
2590 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2591 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2592 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2593 } else {
2594 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2595 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2596 cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2600 if (unlikely(invalid)) {
2601 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2602 cr = 1 << CRF_SO;
2603 } else if (overflow) {
2604 cr |= 1 << CRF_SO;
2605 } else if (zero) {
2606 cr = 1 << CRF_EQ;
2609 *r = result;
2611 return cr;
2614 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2616 ppc_avr_t bcopy = *b;
2617 int sgnb = bcd_get_sgn(b);
2618 if (sgnb < 0) {
2619 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2620 } else if (sgnb > 0) {
2621 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2623 /* else invalid ... defer to bcdadd code for proper handling */
2625 return helper_bcdadd(r, a, &bcopy, ps);
2628 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2630 int i;
2631 VECTOR_FOR_INORDER_I(i, u8) {
2632 r->u8[i] = AES_sbox[a->u8[i]];
2636 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2638 ppc_avr_t result;
2639 int i;
2641 VECTOR_FOR_INORDER_I(i, u32) {
2642 result.AVRW(i) = b->AVRW(i) ^
2643 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2644 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2645 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2646 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2648 *r = result;
2651 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2653 ppc_avr_t result;
2654 int i;
2656 VECTOR_FOR_INORDER_I(i, u8) {
2657 result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
2659 *r = result;
2662 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2664 /* This differs from what is written in ISA V2.07. The RTL is */
2665 /* incorrect and will be fixed in V2.07B. */
2666 int i;
2667 ppc_avr_t tmp;
2669 VECTOR_FOR_INORDER_I(i, u8) {
2670 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2673 VECTOR_FOR_INORDER_I(i, u32) {
2674 r->AVRW(i) =
2675 AES_imc[tmp.AVRB(4*i + 0)][0] ^
2676 AES_imc[tmp.AVRB(4*i + 1)][1] ^
2677 AES_imc[tmp.AVRB(4*i + 2)][2] ^
2678 AES_imc[tmp.AVRB(4*i + 3)][3];
2682 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2684 ppc_avr_t result;
2685 int i;
2687 VECTOR_FOR_INORDER_I(i, u8) {
2688 result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
2690 *r = result;
2693 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
2694 #if defined(HOST_WORDS_BIGENDIAN)
2695 #define EL_IDX(i) (i)
2696 #else
2697 #define EL_IDX(i) (3 - (i))
2698 #endif
2700 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2702 int st = (st_six & 0x10) != 0;
2703 int six = st_six & 0xF;
2704 int i;
2706 VECTOR_FOR_INORDER_I(i, u32) {
2707 if (st == 0) {
2708 if ((six & (0x8 >> i)) == 0) {
2709 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2710 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2711 (a->u32[EL_IDX(i)] >> 3);
2712 } else { /* six.bit[i] == 1 */
2713 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2714 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2715 (a->u32[EL_IDX(i)] >> 10);
2717 } else { /* st == 1 */
2718 if ((six & (0x8 >> i)) == 0) {
2719 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2720 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2721 ROTRu32(a->u32[EL_IDX(i)], 22);
2722 } else { /* six.bit[i] == 1 */
2723 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2724 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2725 ROTRu32(a->u32[EL_IDX(i)], 25);
2731 #undef ROTRu32
2732 #undef EL_IDX
2734 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
2735 #if defined(HOST_WORDS_BIGENDIAN)
2736 #define EL_IDX(i) (i)
2737 #else
2738 #define EL_IDX(i) (1 - (i))
2739 #endif
2741 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2743 int st = (st_six & 0x10) != 0;
2744 int six = st_six & 0xF;
2745 int i;
2747 VECTOR_FOR_INORDER_I(i, u64) {
2748 if (st == 0) {
2749 if ((six & (0x8 >> (2*i))) == 0) {
2750 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2751 ROTRu64(a->u64[EL_IDX(i)], 8) ^
2752 (a->u64[EL_IDX(i)] >> 7);
2753 } else { /* six.bit[2*i] == 1 */
2754 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2755 ROTRu64(a->u64[EL_IDX(i)], 61) ^
2756 (a->u64[EL_IDX(i)] >> 6);
2758 } else { /* st == 1 */
2759 if ((six & (0x8 >> (2*i))) == 0) {
2760 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2761 ROTRu64(a->u64[EL_IDX(i)], 34) ^
2762 ROTRu64(a->u64[EL_IDX(i)], 39);
2763 } else { /* six.bit[2*i] == 1 */
2764 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2765 ROTRu64(a->u64[EL_IDX(i)], 18) ^
2766 ROTRu64(a->u64[EL_IDX(i)], 41);
2772 #undef ROTRu64
2773 #undef EL_IDX
2775 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2777 ppc_avr_t result;
2778 int i;
2780 VECTOR_FOR_INORDER_I(i, u8) {
2781 int indexA = c->u8[i] >> 4;
2782 int indexB = c->u8[i] & 0xF;
2783 #if defined(HOST_WORDS_BIGENDIAN)
2784 result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
2785 #else
2786 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
2787 #endif
2789 *r = result;
2792 #undef VECTOR_FOR_INORDER_I
2793 #undef HI_IDX
2794 #undef LO_IDX
2796 /*****************************************************************************/
2797 /* SPE extension helpers */
/* Use a table to make this quicker: hbrev[x] is the nibble x bit-reversed */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the order of the 8 bits in val. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/*
 * Reverse the order of the 32 bits in val.
 *
 * Each reversed byte is widened to uint32_t before being shifted: a
 * uint8_t would otherwise be promoted to (signed) int, and shifting a
 * value >= 0x80 left by 24 would overflow int, which is undefined.
 */
static inline uint32_t word_reverse(uint32_t val)
{
    return (uint32_t)byte_reverse(val >> 24) |
           ((uint32_t)byte_reverse(val >> 16) << 8) |
           ((uint32_t)byte_reverse(val >> 8) << 16) |
           ((uint32_t)byte_reverse(val) << 24);
}
2815 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
2816 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2818 uint32_t a, b, d, mask;
2820 mask = UINT32_MAX >> (32 - MASKBITS);
2821 a = arg1 & mask;
2822 b = arg2 & mask;
2823 d = word_reverse(1 + word_reverse(a | ~b));
2824 return (arg1 & ~mask) | (d & b);
/*
 * Count the leading bits of val that equal its top bit: when bit 31 is set
 * the value is inverted first, then clz32() is applied.
 */
uint32_t helper_cntlsw32(uint32_t val)
{
    const uint32_t folded = (val & 0x80000000) ? ~val : val;

    return clz32(folded);
}
/* Thin wrapper: returns clz32() of val. */
uint32_t helper_cntlzw32(uint32_t val)
{
    const uint32_t zeros = clz32(val);

    return zeros;
}
2841 /* 440 specific */
2842 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2843 target_ulong low, uint32_t update_Rc)
2845 target_ulong mask;
2846 int i;
2848 i = 1;
2849 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2850 if ((high & mask) == 0) {
2851 if (update_Rc) {
2852 env->crf[0] = 0x4;
2854 goto done;
2856 i++;
2858 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2859 if ((low & mask) == 0) {
2860 if (update_Rc) {
2861 env->crf[0] = 0x8;
2863 goto done;
2865 i++;
2867 i = 8;
2868 if (update_Rc) {
2869 env->crf[0] = 0x2;
2871 done:
2872 env->xer = (env->xer & ~0x7F) | i;
2873 if (update_Rc) {
2874 env->crf[0] |= xer_so;
2876 return i;