[qemu/kevin.git] / target-ppc / int_helper.c
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "exec/exec-all.h"
22 #include "qemu/host-utils.h"
23 #include "exec/helper-proto.h"
24 #include "crypto/aes.h"
26 #include "helper_regs.h"
27 /*****************************************************************************/
28 /* Fixed point operations helpers */
30 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
31 uint32_t oe)
33 uint64_t rt = 0;
34 int overflow = 0;
36 uint64_t dividend = (uint64_t)ra << 32;
37 uint64_t divisor = (uint32_t)rb;
39 if (unlikely(divisor == 0)) {
40 overflow = 1;
41 } else {
42 rt = dividend / divisor;
43 overflow = rt > UINT32_MAX;
46 if (unlikely(overflow)) {
47 rt = 0; /* Undefined */
50 if (oe) {
51 if (unlikely(overflow)) {
52 env->so = env->ov = 1;
53 } else {
54 env->ov = 0;
58 return (target_ulong)rt;
61 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
62 uint32_t oe)
64 int64_t rt = 0;
65 int overflow = 0;
67 int64_t dividend = (int64_t)ra << 32;
68 int64_t divisor = (int64_t)((int32_t)rb);
70 if (unlikely((divisor == 0) ||
71 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
72 overflow = 1;
73 } else {
74 rt = dividend / divisor;
75 overflow = rt != (int32_t)rt;
78 if (unlikely(overflow)) {
79 rt = 0; /* Undefined */
82 if (oe) {
83 if (unlikely(overflow)) {
84 env->so = env->ov = 1;
85 } else {
86 env->ov = 0;
90 return (target_ulong)rt;
93 #if defined(TARGET_PPC64)
95 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
97 uint64_t rt = 0;
98 int overflow = 0;
100 overflow = divu128(&rt, &ra, rb);
102 if (unlikely(overflow)) {
103 rt = 0; /* Undefined */
106 if (oe) {
107 if (unlikely(overflow)) {
108 env->so = env->ov = 1;
109 } else {
110 env->ov = 0;
114 return rt;
117 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
119 int64_t rt = 0;
120 int64_t ra = (int64_t)rau;
121 int64_t rb = (int64_t)rbu;
122 int overflow = divs128(&rt, &ra, rb);
124 if (unlikely(overflow)) {
125 rt = 0; /* Undefined */
128 if (oe) {
130 if (unlikely(overflow)) {
131 env->so = env->ov = 1;
132 } else {
133 env->ov = 0;
137 return rt;
140 #endif
143 target_ulong helper_cntlzw(target_ulong t)
145 return clz32(t);
148 target_ulong helper_cnttzw(target_ulong t)
150 return ctz32(t);
153 #if defined(TARGET_PPC64)
154 /* if x = 0xab, returns 0xabababababababab */
155 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
157 /* subtract 1 from each byte, AND with the inverse, and check whether the
158 * MSB is set in each byte.
159 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
160 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
162 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
164 /* When you XOR the pattern and there is a match, that byte will be zero */
165 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
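/* A concrete evaluation may help; assuming a 64-bit target_ulong,
 * hasvalue(0x1122334455667788, 0x55) computes
 *   x ^ pattern(0x55)     = 0x44776611003322DD  (the matching byte becomes 0x00)
 *   haszero(that value)   = 0x0000000080000000  (MSB set only in the zeroed byte)
 * so helper_cmpeqb below reports the match in CRF_GT.
 */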
167 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
169 return hasvalue(rb, ra) ? 1 << CRF_GT : 0;
172 #undef pattern
173 #undef haszero
174 #undef hasvalue
176 target_ulong helper_cntlzd(target_ulong t)
178 return clz64(t);
181 target_ulong helper_cnttzd(target_ulong t)
183 return ctz64(t);
186 /* Return an invalid random number.
188 * FIXME: Add an rng backend or other mechanism to get a cryptographically
189 * suitable random number
191 target_ulong helper_darn32(void)
193 return -1;
196 target_ulong helper_darn64(void)
198 return -1;
201 #endif
203 #if defined(TARGET_PPC64)
205 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
207 int i;
208 uint64_t ra = 0;
210 for (i = 0; i < 8; i++) {
211 int index = (rs >> (i*8)) & 0xFF;
212 if (index < 64) {
213 if (rb & (1ull << (63-index))) {
214 ra |= 1 << i;
218 return ra;
221 #endif
223 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
225 target_ulong mask = 0xff;
226 target_ulong ra = 0;
227 int i;
229 for (i = 0; i < sizeof(target_ulong); i++) {
230 if ((rs & mask) == (rb & mask)) {
231 ra |= mask;
233 mask <<= 8;
235 return ra;
238 /* shift right arithmetic helper */
239 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
240 target_ulong shift)
242 int32_t ret;
244 if (likely(!(shift & 0x20))) {
245 if (likely((uint32_t)shift != 0)) {
246 shift &= 0x1f;
247 ret = (int32_t)value >> shift;
248 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
249 env->ca = 0;
250 } else {
251 env->ca = 1;
253 } else {
254 ret = (int32_t)value;
255 env->ca = 0;
257 } else {
258 ret = (int32_t)value >> 31;
259 env->ca = (ret != 0);
261 return (target_long)ret;
264 #if defined(TARGET_PPC64)
265 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
266 target_ulong shift)
268 int64_t ret;
270 if (likely(!(shift & 0x40))) {
271 if (likely((uint64_t)shift != 0)) {
272 shift &= 0x3f;
273 ret = (int64_t)value >> shift;
274 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
275 env->ca = 0;
276 } else {
277 env->ca = 1;
279 } else {
280 ret = (int64_t)value;
281 env->ca = 0;
283 } else {
284 ret = (int64_t)value >> 63;
285 env->ca = (ret != 0);
287 return ret;
289 #endif
291 #if defined(TARGET_PPC64)
292 target_ulong helper_popcntb(target_ulong val)
294 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
295 0x5555555555555555ULL);
296 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
297 0x3333333333333333ULL);
298 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
299 0x0f0f0f0f0f0f0f0fULL);
300 return val;
303 target_ulong helper_popcntw(target_ulong val)
305 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
306 0x5555555555555555ULL);
307 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
308 0x3333333333333333ULL);
309 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
310 0x0f0f0f0f0f0f0f0fULL);
311 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
312 0x00ff00ff00ff00ffULL);
313 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
314 0x0000ffff0000ffffULL);
315 return val;
318 target_ulong helper_popcntd(target_ulong val)
320 return ctpop64(val);
322 #else
323 target_ulong helper_popcntb(target_ulong val)
325 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
326 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
327 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
328 return val;
331 target_ulong helper_popcntw(target_ulong val)
333 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
334 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
335 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
336 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
337 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
338 return val;
340 #endif
342 /*****************************************************************************/
343 /* PowerPC 601 specific instructions (POWER bridge) */
344 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
346 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
348 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
349 (int32_t)arg2 == 0) {
350 env->spr[SPR_MQ] = 0;
351 return INT32_MIN;
352 } else {
353 env->spr[SPR_MQ] = tmp % arg2;
354 return tmp / (int32_t)arg2;
358 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
359 target_ulong arg2)
361 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
363 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
364 (int32_t)arg2 == 0) {
365 env->so = env->ov = 1;
366 env->spr[SPR_MQ] = 0;
367 return INT32_MIN;
368 } else {
369 env->spr[SPR_MQ] = tmp % arg2;
370 tmp /= (int32_t)arg2;
371 if ((int32_t)tmp != tmp) {
372 env->so = env->ov = 1;
373 } else {
374 env->ov = 0;
376 return tmp;
380 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
381 target_ulong arg2)
383 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
384 (int32_t)arg2 == 0) {
385 env->spr[SPR_MQ] = 0;
386 return INT32_MIN;
387 } else {
388 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
389 return (int32_t)arg1 / (int32_t)arg2;
393 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
394 target_ulong arg2)
396 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
397 (int32_t)arg2 == 0) {
398 env->so = env->ov = 1;
399 env->spr[SPR_MQ] = 0;
400 return INT32_MIN;
401 } else {
402 env->ov = 0;
403 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
404 return (int32_t)arg1 / (int32_t)arg2;
408 /*****************************************************************************/
409 /* 602 specific instructions */
410 /* mfrom is the most crazy instruction ever seen, imho ! */
411 /* Real implementation uses a ROM table. Do the same */
412 /* Extremely decomposed:
413 *   x = -arg / 256
414 *   return 256 * log10(10^x + 1.0) + 0.5
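/* For example, for arg = 0 the formula evaluates to
 * 256 * log10(1.0 + 1.0) + 0.5, roughly 77.6, so the corresponding
 * mfrom_ROM_table entry would presumably be 77 after truncation to an integer.
 */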
416 #if !defined(CONFIG_USER_ONLY)
417 target_ulong helper_602_mfrom(target_ulong arg)
419 if (likely(arg < 602)) {
420 #include "mfrom_table.c"
421 return mfrom_ROM_table[arg];
422 } else {
423 return 0;
426 #endif
428 /*****************************************************************************/
429 /* Altivec extension helpers */
430 #if defined(HOST_WORDS_BIGENDIAN)
431 #define HI_IDX 0
432 #define LO_IDX 1
433 #define AVRB(i) u8[i]
434 #define AVRW(i) u32[i]
435 #else
436 #define HI_IDX 1
437 #define LO_IDX 0
438 #define AVRB(i) u8[15-(i)]
439 #define AVRW(i) u32[3-(i)]
440 #endif
442 #if defined(HOST_WORDS_BIGENDIAN)
443 #define VECTOR_FOR_INORDER_I(index, element) \
444 for (index = 0; index < ARRAY_SIZE(r->element); index++)
445 #else
446 #define VECTOR_FOR_INORDER_I(index, element) \
447 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
448 #endif
450 /* Saturating arithmetic helpers. */
451 #define SATCVT(from, to, from_type, to_type, min, max) \
452 static inline to_type cvt##from##to(from_type x, int *sat) \
454 to_type r; \
456 if (x < (from_type)min) { \
457 r = min; \
458 *sat = 1; \
459 } else if (x > (from_type)max) { \
460 r = max; \
461 *sat = 1; \
462 } else { \
463 r = x; \
465 return r; \
467 #define SATCVTU(from, to, from_type, to_type, min, max) \
468 static inline to_type cvt##from##to(from_type x, int *sat) \
470 to_type r; \
472 if (x > (from_type)max) { \
473 r = max; \
474 *sat = 1; \
475 } else { \
476 r = x; \
478 return r; \
480 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
481 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
482 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
484 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
485 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
486 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
487 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
488 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
489 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
490 #undef SATCVT
491 #undef SATCVTU
493 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
495 int i, j = (sh & 0xf);
497 VECTOR_FOR_INORDER_I(i, u8) {
498 r->u8[i] = j++;
502 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
504 int i, j = 0x10 - (sh & 0xf);
506 VECTOR_FOR_INORDER_I(i, u8) {
507 r->u8[i] = j++;
511 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
513 #if defined(HOST_WORDS_BIGENDIAN)
514 env->vscr = r->u32[3];
515 #else
516 env->vscr = r->u32[0];
517 #endif
518 set_flush_to_zero(vscr_nj, &env->vec_status);
521 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
523 int i;
525 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
526 r->u32[i] = ~a->u32[i] < b->u32[i];
530 #define VARITH_DO(name, op, element) \
531 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
533 int i; \
535 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
536 r->element[i] = a->element[i] op b->element[i]; \
539 #define VARITH(suffix, element) \
540 VARITH_DO(add##suffix, +, element) \
541 VARITH_DO(sub##suffix, -, element)
542 VARITH(ubm, u8)
543 VARITH(uhm, u16)
544 VARITH(uwm, u32)
545 VARITH(udm, u64)
546 VARITH_DO(muluwm, *, u32)
547 #undef VARITH_DO
548 #undef VARITH
550 #define VARITHFP(suffix, func) \
551 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
552 ppc_avr_t *b) \
554 int i; \
556 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
557 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
560 VARITHFP(addfp, float32_add)
561 VARITHFP(subfp, float32_sub)
562 VARITHFP(minfp, float32_min)
563 VARITHFP(maxfp, float32_max)
564 #undef VARITHFP
566 #define VARITHFPFMA(suffix, type) \
567 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
568 ppc_avr_t *b, ppc_avr_t *c) \
570 int i; \
571 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
572 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
573 type, &env->vec_status); \
576 VARITHFPFMA(maddfp, 0);
577 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
578 #undef VARITHFPFMA
580 #define VARITHSAT_CASE(type, op, cvt, element) \
582 type result = (type)a->element[i] op (type)b->element[i]; \
583 r->element[i] = cvt(result, &sat); \
586 #define VARITHSAT_DO(name, op, optype, cvt, element) \
587 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
588 ppc_avr_t *b) \
590 int sat = 0; \
591 int i; \
593 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
594 switch (sizeof(r->element[0])) { \
595 case 1: \
596 VARITHSAT_CASE(optype, op, cvt, element); \
597 break; \
598 case 2: \
599 VARITHSAT_CASE(optype, op, cvt, element); \
600 break; \
601 case 4: \
602 VARITHSAT_CASE(optype, op, cvt, element); \
603 break; \
606 if (sat) { \
607 env->vscr |= (1 << VSCR_SAT); \
610 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
611 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
612 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
613 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
614 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
615 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
616 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
617 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
618 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
619 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
620 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
621 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
622 #undef VARITHSAT_CASE
623 #undef VARITHSAT_DO
624 #undef VARITHSAT_SIGNED
625 #undef VARITHSAT_UNSIGNED
627 #define VAVG_DO(name, element, etype) \
628 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
630 int i; \
632 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
633 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
634 r->element[i] = x >> 1; \
638 #define VAVG(type, signed_element, signed_type, unsigned_element, \
639 unsigned_type) \
640 VAVG_DO(avgs##type, signed_element, signed_type) \
641 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
642 VAVG(b, s8, int16_t, u8, uint16_t)
643 VAVG(h, s16, int32_t, u16, uint32_t)
644 VAVG(w, s32, int64_t, u32, uint64_t)
645 #undef VAVG_DO
646 #undef VAVG
648 #define VABSDU_DO(name, element) \
649 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
651 int i; \
653 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
654 r->element[i] = (a->element[i] > b->element[i]) ? \
655 (a->element[i] - b->element[i]) : \
656 (b->element[i] - a->element[i]); \
660 /* VABSDU - Vector absolute difference unsigned
661 * type - instruction mnemonic suffix (b: byte, h: halfword, w: word)
662 * element - element type to access from vector
664 #define VABSDU(type, element) \
665 VABSDU_DO(absdu##type, element)
666 VABSDU(b, u8)
667 VABSDU(h, u16)
668 VABSDU(w, u32)
669 #undef VABSDU_DO
670 #undef VABSDU
672 #define VCF(suffix, cvt, element) \
673 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
674 ppc_avr_t *b, uint32_t uim) \
676 int i; \
678 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
679 float32 t = cvt(b->element[i], &env->vec_status); \
680 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
683 VCF(ux, uint32_to_float32, u32)
684 VCF(sx, int32_to_float32, s32)
685 #undef VCF
687 #define VCMP_DO(suffix, compare, element, record) \
688 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
689 ppc_avr_t *a, ppc_avr_t *b) \
691 uint64_t ones = (uint64_t)-1; \
692 uint64_t all = ones; \
693 uint64_t none = 0; \
694 int i; \
696 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
697 uint64_t result = (a->element[i] compare b->element[i] ? \
698 ones : 0x0); \
699 switch (sizeof(a->element[0])) { \
700 case 8: \
701 r->u64[i] = result; \
702 break; \
703 case 4: \
704 r->u32[i] = result; \
705 break; \
706 case 2: \
707 r->u16[i] = result; \
708 break; \
709 case 1: \
710 r->u8[i] = result; \
711 break; \
713 all &= result; \
714 none |= result; \
716 if (record) { \
717 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
720 #define VCMP(suffix, compare, element) \
721 VCMP_DO(suffix, compare, element, 0) \
722 VCMP_DO(suffix##_dot, compare, element, 1)
723 VCMP(equb, ==, u8)
724 VCMP(equh, ==, u16)
725 VCMP(equw, ==, u32)
726 VCMP(equd, ==, u64)
727 VCMP(gtub, >, u8)
728 VCMP(gtuh, >, u16)
729 VCMP(gtuw, >, u32)
730 VCMP(gtud, >, u64)
731 VCMP(gtsb, >, s8)
732 VCMP(gtsh, >, s16)
733 VCMP(gtsw, >, s32)
734 VCMP(gtsd, >, s64)
735 #undef VCMP_DO
736 #undef VCMP
738 #define VCMPNEZ_DO(suffix, element, etype, record) \
739 void helper_vcmpnez##suffix(CPUPPCState *env, ppc_avr_t *r, \
740 ppc_avr_t *a, ppc_avr_t *b) \
742 etype ones = (etype)-1; \
743 etype all = ones; \
744 etype none = 0; \
745 int i; \
747 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
748 etype result = ((a->element[i] == 0) \
749 || (b->element[i] == 0) \
750 || (a->element[i] != b->element[i]) ? \
751 ones : 0x0); \
752 r->element[i] = result; \
753 all &= result; \
754 none |= result; \
756 if (record) { \
757 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
761 /* VCMPNEZ - Vector compare not equal to zero
762 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
763 * element - element type to access from vector
765 #define VCMPNEZ(suffix, element, etype) \
766 VCMPNEZ_DO(suffix, element, etype, 0) \
767 VCMPNEZ_DO(suffix##_dot, element, etype, 1)
768 VCMPNEZ(b, u8, uint8_t)
769 VCMPNEZ(h, u16, uint16_t)
770 VCMPNEZ(w, u32, uint32_t)
771 #undef VCMPNEZ_DO
772 #undef VCMPNEZ
774 #define VCMPFP_DO(suffix, compare, order, record) \
775 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
776 ppc_avr_t *a, ppc_avr_t *b) \
778 uint32_t ones = (uint32_t)-1; \
779 uint32_t all = ones; \
780 uint32_t none = 0; \
781 int i; \
783 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
784 uint32_t result; \
785 int rel = float32_compare_quiet(a->f[i], b->f[i], \
786 &env->vec_status); \
787 if (rel == float_relation_unordered) { \
788 result = 0; \
789 } else if (rel compare order) { \
790 result = ones; \
791 } else { \
792 result = 0; \
794 r->u32[i] = result; \
795 all &= result; \
796 none |= result; \
798 if (record) { \
799 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
802 #define VCMPFP(suffix, compare, order) \
803 VCMPFP_DO(suffix, compare, order, 0) \
804 VCMPFP_DO(suffix##_dot, compare, order, 1)
805 VCMPFP(eqfp, ==, float_relation_equal)
806 VCMPFP(gefp, !=, float_relation_less)
807 VCMPFP(gtfp, ==, float_relation_greater)
808 #undef VCMPFP_DO
809 #undef VCMPFP
811 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
812 ppc_avr_t *a, ppc_avr_t *b, int record)
814 int i;
815 int all_in = 0;
817 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
818 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
819 if (le_rel == float_relation_unordered) {
820 r->u32[i] = 0xc0000000;
821 all_in = 1;
822 } else {
823 float32 bneg = float32_chs(b->f[i]);
824 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
825 int le = le_rel != float_relation_greater;
826 int ge = ge_rel != float_relation_less;
828 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
829 all_in |= (!le | !ge);
832 if (record) {
833 env->crf[6] = (all_in == 0) << 1;
837 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
839 vcmpbfp_internal(env, r, a, b, 0);
842 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
843 ppc_avr_t *b)
845 vcmpbfp_internal(env, r, a, b, 1);
848 #define VCT(suffix, satcvt, element) \
849 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
850 ppc_avr_t *b, uint32_t uim) \
852 int i; \
853 int sat = 0; \
854 float_status s = env->vec_status; \
856 set_float_rounding_mode(float_round_to_zero, &s); \
857 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
858 if (float32_is_any_nan(b->f[i])) { \
859 r->element[i] = 0; \
860 } else { \
861 float64 t = float32_to_float64(b->f[i], &s); \
862 int64_t j; \
864 t = float64_scalbn(t, uim, &s); \
865 j = float64_to_int64(t, &s); \
866 r->element[i] = satcvt(j, &sat); \
869 if (sat) { \
870 env->vscr |= (1 << VSCR_SAT); \
873 VCT(uxs, cvtsduw, u32)
874 VCT(sxs, cvtsdsw, s32)
875 #undef VCT
877 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
878 ppc_avr_t *b, ppc_avr_t *c)
880 int sat = 0;
881 int i;
883 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
884 int32_t prod = a->s16[i] * b->s16[i];
885 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
887 r->s16[i] = cvtswsh(t, &sat);
890 if (sat) {
891 env->vscr |= (1 << VSCR_SAT);
895 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
896 ppc_avr_t *b, ppc_avr_t *c)
898 int sat = 0;
899 int i;
901 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
902 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
903 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
904 r->s16[i] = cvtswsh(t, &sat);
907 if (sat) {
908 env->vscr |= (1 << VSCR_SAT);
912 #define VMINMAX_DO(name, compare, element) \
913 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
915 int i; \
917 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
918 if (a->element[i] compare b->element[i]) { \
919 r->element[i] = b->element[i]; \
920 } else { \
921 r->element[i] = a->element[i]; \
925 #define VMINMAX(suffix, element) \
926 VMINMAX_DO(min##suffix, >, element) \
927 VMINMAX_DO(max##suffix, <, element)
928 VMINMAX(sb, s8)
929 VMINMAX(sh, s16)
930 VMINMAX(sw, s32)
931 VMINMAX(sd, s64)
932 VMINMAX(ub, u8)
933 VMINMAX(uh, u16)
934 VMINMAX(uw, u32)
935 VMINMAX(ud, u64)
936 #undef VMINMAX_DO
937 #undef VMINMAX
939 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
941 int i;
943 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
944 int32_t prod = a->s16[i] * b->s16[i];
945 r->s16[i] = (int16_t) (prod + c->s16[i]);
949 #define VMRG_DO(name, element, highp) \
950 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
952 ppc_avr_t result; \
953 int i; \
954 size_t n_elems = ARRAY_SIZE(r->element); \
956 for (i = 0; i < n_elems / 2; i++) { \
957 if (highp) { \
958 result.element[i*2+HI_IDX] = a->element[i]; \
959 result.element[i*2+LO_IDX] = b->element[i]; \
960 } else { \
961 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
962 b->element[n_elems - i - 1]; \
963 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
964 a->element[n_elems - i - 1]; \
967 *r = result; \
969 #if defined(HOST_WORDS_BIGENDIAN)
970 #define MRGHI 0
971 #define MRGLO 1
972 #else
973 #define MRGHI 1
974 #define MRGLO 0
975 #endif
976 #define VMRG(suffix, element) \
977 VMRG_DO(mrgl##suffix, element, MRGHI) \
978 VMRG_DO(mrgh##suffix, element, MRGLO)
979 VMRG(b, u8)
980 VMRG(h, u16)
981 VMRG(w, u32)
982 #undef VMRG_DO
983 #undef VMRG
984 #undef MRGHI
985 #undef MRGLO
987 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
988 ppc_avr_t *b, ppc_avr_t *c)
990 int32_t prod[16];
991 int i;
993 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
994 prod[i] = (int32_t)a->s8[i] * b->u8[i];
997 VECTOR_FOR_INORDER_I(i, s32) {
998 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
999 prod[4 * i + 2] + prod[4 * i + 3];
1003 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1004 ppc_avr_t *b, ppc_avr_t *c)
1006 int32_t prod[8];
1007 int i;
1009 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1010 prod[i] = a->s16[i] * b->s16[i];
1013 VECTOR_FOR_INORDER_I(i, s32) {
1014 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1018 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1019 ppc_avr_t *b, ppc_avr_t *c)
1021 int32_t prod[8];
1022 int i;
1023 int sat = 0;
1025 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1026 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1029 VECTOR_FOR_INORDER_I(i, s32) {
1030 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1032 r->u32[i] = cvtsdsw(t, &sat);
1035 if (sat) {
1036 env->vscr |= (1 << VSCR_SAT);
1040 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1041 ppc_avr_t *b, ppc_avr_t *c)
1043 uint16_t prod[16];
1044 int i;
1046 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1047 prod[i] = a->u8[i] * b->u8[i];
1050 VECTOR_FOR_INORDER_I(i, u32) {
1051 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1052 prod[4 * i + 2] + prod[4 * i + 3];
1056 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1057 ppc_avr_t *b, ppc_avr_t *c)
1059 uint32_t prod[8];
1060 int i;
1062 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1063 prod[i] = a->u16[i] * b->u16[i];
1066 VECTOR_FOR_INORDER_I(i, u32) {
1067 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1071 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1072 ppc_avr_t *b, ppc_avr_t *c)
1074 uint32_t prod[8];
1075 int i;
1076 int sat = 0;
1078 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1079 prod[i] = a->u16[i] * b->u16[i];
1082 VECTOR_FOR_INORDER_I(i, s32) {
1083 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1085 r->u32[i] = cvtuduw(t, &sat);
1088 if (sat) {
1089 env->vscr |= (1 << VSCR_SAT);
1093 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
1094 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1096 int i; \
1098 VECTOR_FOR_INORDER_I(i, prod_element) { \
1099 if (evenp) { \
1100 r->prod_element[i] = \
1101 (cast)a->mul_element[i * 2 + HI_IDX] * \
1102 (cast)b->mul_element[i * 2 + HI_IDX]; \
1103 } else { \
1104 r->prod_element[i] = \
1105 (cast)a->mul_element[i * 2 + LO_IDX] * \
1106 (cast)b->mul_element[i * 2 + LO_IDX]; \
1110 #define VMUL(suffix, mul_element, prod_element, cast) \
1111 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1112 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1113 VMUL(sb, s8, s16, int16_t)
1114 VMUL(sh, s16, s32, int32_t)
1115 VMUL(sw, s32, s64, int64_t)
1116 VMUL(ub, u8, u16, uint16_t)
1117 VMUL(uh, u16, u32, uint32_t)
1118 VMUL(uw, u32, u64, uint64_t)
1119 #undef VMUL_DO
1120 #undef VMUL
1122 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1123 ppc_avr_t *c)
1125 ppc_avr_t result;
1126 int i;
1128 VECTOR_FOR_INORDER_I(i, u8) {
1129 int s = c->u8[i] & 0x1f;
1130 #if defined(HOST_WORDS_BIGENDIAN)
1131 int index = s & 0xf;
1132 #else
1133 int index = 15 - (s & 0xf);
1134 #endif
1136 if (s & 0x10) {
1137 result.u8[i] = b->u8[index];
1138 } else {
1139 result.u8[i] = a->u8[index];
1142 *r = result;
1145 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1146 ppc_avr_t *c)
1148 ppc_avr_t result;
1149 int i;
1151 VECTOR_FOR_INORDER_I(i, u8) {
1152 int s = c->u8[i] & 0x1f;
1153 #if defined(HOST_WORDS_BIGENDIAN)
1154 int index = 15 - (s & 0xf);
1155 #else
1156 int index = s & 0xf;
1157 #endif
1159 if (s & 0x10) {
1160 result.u8[i] = a->u8[index];
1161 } else {
1162 result.u8[i] = b->u8[index];
1165 *r = result;
1168 #if defined(HOST_WORDS_BIGENDIAN)
1169 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1170 #define VBPERMD_INDEX(i) (i)
1171 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1172 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1173 #else
1174 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1175 #define VBPERMD_INDEX(i) (1 - i)
1176 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1177 #define EXTRACT_BIT(avr, i, index) \
1178 (extract64((avr)->u64[1 - i], 63 - index, 1))
1179 #endif
1181 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1183 int i, j;
1184 ppc_avr_t result = { .u64 = { 0, 0 } };
1185 VECTOR_FOR_INORDER_I(i, u64) {
1186 for (j = 0; j < 8; j++) {
1187 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1188 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1189 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1193 *r = result;
1196 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1198 int i;
1199 uint64_t perm = 0;
1201 VECTOR_FOR_INORDER_I(i, u8) {
1202 int index = VBPERMQ_INDEX(b, i);
1204 if (index < 128) {
1205 uint64_t mask = (1ull << (63-(index & 0x3F)));
1206 if (a->u64[VBPERMQ_DW(index)] & mask) {
1207 perm |= (0x8000 >> i);
1212 r->u64[HI_IDX] = perm;
1213 r->u64[LO_IDX] = 0;
1216 #undef VBPERMQ_INDEX
1217 #undef VBPERMQ_DW
1219 static const uint64_t VGBBD_MASKS[256] = {
1220 0x0000000000000000ull, /* 00 */
1221 0x0000000000000080ull, /* 01 */
1222 0x0000000000008000ull, /* 02 */
1223 0x0000000000008080ull, /* 03 */
1224 0x0000000000800000ull, /* 04 */
1225 0x0000000000800080ull, /* 05 */
1226 0x0000000000808000ull, /* 06 */
1227 0x0000000000808080ull, /* 07 */
1228 0x0000000080000000ull, /* 08 */
1229 0x0000000080000080ull, /* 09 */
1230 0x0000000080008000ull, /* 0A */
1231 0x0000000080008080ull, /* 0B */
1232 0x0000000080800000ull, /* 0C */
1233 0x0000000080800080ull, /* 0D */
1234 0x0000000080808000ull, /* 0E */
1235 0x0000000080808080ull, /* 0F */
1236 0x0000008000000000ull, /* 10 */
1237 0x0000008000000080ull, /* 11 */
1238 0x0000008000008000ull, /* 12 */
1239 0x0000008000008080ull, /* 13 */
1240 0x0000008000800000ull, /* 14 */
1241 0x0000008000800080ull, /* 15 */
1242 0x0000008000808000ull, /* 16 */
1243 0x0000008000808080ull, /* 17 */
1244 0x0000008080000000ull, /* 18 */
1245 0x0000008080000080ull, /* 19 */
1246 0x0000008080008000ull, /* 1A */
1247 0x0000008080008080ull, /* 1B */
1248 0x0000008080800000ull, /* 1C */
1249 0x0000008080800080ull, /* 1D */
1250 0x0000008080808000ull, /* 1E */
1251 0x0000008080808080ull, /* 1F */
1252 0x0000800000000000ull, /* 20 */
1253 0x0000800000000080ull, /* 21 */
1254 0x0000800000008000ull, /* 22 */
1255 0x0000800000008080ull, /* 23 */
1256 0x0000800000800000ull, /* 24 */
1257 0x0000800000800080ull, /* 25 */
1258 0x0000800000808000ull, /* 26 */
1259 0x0000800000808080ull, /* 27 */
1260 0x0000800080000000ull, /* 28 */
1261 0x0000800080000080ull, /* 29 */
1262 0x0000800080008000ull, /* 2A */
1263 0x0000800080008080ull, /* 2B */
1264 0x0000800080800000ull, /* 2C */
1265 0x0000800080800080ull, /* 2D */
1266 0x0000800080808000ull, /* 2E */
1267 0x0000800080808080ull, /* 2F */
1268 0x0000808000000000ull, /* 30 */
1269 0x0000808000000080ull, /* 31 */
1270 0x0000808000008000ull, /* 32 */
1271 0x0000808000008080ull, /* 33 */
1272 0x0000808000800000ull, /* 34 */
1273 0x0000808000800080ull, /* 35 */
1274 0x0000808000808000ull, /* 36 */
1275 0x0000808000808080ull, /* 37 */
1276 0x0000808080000000ull, /* 38 */
1277 0x0000808080000080ull, /* 39 */
1278 0x0000808080008000ull, /* 3A */
1279 0x0000808080008080ull, /* 3B */
1280 0x0000808080800000ull, /* 3C */
1281 0x0000808080800080ull, /* 3D */
1282 0x0000808080808000ull, /* 3E */
1283 0x0000808080808080ull, /* 3F */
1284 0x0080000000000000ull, /* 40 */
1285 0x0080000000000080ull, /* 41 */
1286 0x0080000000008000ull, /* 42 */
1287 0x0080000000008080ull, /* 43 */
1288 0x0080000000800000ull, /* 44 */
1289 0x0080000000800080ull, /* 45 */
1290 0x0080000000808000ull, /* 46 */
1291 0x0080000000808080ull, /* 47 */
1292 0x0080000080000000ull, /* 48 */
1293 0x0080000080000080ull, /* 49 */
1294 0x0080000080008000ull, /* 4A */
1295 0x0080000080008080ull, /* 4B */
1296 0x0080000080800000ull, /* 4C */
1297 0x0080000080800080ull, /* 4D */
1298 0x0080000080808000ull, /* 4E */
1299 0x0080000080808080ull, /* 4F */
1300 0x0080008000000000ull, /* 50 */
1301 0x0080008000000080ull, /* 51 */
1302 0x0080008000008000ull, /* 52 */
1303 0x0080008000008080ull, /* 53 */
1304 0x0080008000800000ull, /* 54 */
1305 0x0080008000800080ull, /* 55 */
1306 0x0080008000808000ull, /* 56 */
1307 0x0080008000808080ull, /* 57 */
1308 0x0080008080000000ull, /* 58 */
1309 0x0080008080000080ull, /* 59 */
1310 0x0080008080008000ull, /* 5A */
1311 0x0080008080008080ull, /* 5B */
1312 0x0080008080800000ull, /* 5C */
1313 0x0080008080800080ull, /* 5D */
1314 0x0080008080808000ull, /* 5E */
1315 0x0080008080808080ull, /* 5F */
1316 0x0080800000000000ull, /* 60 */
1317 0x0080800000000080ull, /* 61 */
1318 0x0080800000008000ull, /* 62 */
1319 0x0080800000008080ull, /* 63 */
1320 0x0080800000800000ull, /* 64 */
1321 0x0080800000800080ull, /* 65 */
1322 0x0080800000808000ull, /* 66 */
1323 0x0080800000808080ull, /* 67 */
1324 0x0080800080000000ull, /* 68 */
1325 0x0080800080000080ull, /* 69 */
1326 0x0080800080008000ull, /* 6A */
1327 0x0080800080008080ull, /* 6B */
1328 0x0080800080800000ull, /* 6C */
1329 0x0080800080800080ull, /* 6D */
1330 0x0080800080808000ull, /* 6E */
1331 0x0080800080808080ull, /* 6F */
1332 0x0080808000000000ull, /* 70 */
1333 0x0080808000000080ull, /* 71 */
1334 0x0080808000008000ull, /* 72 */
1335 0x0080808000008080ull, /* 73 */
1336 0x0080808000800000ull, /* 74 */
1337 0x0080808000800080ull, /* 75 */
1338 0x0080808000808000ull, /* 76 */
1339 0x0080808000808080ull, /* 77 */
1340 0x0080808080000000ull, /* 78 */
1341 0x0080808080000080ull, /* 79 */
1342 0x0080808080008000ull, /* 7A */
1343 0x0080808080008080ull, /* 7B */
1344 0x0080808080800000ull, /* 7C */
1345 0x0080808080800080ull, /* 7D */
1346 0x0080808080808000ull, /* 7E */
1347 0x0080808080808080ull, /* 7F */
1348 0x8000000000000000ull, /* 80 */
1349 0x8000000000000080ull, /* 81 */
1350 0x8000000000008000ull, /* 82 */
1351 0x8000000000008080ull, /* 83 */
1352 0x8000000000800000ull, /* 84 */
1353 0x8000000000800080ull, /* 85 */
1354 0x8000000000808000ull, /* 86 */
1355 0x8000000000808080ull, /* 87 */
1356 0x8000000080000000ull, /* 88 */
1357 0x8000000080000080ull, /* 89 */
1358 0x8000000080008000ull, /* 8A */
1359 0x8000000080008080ull, /* 8B */
1360 0x8000000080800000ull, /* 8C */
1361 0x8000000080800080ull, /* 8D */
1362 0x8000000080808000ull, /* 8E */
1363 0x8000000080808080ull, /* 8F */
1364 0x8000008000000000ull, /* 90 */
1365 0x8000008000000080ull, /* 91 */
1366 0x8000008000008000ull, /* 92 */
1367 0x8000008000008080ull, /* 93 */
1368 0x8000008000800000ull, /* 94 */
1369 0x8000008000800080ull, /* 95 */
1370 0x8000008000808000ull, /* 96 */
1371 0x8000008000808080ull, /* 97 */
1372 0x8000008080000000ull, /* 98 */
1373 0x8000008080000080ull, /* 99 */
1374 0x8000008080008000ull, /* 9A */
1375 0x8000008080008080ull, /* 9B */
1376 0x8000008080800000ull, /* 9C */
1377 0x8000008080800080ull, /* 9D */
1378 0x8000008080808000ull, /* 9E */
1379 0x8000008080808080ull, /* 9F */
1380 0x8000800000000000ull, /* A0 */
1381 0x8000800000000080ull, /* A1 */
1382 0x8000800000008000ull, /* A2 */
1383 0x8000800000008080ull, /* A3 */
1384 0x8000800000800000ull, /* A4 */
1385 0x8000800000800080ull, /* A5 */
1386 0x8000800000808000ull, /* A6 */
1387 0x8000800000808080ull, /* A7 */
1388 0x8000800080000000ull, /* A8 */
1389 0x8000800080000080ull, /* A9 */
1390 0x8000800080008000ull, /* AA */
1391 0x8000800080008080ull, /* AB */
1392 0x8000800080800000ull, /* AC */
1393 0x8000800080800080ull, /* AD */
1394 0x8000800080808000ull, /* AE */
1395 0x8000800080808080ull, /* AF */
1396 0x8000808000000000ull, /* B0 */
1397 0x8000808000000080ull, /* B1 */
1398 0x8000808000008000ull, /* B2 */
1399 0x8000808000008080ull, /* B3 */
1400 0x8000808000800000ull, /* B4 */
1401 0x8000808000800080ull, /* B5 */
1402 0x8000808000808000ull, /* B6 */
1403 0x8000808000808080ull, /* B7 */
1404 0x8000808080000000ull, /* B8 */
1405 0x8000808080000080ull, /* B9 */
1406 0x8000808080008000ull, /* BA */
1407 0x8000808080008080ull, /* BB */
1408 0x8000808080800000ull, /* BC */
1409 0x8000808080800080ull, /* BD */
1410 0x8000808080808000ull, /* BE */
1411 0x8000808080808080ull, /* BF */
1412 0x8080000000000000ull, /* C0 */
1413 0x8080000000000080ull, /* C1 */
1414 0x8080000000008000ull, /* C2 */
1415 0x8080000000008080ull, /* C3 */
1416 0x8080000000800000ull, /* C4 */
1417 0x8080000000800080ull, /* C5 */
1418 0x8080000000808000ull, /* C6 */
1419 0x8080000000808080ull, /* C7 */
1420 0x8080000080000000ull, /* C8 */
1421 0x8080000080000080ull, /* C9 */
1422 0x8080000080008000ull, /* CA */
1423 0x8080000080008080ull, /* CB */
1424 0x8080000080800000ull, /* CC */
1425 0x8080000080800080ull, /* CD */
1426 0x8080000080808000ull, /* CE */
1427 0x8080000080808080ull, /* CF */
1428 0x8080008000000000ull, /* D0 */
1429 0x8080008000000080ull, /* D1 */
1430 0x8080008000008000ull, /* D2 */
1431 0x8080008000008080ull, /* D3 */
1432 0x8080008000800000ull, /* D4 */
1433 0x8080008000800080ull, /* D5 */
1434 0x8080008000808000ull, /* D6 */
1435 0x8080008000808080ull, /* D7 */
1436 0x8080008080000000ull, /* D8 */
1437 0x8080008080000080ull, /* D9 */
1438 0x8080008080008000ull, /* DA */
1439 0x8080008080008080ull, /* DB */
1440 0x8080008080800000ull, /* DC */
1441 0x8080008080800080ull, /* DD */
1442 0x8080008080808000ull, /* DE */
1443 0x8080008080808080ull, /* DF */
1444 0x8080800000000000ull, /* E0 */
1445 0x8080800000000080ull, /* E1 */
1446 0x8080800000008000ull, /* E2 */
1447 0x8080800000008080ull, /* E3 */
1448 0x8080800000800000ull, /* E4 */
1449 0x8080800000800080ull, /* E5 */
1450 0x8080800000808000ull, /* E6 */
1451 0x8080800000808080ull, /* E7 */
1452 0x8080800080000000ull, /* E8 */
1453 0x8080800080000080ull, /* E9 */
1454 0x8080800080008000ull, /* EA */
1455 0x8080800080008080ull, /* EB */
1456 0x8080800080800000ull, /* EC */
1457 0x8080800080800080ull, /* ED */
1458 0x8080800080808000ull, /* EE */
1459 0x8080800080808080ull, /* EF */
1460 0x8080808000000000ull, /* F0 */
1461 0x8080808000000080ull, /* F1 */
1462 0x8080808000008000ull, /* F2 */
1463 0x8080808000008080ull, /* F3 */
1464 0x8080808000800000ull, /* F4 */
1465 0x8080808000800080ull, /* F5 */
1466 0x8080808000808000ull, /* F6 */
1467 0x8080808000808080ull, /* F7 */
1468 0x8080808080000000ull, /* F8 */
1469 0x8080808080000080ull, /* F9 */
1470 0x8080808080008000ull, /* FA */
1471 0x8080808080008080ull, /* FB */
1472 0x8080808080800000ull, /* FC */
1473 0x8080808080800080ull, /* FD */
1474 0x8080808080808000ull, /* FE */
1475 0x8080808080808080ull, /* FF */
1478 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1480 int i;
1481 uint64_t t[2] = { 0, 0 };
1483 VECTOR_FOR_INORDER_I(i, u8) {
1484 #if defined(HOST_WORDS_BIGENDIAN)
1485 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1486 #else
1487 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1488 #endif
1491 r->u64[0] = t[0];
1492 r->u64[1] = t[1];
1495 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1496 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1498 int i, j; \
1499 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1501 VECTOR_FOR_INORDER_I(i, srcfld) { \
1502 prod[i] = 0; \
1503 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1504 if (a->srcfld[i] & (1ull<<j)) { \
1505 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1510 VECTOR_FOR_INORDER_I(i, trgfld) { \
1511 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1515 PMSUM(vpmsumb, u8, u16, uint16_t)
1516 PMSUM(vpmsumh, u16, u32, uint32_t)
1517 PMSUM(vpmsumw, u32, u64, uint64_t)
1519 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1522 #ifdef CONFIG_INT128
1523 int i, j;
1524 __uint128_t prod[2];
1526 VECTOR_FOR_INORDER_I(i, u64) {
1527 prod[i] = 0;
1528 for (j = 0; j < 64; j++) {
1529 if (a->u64[i] & (1ull<<j)) {
1530 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1535 r->u128 = prod[0] ^ prod[1];
1537 #else
1538 int i, j;
1539 ppc_avr_t prod[2];
1541 VECTOR_FOR_INORDER_I(i, u64) {
1542 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1543 for (j = 0; j < 64; j++) {
1544 if (a->u64[i] & (1ull<<j)) {
1545 ppc_avr_t bshift;
1546 if (j == 0) {
1547 bshift.u64[HI_IDX] = 0;
1548 bshift.u64[LO_IDX] = b->u64[i];
1549 } else {
1550 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1551 bshift.u64[LO_IDX] = b->u64[i] << j;
1553 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1554 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1559 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1560 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1561 #endif
1565 #if defined(HOST_WORDS_BIGENDIAN)
1566 #define PKBIG 1
1567 #else
1568 #define PKBIG 0
1569 #endif
1570 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1572 int i, j;
1573 ppc_avr_t result;
1574 #if defined(HOST_WORDS_BIGENDIAN)
1575 const ppc_avr_t *x[2] = { a, b };
1576 #else
1577 const ppc_avr_t *x[2] = { b, a };
1578 #endif
1580 VECTOR_FOR_INORDER_I(i, u64) {
1581 VECTOR_FOR_INORDER_I(j, u32) {
1582 uint32_t e = x[i]->u32[j];
1584 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1585 ((e >> 6) & 0x3e0) |
1586 ((e >> 3) & 0x1f));
1589 *r = result;
1592 #define VPK(suffix, from, to, cvt, dosat) \
1593 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1594 ppc_avr_t *a, ppc_avr_t *b) \
1596 int i; \
1597 int sat = 0; \
1598 ppc_avr_t result; \
1599 ppc_avr_t *a0 = PKBIG ? a : b; \
1600 ppc_avr_t *a1 = PKBIG ? b : a; \
1602 VECTOR_FOR_INORDER_I(i, from) { \
1603 result.to[i] = cvt(a0->from[i], &sat); \
1604 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1606 *r = result; \
1607 if (dosat && sat) { \
1608 env->vscr |= (1 << VSCR_SAT); \
1611 #define I(x, y) (x)
1612 VPK(shss, s16, s8, cvtshsb, 1)
1613 VPK(shus, s16, u8, cvtshub, 1)
1614 VPK(swss, s32, s16, cvtswsh, 1)
1615 VPK(swus, s32, u16, cvtswuh, 1)
1616 VPK(sdss, s64, s32, cvtsdsw, 1)
1617 VPK(sdus, s64, u32, cvtsduw, 1)
1618 VPK(uhus, u16, u8, cvtuhub, 1)
1619 VPK(uwus, u32, u16, cvtuwuh, 1)
1620 VPK(udus, u64, u32, cvtuduw, 1)
1621 VPK(uhum, u16, u8, I, 0)
1622 VPK(uwum, u32, u16, I, 0)
1623 VPK(udum, u64, u32, I, 0)
1624 #undef I
1625 #undef VPK
1626 #undef PKBIG
1628 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1630 int i;
1632 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1633 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1637 #define VRFI(suffix, rounding) \
1638 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1639 ppc_avr_t *b) \
1641 int i; \
1642 float_status s = env->vec_status; \
1644 set_float_rounding_mode(rounding, &s); \
1645 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1646 r->f[i] = float32_round_to_int (b->f[i], &s); \
1649 VRFI(n, float_round_nearest_even)
1650 VRFI(m, float_round_down)
1651 VRFI(p, float_round_up)
1652 VRFI(z, float_round_to_zero)
1653 #undef VRFI
1655 #define VROTATE(suffix, element, mask) \
1656 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1658 int i; \
1660 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1661 unsigned int shift = b->element[i] & mask; \
1662 r->element[i] = (a->element[i] << shift) | \
1663 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1666 VROTATE(b, u8, 0x7)
1667 VROTATE(h, u16, 0xF)
1668 VROTATE(w, u32, 0x1F)
1669 VROTATE(d, u64, 0x3F)
1670 #undef VROTATE
1672 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1674 int i;
1676 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1677 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1679 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1683 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1684 ppc_avr_t *c)
1686 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1687 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1690 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1692 int i;
1694 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1695 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1699 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1701 int i;
1703 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1704 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1708 /* The specification says that the results are undefined if the shift
1709 * counts are not all identical. We check that they are, to conform to
1710 * what real hardware appears to do. */
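/* As a sketch of the composition below: for a whole-quadword left shift by 3,
 * carry = a->u64[LO_IDX] >> 61 captures the top three bits of the low half,
 * which are then ORed into the bottom of the shifted high half.
 */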
1711 #define VSHIFT(suffix, leftp) \
1712 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1714 int shift = b->u8[LO_IDX*15] & 0x7; \
1715 int doit = 1; \
1716 int i; \
1718 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1719 doit = doit && ((b->u8[i] & 0x7) == shift); \
1721 if (doit) { \
1722 if (shift == 0) { \
1723 *r = *a; \
1724 } else if (leftp) { \
1725 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1727 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1728 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1729 } else { \
1730 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1732 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1733 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1737 VSHIFT(l, 1)
1738 VSHIFT(r, 0)
1739 #undef VSHIFT
1741 #define VSL(suffix, element, mask) \
1742 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1744 int i; \
1746 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1747 unsigned int shift = b->element[i] & mask; \
1749 r->element[i] = a->element[i] << shift; \
1752 VSL(b, u8, 0x7)
1753 VSL(h, u16, 0x0F)
1754 VSL(w, u32, 0x1F)
1755 VSL(d, u64, 0x3F)
1756 #undef VSL
1758 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1760 int i;
1761 unsigned int shift, bytes, size;
1763 size = ARRAY_SIZE(r->u8);
1764 for (i = 0; i < size; i++) {
1765 shift = b->u8[i] & 0x7; /* extract shift value */
1766 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */
1767 (((i + 1) < size) ? a->u8[i + 1] : 0);
1768 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */
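/* For instance, with a->u8[i] = 0x12, a->u8[i + 1] = 0x34 and a shift count of
 * 4: bytes = 0x1234, (bytes << 4) >> 8 = 0x123, and storing that in r->u8[i]
 * keeps 0x23, i.e. the low nibble of a->u8[i] followed by the top nibble of
 * the next byte.
 */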
1772 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1774 int i;
1775 unsigned int shift, bytes;
1777 /* Use reverse order, as destination and source register can be the same.
1778 * Since the vector is modified in place (saving a temporary), reverse order
1779 * guarantees that a computed result is not fed back into a later iteration.
1781 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1782 shift = b->u8[i] & 0x7; /* extract shift value */
1783 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
1784 /* extract adjacent bytes */
1785 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */
1789 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1791 int sh = shift & 0xf;
1792 int i;
1793 ppc_avr_t result;
1795 #if defined(HOST_WORDS_BIGENDIAN)
1796 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1797 int index = sh + i;
1798 if (index > 0xf) {
1799 result.u8[i] = b->u8[index - 0x10];
1800 } else {
1801 result.u8[i] = a->u8[index];
1804 #else
1805 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1806 int index = (16 - sh) + i;
1807 if (index > 0xf) {
1808 result.u8[i] = a->u8[index - 0x10];
1809 } else {
1810 result.u8[i] = b->u8[index];
1813 #endif
1814 *r = result;
1817 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1819 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1821 #if defined(HOST_WORDS_BIGENDIAN)
1822 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1823 memset(&r->u8[16-sh], 0, sh);
1824 #else
1825 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1826 memset(&r->u8[0], 0, sh);
1827 #endif
1830 /* Experimental testing shows that hardware masks the immediate. */
1831 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1832 #if defined(HOST_WORDS_BIGENDIAN)
1833 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1834 #else
1835 #define SPLAT_ELEMENT(element) \
1836 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1837 #endif
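/* For example, for vspltb (16 byte elements) an immediate of 0x13 is masked
 * down to 0x13 & 15 = 3, matching the observed hardware behaviour noted above.
 */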
1838 #define VSPLT(suffix, element) \
1839 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1841 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1842 int i; \
1844 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1845 r->element[i] = s; \
1848 VSPLT(b, u8)
1849 VSPLT(h, u16)
1850 VSPLT(w, u32)
1851 #undef VSPLT
1852 #undef SPLAT_ELEMENT
1853 #undef _SPLAT_MASKED
1854 #if defined(HOST_WORDS_BIGENDIAN)
1855 #define VINSERT(suffix, element) \
1856 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1858 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
1859 sizeof(r->element[0])); \
1861 #else
1862 #define VINSERT(suffix, element) \
1863 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1865 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1866 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1868 #endif
1869 VINSERT(b, u8)
1870 VINSERT(h, u16)
1871 VINSERT(w, u32)
1872 VINSERT(d, u64)
1873 #undef VINSERT
1874 #if defined(HOST_WORDS_BIGENDIAN)
1875 #define VEXTRACT(suffix, element) \
1876 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1878 uint32_t es = sizeof(r->element[0]); \
1879 memmove(&r->u8[8 - es], &b->u8[index], es); \
1880 memset(&r->u8[8], 0, 8); \
1881 memset(&r->u8[0], 0, 8 - es); \
1883 #else
1884 #define VEXTRACT(suffix, element) \
1885 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1887 uint32_t es = sizeof(r->element[0]); \
1888 uint32_t s = (16 - index) - es; \
1889 memmove(&r->u8[8], &b->u8[s], es); \
1890 memset(&r->u8[0], 0, 8); \
1891 memset(&r->u8[8 + es], 0, 8 - es); \
1893 #endif
1894 VEXTRACT(ub, u8)
1895 VEXTRACT(uh, u16)
1896 VEXTRACT(uw, u32)
1897 VEXTRACT(d, u64)
1898 #undef VEXTRACT
1900 #define VSPLTI(suffix, element, splat_type) \
1901 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1903 splat_type x = (int8_t)(splat << 3) >> 3; \
1904 int i; \
1906 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1907 r->element[i] = x; \
1910 VSPLTI(b, s8, int8_t)
1911 VSPLTI(h, s16, int16_t)
1912 VSPLTI(w, s32, int32_t)
1913 #undef VSPLTI
1915 #define VSR(suffix, element, mask) \
1916 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1918 int i; \
1920 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1921 unsigned int shift = b->element[i] & mask; \
1922 r->element[i] = a->element[i] >> shift; \
1925 VSR(ab, s8, 0x7)
1926 VSR(ah, s16, 0xF)
1927 VSR(aw, s32, 0x1F)
1928 VSR(ad, s64, 0x3F)
1929 VSR(b, u8, 0x7)
1930 VSR(h, u16, 0xF)
1931 VSR(w, u32, 0x1F)
1932 VSR(d, u64, 0x3F)
1933 #undef VSR
1935 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1937 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1939 #if defined(HOST_WORDS_BIGENDIAN)
1940 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1941 memset(&r->u8[0], 0, sh);
1942 #else
1943 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1944 memset(&r->u8[16 - sh], 0, sh);
1945 #endif
1948 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1950 int i;
1952 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1953 r->u32[i] = a->u32[i] >= b->u32[i];
1957 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1959 int64_t t;
1960 int i, upper;
1961 ppc_avr_t result;
1962 int sat = 0;
1964 #if defined(HOST_WORDS_BIGENDIAN)
1965 upper = ARRAY_SIZE(r->s32)-1;
1966 #else
1967 upper = 0;
1968 #endif
1969 t = (int64_t)b->s32[upper];
1970 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1971 t += a->s32[i];
1972 result.s32[i] = 0;
1974 result.s32[upper] = cvtsdsw(t, &sat);
1975 *r = result;
1977 if (sat) {
1978 env->vscr |= (1 << VSCR_SAT);
1982 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1984 int i, j, upper;
1985 ppc_avr_t result;
1986 int sat = 0;
1988 #if defined(HOST_WORDS_BIGENDIAN)
1989 upper = 1;
1990 #else
1991 upper = 0;
1992 #endif
1993 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1994 int64_t t = (int64_t)b->s32[upper + i * 2];
1996 result.u64[i] = 0;
1997 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1998 t += a->s32[2 * i + j];
2000 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
2003 *r = result;
2004 if (sat) {
2005 env->vscr |= (1 << VSCR_SAT);
2009 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2011 int i, j;
2012 int sat = 0;
2014 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2015 int64_t t = (int64_t)b->s32[i];
2017 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2018 t += a->s8[4 * i + j];
2020 r->s32[i] = cvtsdsw(t, &sat);
2023 if (sat) {
2024 env->vscr |= (1 << VSCR_SAT);
2028 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2030 int sat = 0;
2031 int i;
2033 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2034 int64_t t = (int64_t)b->s32[i];
2036 t += a->s16[2 * i] + a->s16[2 * i + 1];
2037 r->s32[i] = cvtsdsw(t, &sat);
2040 if (sat) {
2041 env->vscr |= (1 << VSCR_SAT);
2045 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2047 int i, j;
2048 int sat = 0;
2050 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2051 uint64_t t = (uint64_t)b->u32[i];
2053 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2054 t += a->u8[4 * i + j];
2056 r->u32[i] = cvtuduw(t, &sat);
2059 if (sat) {
2060 env->vscr |= (1 << VSCR_SAT);
2064 #if defined(HOST_WORDS_BIGENDIAN)
2065 #define UPKHI 1
2066 #define UPKLO 0
2067 #else
2068 #define UPKHI 0
2069 #define UPKLO 1
2070 #endif
2071 #define VUPKPX(suffix, hi) \
2072 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2074 int i; \
2075 ppc_avr_t result; \
2077 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2078 uint16_t e = b->u16[hi ? i : i+4]; \
2079 uint8_t a = (e >> 15) ? 0xff : 0; \
2080 uint8_t r = (e >> 10) & 0x1f; \
2081 uint8_t g = (e >> 5) & 0x1f; \
2082 uint8_t b = e & 0x1f; \
2084 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2086 *r = result; \
2088 VUPKPX(lpx, UPKLO)
2089 VUPKPX(hpx, UPKHI)
2090 #undef VUPKPX
2092 #define VUPK(suffix, unpacked, packee, hi) \
2093 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2095 int i; \
2096 ppc_avr_t result; \
2098 if (hi) { \
2099 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2100 result.unpacked[i] = b->packee[i]; \
2102 } else { \
2103 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2104 i++) { \
2105 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2108 *r = result; \
2110 VUPK(hsb, s16, s8, UPKHI)
2111 VUPK(hsh, s32, s16, UPKHI)
2112 VUPK(hsw, s64, s32, UPKHI)
2113 VUPK(lsb, s16, s8, UPKLO)
2114 VUPK(lsh, s32, s16, UPKLO)
2115 VUPK(lsw, s64, s32, UPKLO)
2116 #undef VUPK
2117 #undef UPKHI
2118 #undef UPKLO
2120 #define VGENERIC_DO(name, element) \
2121 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2123 int i; \
2125 VECTOR_FOR_INORDER_I(i, element) { \
2126 r->element[i] = name(b->element[i]); \
2130 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2131 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2132 #define clzw(v) clz32((v))
2133 #define clzd(v) clz64((v))
2135 VGENERIC_DO(clzb, u8)
2136 VGENERIC_DO(clzh, u16)
2137 VGENERIC_DO(clzw, u32)
2138 VGENERIC_DO(clzd, u64)
2140 #undef clzb
2141 #undef clzh
2142 #undef clzw
2143 #undef clzd
2145 #define ctzb(v) ((v) ? ctz32(v) : 8)
2146 #define ctzh(v) ((v) ? ctz32(v) : 16)
2147 #define ctzw(v) ctz32((v))
2148 #define ctzd(v) ctz64((v))
2150 VGENERIC_DO(ctzb, u8)
2151 VGENERIC_DO(ctzh, u16)
2152 VGENERIC_DO(ctzw, u32)
2153 VGENERIC_DO(ctzd, u64)
2155 #undef ctzb
2156 #undef ctzh
2157 #undef ctzw
2158 #undef ctzd
2160 #define popcntb(v) ctpop8(v)
2161 #define popcnth(v) ctpop16(v)
2162 #define popcntw(v) ctpop32(v)
2163 #define popcntd(v) ctpop64(v)
2165 VGENERIC_DO(popcntb, u8)
2166 VGENERIC_DO(popcnth, u16)
2167 VGENERIC_DO(popcntw, u32)
2168 VGENERIC_DO(popcntd, u64)
2170 #undef popcntb
2171 #undef popcnth
2172 #undef popcntw
2173 #undef popcntd
2175 #undef VGENERIC_DO
2177 #if defined(HOST_WORDS_BIGENDIAN)
2178 #define QW_ONE { .u64 = { 0, 1 } }
2179 #else
2180 #define QW_ONE { .u64 = { 1, 0 } }
2181 #endif
2183 #ifndef CONFIG_INT128
2185 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2187 t->u64[0] = ~a.u64[0];
2188 t->u64[1] = ~a.u64[1];
2191 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2193 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
2194 return -1;
2195 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
2196 return 1;
2197 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
2198 return -1;
2199 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
2200 return 1;
2201 } else {
2202 return 0;
2206 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2208 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2209 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2210 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
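/* The low-limb carry is detected without widening: a + b overflows 64 bits
 * exactly when b > ~a.  avr_qw_addc additionally returns the carry out of
 * the full 128-bit sum.
 */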
2213 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2215 ppc_avr_t not_a;
2216 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2217 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2218 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2219 avr_qw_not(&not_a, a);
2220 return avr_qw_cmpu(not_a, b) < 0;
2223 #endif
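/* Quadword integer add/subtract helpers (vadduqm, vaddcuq, vsubuqm, ...):
 * with CONFIG_INT128 the compiler's 128-bit type is used directly, otherwise
 * the emulation helpers above do the work.
 */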
2225 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2227 #ifdef CONFIG_INT128
2228 r->u128 = a->u128 + b->u128;
2229 #else
2230 avr_qw_add(r, *a, *b);
2231 #endif
2234 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2236 #ifdef CONFIG_INT128
2237 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2238 #else
2240 if (c->u64[LO_IDX] & 1) {
2241 ppc_avr_t tmp;
2243 tmp.u64[HI_IDX] = 0;
2244 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2245 avr_qw_add(&tmp, *a, tmp);
2246 avr_qw_add(r, tmp, *b);
2247 } else {
2248 avr_qw_add(r, *a, *b);
2250 #endif
2253 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2255 #ifdef CONFIG_INT128
2256 r->u128 = (~a->u128 < b->u128);
2257 #else
2258 ppc_avr_t not_a;
2260 avr_qw_not(&not_a, *a);
2262 r->u64[HI_IDX] = 0;
2263 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2264 #endif
2267 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2269 #ifdef CONFIG_INT128
2270 int carry_out = (~a->u128 < b->u128);
2271 if (!carry_out && (c->u128 & 1)) {
2272 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2273 ((a->u128 != 0) || (b->u128 != 0));
2275 r->u128 = carry_out;
2276 #else
2278 int carry_in = c->u64[LO_IDX] & 1;
2279 int carry_out = 0;
2280 ppc_avr_t tmp;
2282 carry_out = avr_qw_addc(&tmp, *a, *b);
2284 if (!carry_out && carry_in) {
2285 ppc_avr_t one = QW_ONE;
2286 carry_out = avr_qw_addc(&tmp, tmp, one);
2288 r->u64[HI_IDX] = 0;
2289 r->u64[LO_IDX] = carry_out;
2290 #endif
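/* Quadword subtraction is computed as a + ~b + 1; the *cuq forms return the
 * carry out of that sum rather than the difference itself.
 */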
2293 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2295 #ifdef CONFIG_INT128
2296 r->u128 = a->u128 - b->u128;
2297 #else
2298 ppc_avr_t tmp;
2299 ppc_avr_t one = QW_ONE;
2301 avr_qw_not(&tmp, *b);
2302 avr_qw_add(&tmp, *a, tmp);
2303 avr_qw_add(r, tmp, one);
2304 #endif
2307 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2309 #ifdef CONFIG_INT128
2310 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2311 #else
2312 ppc_avr_t tmp, sum;
2314 avr_qw_not(&tmp, *b);
2315 avr_qw_add(&sum, *a, tmp);
2317 tmp.u64[HI_IDX] = 0;
2318 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2319 avr_qw_add(r, sum, tmp);
2320 #endif
2323 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2325 #ifdef CONFIG_INT128
2326 r->u128 = (~a->u128 < ~b->u128) ||
2327 (a->u128 + ~b->u128 == (__uint128_t)-1);
2328 #else
2329 int carry = (avr_qw_cmpu(*a, *b) > 0);
2330 if (!carry) {
2331 ppc_avr_t tmp;
2332 avr_qw_not(&tmp, *b);
2333 avr_qw_add(&tmp, *a, tmp);
2334 carry = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2336 r->u64[HI_IDX] = 0;
2337 r->u64[LO_IDX] = carry;
2338 #endif
2341 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2343 #ifdef CONFIG_INT128
2344 r->u128 =
2345 (~a->u128 < ~b->u128) ||
2346 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2347 #else
2348 int carry_in = c->u64[LO_IDX] & 1;
2349 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2350 if (!carry_out && carry_in) {
2351 ppc_avr_t tmp;
2352 avr_qw_not(&tmp, *b);
2353 avr_qw_add(&tmp, *a, tmp);
2354 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2357 r->u64[HI_IDX] = 0;
2358 r->u64[LO_IDX] = carry_out;
2359 #endif
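/* bcdadd/bcdsub operate on signed packed decimal values: 31 BCD digits with
 * the sign in the low-order nibble (digit 0).  The values below are the sign
 * nibbles recognised as plus or minus, and BCD_DIG_BYTE maps a digit index
 * onto the byte that holds it for the host's byte order.
 */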
2362 #define BCD_PLUS_PREF_1 0xC
2363 #define BCD_PLUS_PREF_2 0xF
2364 #define BCD_PLUS_ALT_1 0xA
2365 #define BCD_NEG_PREF 0xD
2366 #define BCD_NEG_ALT 0xB
2367 #define BCD_PLUS_ALT_2 0xE
2369 #if defined(HOST_WORDS_BIGENDIAN)
2370 #define BCD_DIG_BYTE(n) (15 - (n/2))
2371 #else
2372 #define BCD_DIG_BYTE(n) (n/2)
2373 #endif
2375 static int bcd_get_sgn(ppc_avr_t *bcd)
2377 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2378 case BCD_PLUS_PREF_1:
2379 case BCD_PLUS_PREF_2:
2380 case BCD_PLUS_ALT_1:
2381 case BCD_PLUS_ALT_2:
2383 return 1;
2386 case BCD_NEG_PREF:
2387 case BCD_NEG_ALT:
2389 return -1;
2392 default:
2394 return 0;
2399 static int bcd_preferred_sgn(int sgn, int ps)
2401 if (sgn >= 0) {
2402 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2403 } else {
2404 return BCD_NEG_PREF;
2408 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2410 uint8_t result;
2411 if (n & 1) {
2412 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2413 } else {
2414 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2417 if (unlikely(result > 9)) {
2418 *invalid = true;
2420 return result;
2423 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2425 if (n & 1) {
2426 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2427 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2428 } else {
2429 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2430 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
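/* Compare magnitudes only, from the most significant digit (31) down to
 * digit 1; the sign nibble is not examined.
 */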
2434 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2436 int i;
2437 int invalid = 0;
2438 for (i = 31; i > 0; i--) {
2439 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2440 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2441 if (unlikely(invalid)) {
2442 return 0; /* doesn't matter */
2443 } else if (dig_a > dig_b) {
2444 return 1;
2445 } else if (dig_a < dig_b) {
2446 return -1;
2450 return 0;
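/* Schoolbook decimal add/subtract of the 31 magnitude digits.  Both return
 * whether the result is zero, report the final carry/borrow through
 * *overflow, and flag non-decimal digits through *invalid.
 */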
2453 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2454 int *overflow)
2456 int carry = 0;
2457 int i;
2458 int is_zero = 1;
2459 for (i = 1; i <= 31; i++) {
2460 uint8_t digit = bcd_get_digit(a, i, invalid) +
2461 bcd_get_digit(b, i, invalid) + carry;
2462 is_zero &= (digit == 0);
2463 if (digit > 9) {
2464 carry = 1;
2465 digit -= 10;
2466 } else {
2467 carry = 0;
2470 bcd_put_digit(t, digit, i);
2472 if (unlikely(*invalid)) {
2473 return -1;
2477 *overflow = carry;
2478 return is_zero;
2481 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2482 int *overflow)
2484 int carry = 0;
2485 int i;
2486 int is_zero = 1;
2487 for (i = 1; i <= 31; i++) {
2488 uint8_t digit = bcd_get_digit(a, i, invalid) -
2489 bcd_get_digit(b, i, invalid) + carry;
2490 is_zero &= (digit == 0);
2491 if (digit & 0x80) {
2492 carry = -1;
2493 digit += 10;
2494 } else {
2495 carry = 0;
2498 bcd_put_digit(t, digit, i);
2500 if (unlikely(*invalid)) {
2501 return -1;
2505 *overflow = carry;
2506 return is_zero;
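/* bcdadd: if the signs match, the magnitudes are added; otherwise the
 * smaller magnitude is subtracted from the larger and the result takes the
 * sign of the larger operand.  The return value is the CR6 field: LT/GT for
 * a negative/positive result, EQ for zero, SO for invalid operands or
 * overflow.
 */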
2509 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2512 int sgna = bcd_get_sgn(a);
2513 int sgnb = bcd_get_sgn(b);
2514 int invalid = (sgna == 0) || (sgnb == 0);
2515 int overflow = 0;
2516 int zero = 0;
2517 uint32_t cr = 0;
2518 ppc_avr_t result = { .u64 = { 0, 0 } };
2520 if (!invalid) {
2521 if (sgna == sgnb) {
2522 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2523 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2524 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2525 } else if (bcd_cmp_mag(a, b) > 0) {
2526 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2527 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2528 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2529 } else {
2530 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2531 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2532 cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2536 if (unlikely(invalid)) {
2537 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2538 cr = 1 << CRF_SO;
2539 } else if (overflow) {
2540 cr |= 1 << CRF_SO;
2541 } else if (zero) {
2542 cr = 1 << CRF_EQ;
2545 *r = result;
2547 return cr;
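/* bcdsub negates the sign nibble of b and defers to bcdadd. */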
2550 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2552 ppc_avr_t bcopy = *b;
2553 int sgnb = bcd_get_sgn(b);
2554 if (sgnb < 0) {
2555 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2556 } else if (sgnb > 0) {
2557 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2559 /* else invalid ... defer to bcdadd code for proper handling */
2561 return helper_bcdadd(r, a, &bcopy, ps);
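/* AES acceleration helpers, built on the lookup tables from crypto/aes.h:
 * vsbox applies SubBytes, vcipher/vcipherlast perform one forward round with
 * and without MixColumns, and vncipher/vncipherlast the corresponding
 * inverse rounds.
 */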
2564 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2566 int i;
2567 VECTOR_FOR_INORDER_I(i, u8) {
2568 r->u8[i] = AES_sbox[a->u8[i]];
2572 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2574 ppc_avr_t result;
2575 int i;
2577 VECTOR_FOR_INORDER_I(i, u32) {
2578 result.AVRW(i) = b->AVRW(i) ^
2579 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2580 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2581 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2582 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2584 *r = result;
2587 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2589 ppc_avr_t result;
2590 int i;
2592 VECTOR_FOR_INORDER_I(i, u8) {
2593 result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
2595 *r = result;
2598 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2600 /* This differs from what is written in ISA V2.07. The RTL is */
2601 /* incorrect and will be fixed in V2.07B. */
2602 int i;
2603 ppc_avr_t tmp;
2605 VECTOR_FOR_INORDER_I(i, u8) {
2606 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2609 VECTOR_FOR_INORDER_I(i, u32) {
2610 r->AVRW(i) =
2611 AES_imc[tmp.AVRB(4*i + 0)][0] ^
2612 AES_imc[tmp.AVRB(4*i + 1)][1] ^
2613 AES_imc[tmp.AVRB(4*i + 2)][2] ^
2614 AES_imc[tmp.AVRB(4*i + 3)][3];
2618 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2620 ppc_avr_t result;
2621 int i;
2623 VECTOR_FOR_INORDER_I(i, u8) {
2624 result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
2626 *r = result;
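/* vshasigmaw/vshasigmad compute the SHA-256 and SHA-512 sigma functions.
 * Bit 4 of st_six selects the upper-case Sigma variants; the remaining bits
 * select sigma0 or sigma1 for each element.
 */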
2629 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
2630 #if defined(HOST_WORDS_BIGENDIAN)
2631 #define EL_IDX(i) (i)
2632 #else
2633 #define EL_IDX(i) (3 - (i))
2634 #endif
2636 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2638 int st = (st_six & 0x10) != 0;
2639 int six = st_six & 0xF;
2640 int i;
2642 VECTOR_FOR_INORDER_I(i, u32) {
2643 if (st == 0) {
2644 if ((six & (0x8 >> i)) == 0) {
2645 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2646 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2647 (a->u32[EL_IDX(i)] >> 3);
2648 } else { /* six.bit[i] == 1 */
2649 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2650 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2651 (a->u32[EL_IDX(i)] >> 10);
2653 } else { /* st == 1 */
2654 if ((six & (0x8 >> i)) == 0) {
2655 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2656 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2657 ROTRu32(a->u32[EL_IDX(i)], 22);
2658 } else { /* six.bit[i] == 1 */
2659 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2660 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2661 ROTRu32(a->u32[EL_IDX(i)], 25);
2667 #undef ROTRu32
2668 #undef EL_IDX
2670 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
2671 #if defined(HOST_WORDS_BIGENDIAN)
2672 #define EL_IDX(i) (i)
2673 #else
2674 #define EL_IDX(i) (1 - (i))
2675 #endif
2677 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2679 int st = (st_six & 0x10) != 0;
2680 int six = st_six & 0xF;
2681 int i;
2683 VECTOR_FOR_INORDER_I(i, u64) {
2684 if (st == 0) {
2685 if ((six & (0x8 >> (2*i))) == 0) {
2686 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2687 ROTRu64(a->u64[EL_IDX(i)], 8) ^
2688 (a->u64[EL_IDX(i)] >> 7);
2689 } else { /* six.bit[2*i] == 1 */
2690 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2691 ROTRu64(a->u64[EL_IDX(i)], 61) ^
2692 (a->u64[EL_IDX(i)] >> 6);
2694 } else { /* st == 1 */
2695 if ((six & (0x8 >> (2*i))) == 0) {
2696 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2697 ROTRu64(a->u64[EL_IDX(i)], 34) ^
2698 ROTRu64(a->u64[EL_IDX(i)], 39);
2699 } else { /* six.bit[2*i] == 1 */
2700 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2701 ROTRu64(a->u64[EL_IDX(i)], 18) ^
2702 ROTRu64(a->u64[EL_IDX(i)], 41);
2708 #undef ROTRu64
2709 #undef EL_IDX
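/* vpermxor: each byte of c supplies two 4-bit indexes; the indexed byte of a
 * is XORed with the indexed byte of b.  The indexes use big-endian byte
 * numbering, hence the 15 - index correction on little-endian hosts.
 */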
2711 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2713 ppc_avr_t result;
2714 int i;
2716 VECTOR_FOR_INORDER_I(i, u8) {
2717 int indexA = c->u8[i] >> 4;
2718 int indexB = c->u8[i] & 0xF;
2719 #if defined(HOST_WORDS_BIGENDIAN)
2720 result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
2721 #else
2722 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
2723 #endif
2725 *r = result;
2728 #undef VECTOR_FOR_INORDER_I
2729 #undef HI_IDX
2730 #undef LO_IDX
2732 /*****************************************************************************/
2733 /* SPE extension helpers */
2734 /* Use a lookup table to make nibble bit-reversal quicker */
2735 static const uint8_t hbrev[16] = {
2736 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
2737 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
2740 static inline uint8_t byte_reverse(uint8_t val)
2742 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
2745 static inline uint32_t word_reverse(uint32_t val)
2747 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
2748 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
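/* brinc implements the SPE bit-reversed increment: the low bits of arg1
 * selected by the mask in arg2 are stepped in bit-reversed order, as used
 * for FFT-style addressing.
 */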
2751 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
2752 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2754 uint32_t a, b, d, mask;
2756 mask = UINT32_MAX >> (32 - MASKBITS);
2757 a = arg1 & mask;
2758 b = arg2 & mask;
2759 d = word_reverse(1 + word_reverse(a | ~b));
2760 return (arg1 & ~mask) | (d & b);
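/* Count leading sign bits: leading ones for negative values, leading zeros
 * otherwise.
 */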
2763 uint32_t helper_cntlsw32(uint32_t val)
2765 if (val & 0x80000000) {
2766 return clz32(~val);
2767 } else {
2768 return clz32(val);
2772 uint32_t helper_cntlzw32(uint32_t val)
2774 return clz32(val);
2777 /* 440 specific */
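/* dlmzb scans the eight bytes of high:low from the most significant byte for
 * a zero byte and returns a byte count (8 if none is found).  The count is
 * written into the low bits of XER and, when update_Rc is set, CR0 records
 * whether the zero byte was in high, in low, or absent.
 */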
2778 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2779 target_ulong low, uint32_t update_Rc)
2781 target_ulong mask;
2782 int i;
2784 i = 1;
2785 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2786 if ((high & mask) == 0) {
2787 if (update_Rc) {
2788 env->crf[0] = 0x4;
2790 goto done;
2792 i++;
2794 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2795 if ((low & mask) == 0) {
2796 if (update_Rc) {
2797 env->crf[0] = 0x8;
2799 goto done;
2801 i++;
2803 i = 8;
2804 if (update_Rc) {
2805 env->crf[0] = 0x2;
2807 done:
2808 env->xer = (env->xer & ~0x7F) | i;
2809 if (update_Rc) {
2810 env->crf[0] |= xer_so;
2812 return i;