target-ppc: add vector bit permute doubleword instruction
[qemu/ar7.git] / target-ppc / int_helper.c
blob b12af9587782b18e7d3b915f9cccd52e1044d775
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "exec/exec-all.h"
22 #include "qemu/host-utils.h"
23 #include "exec/helper-proto.h"
24 #include "crypto/aes.h"
26 #include "helper_regs.h"
27 /*****************************************************************************/
28 /* Fixed point operations helpers */
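/* Extended divide helpers (divweu, divwe, divdeu, divde): the dividend is
 * the first source operand extended on the right with 32 or 64 zero bits.
 * A zero divisor or a quotient that does not fit in the target width counts
 * as overflow: the result is undefined (0 is returned here) and, when OE is
 * set, OV and SO are raised.
 */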
30 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
31 uint32_t oe)
33 uint64_t rt = 0;
34 int overflow = 0;
36 uint64_t dividend = (uint64_t)ra << 32;
37 uint64_t divisor = (uint32_t)rb;
39 if (unlikely(divisor == 0)) {
40 overflow = 1;
41 } else {
42 rt = dividend / divisor;
43 overflow = rt > UINT32_MAX;
46 if (unlikely(overflow)) {
47 rt = 0; /* Undefined */
50 if (oe) {
51 if (unlikely(overflow)) {
52 env->so = env->ov = 1;
53 } else {
54 env->ov = 0;
58 return (target_ulong)rt;
61 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
62 uint32_t oe)
64 int64_t rt = 0;
65 int overflow = 0;
67 int64_t dividend = (int64_t)ra << 32;
68 int64_t divisor = (int64_t)((int32_t)rb);
70 if (unlikely((divisor == 0) ||
71 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
72 overflow = 1;
73 } else {
74 rt = dividend / divisor;
75 overflow = rt != (int32_t)rt;
78 if (unlikely(overflow)) {
79 rt = 0; /* Undefined */
82 if (oe) {
83 if (unlikely(overflow)) {
84 env->so = env->ov = 1;
85 } else {
86 env->ov = 0;
90 return (target_ulong)rt;
93 #if defined(TARGET_PPC64)
95 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
97 uint64_t rt = 0;
98 int overflow = 0;
100 overflow = divu128(&rt, &ra, rb);
102 if (unlikely(overflow)) {
103 rt = 0; /* Undefined */
106 if (oe) {
107 if (unlikely(overflow)) {
108 env->so = env->ov = 1;
109 } else {
110 env->ov = 0;
114 return rt;
117 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
119 int64_t rt = 0;
120 int64_t ra = (int64_t)rau;
121 int64_t rb = (int64_t)rbu;
122 int overflow = divs128(&rt, &ra, rb);
124 if (unlikely(overflow)) {
125 rt = 0; /* Undefined */
128 if (oe) {
130 if (unlikely(overflow)) {
131 env->so = env->ov = 1;
132 } else {
133 env->ov = 0;
137 return rt;
140 #endif
143 target_ulong helper_cntlzw(target_ulong t)
145 return clz32(t);
148 target_ulong helper_cnttzw(target_ulong t)
150 return ctz32(t);
153 #if defined(TARGET_PPC64)
154 /* if x = 0xab, returns 0xabababababababab */
155 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
157 /* subtract 1 from each byte, AND with the inverse, and check whether the MSB
158 * ends up set in any byte,
159 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
160 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
162 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
164 /* When you XOR the pattern and there is a match, that byte will be zero */
165 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
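/* Example: hasvalue(0x1122334455667788, 0x55) is non-zero because one byte
 * of the first operand equals 0x55; cmpeqb below uses this to test whether
 * any byte of RB matches the low byte of RA.
 */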
167 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
169 return hasvalue(rb, ra) ? 1 << CRF_GT : 0;
172 #undef pattern
173 #undef haszero
174 #undef hasvalue
176 target_ulong helper_cntlzd(target_ulong t)
178 return clz64(t);
181 target_ulong helper_cnttzd(target_ulong t)
183 return ctz64(t);
185 #endif
187 #if defined(TARGET_PPC64)
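/* bpermd: each of the 8 index bytes in rs selects one bit of rb (bit 0 being
 * the most-significant bit; indices >= 64 yield 0); the selected bits are
 * gathered into the low byte of the result.
 */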
189 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
191 int i;
192 uint64_t ra = 0;
194 for (i = 0; i < 8; i++) {
195 int index = (rs >> (i*8)) & 0xFF;
196 if (index < 64) {
197 if (rb & (1ull << (63-index))) {
198 ra |= 1 << i;
202 return ra;
205 #endif
207 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
209 target_ulong mask = 0xff;
210 target_ulong ra = 0;
211 int i;
213 for (i = 0; i < sizeof(target_ulong); i++) {
214 if ((rs & mask) == (rb & mask)) {
215 ra |= mask;
217 mask <<= 8;
219 return ra;
222 /* shift right arithmetic helper */
223 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
224 target_ulong shift)
226 int32_t ret;
228 if (likely(!(shift & 0x20))) {
229 if (likely((uint32_t)shift != 0)) {
230 shift &= 0x1f;
231 ret = (int32_t)value >> shift;
232 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
233 env->ca = 0;
234 } else {
235 env->ca = 1;
237 } else {
238 ret = (int32_t)value;
239 env->ca = 0;
241 } else {
242 ret = (int32_t)value >> 31;
243 env->ca = (ret != 0);
245 return (target_long)ret;
248 #if defined(TARGET_PPC64)
249 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
250 target_ulong shift)
252 int64_t ret;
254 if (likely(!(shift & 0x40))) {
255 if (likely((uint64_t)shift != 0)) {
256 shift &= 0x3f;
257 ret = (int64_t)value >> shift;
258 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
259 env->ca = 0;
260 } else {
261 env->ca = 1;
263 } else {
264 ret = (int64_t)value;
265 env->ca = 0;
267 } else {
268 ret = (int64_t)value >> 63;
269 env->ca = (ret != 0);
271 return ret;
273 #endif
275 #if defined(TARGET_PPC64)
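/* popcntb/popcntw use the usual SWAR reduction, but the masked add steps stop
 * once each byte (popcntb) or each word (popcntw) holds its own population
 * count, matching the per-field semantics of the instructions.
 */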
276 target_ulong helper_popcntb(target_ulong val)
278 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
279 0x5555555555555555ULL);
280 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
281 0x3333333333333333ULL);
282 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
283 0x0f0f0f0f0f0f0f0fULL);
284 return val;
287 target_ulong helper_popcntw(target_ulong val)
289 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
290 0x5555555555555555ULL);
291 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
292 0x3333333333333333ULL);
293 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
294 0x0f0f0f0f0f0f0f0fULL);
295 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
296 0x00ff00ff00ff00ffULL);
297 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
298 0x0000ffff0000ffffULL);
299 return val;
302 target_ulong helper_popcntd(target_ulong val)
304 return ctpop64(val);
306 #else
307 target_ulong helper_popcntb(target_ulong val)
309 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
310 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
311 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
312 return val;
315 target_ulong helper_popcntw(target_ulong val)
317 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
318 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
319 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
320 val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
321 val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
322 return val;
324 #endif
326 /*****************************************************************************/
327 /* PowerPC 601 specific instructions (POWER bridge) */
328 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
330 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
332 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
333 (int32_t)arg2 == 0) {
334 env->spr[SPR_MQ] = 0;
335 return INT32_MIN;
336 } else {
337 env->spr[SPR_MQ] = tmp % arg2;
338 return tmp / (int32_t)arg2;
342 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
343 target_ulong arg2)
345 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
347 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
348 (int32_t)arg2 == 0) {
349 env->so = env->ov = 1;
350 env->spr[SPR_MQ] = 0;
351 return INT32_MIN;
352 } else {
353 env->spr[SPR_MQ] = tmp % arg2;
354 tmp /= (int32_t)arg2;
355 if ((int32_t)tmp != tmp) {
356 env->so = env->ov = 1;
357 } else {
358 env->ov = 0;
360 return tmp;
364 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
365 target_ulong arg2)
367 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
368 (int32_t)arg2 == 0) {
369 env->spr[SPR_MQ] = 0;
370 return INT32_MIN;
371 } else {
372 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
373 return (int32_t)arg1 / (int32_t)arg2;
377 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
378 target_ulong arg2)
380 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
381 (int32_t)arg2 == 0) {
382 env->so = env->ov = 1;
383 env->spr[SPR_MQ] = 0;
384 return INT32_MIN;
385 } else {
386 env->ov = 0;
387 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
388 return (int32_t)arg1 / (int32_t)arg2;
392 /*****************************************************************************/
393 /* 602 specific instructions */
394 /* mfrom is the craziest instruction ever seen, imho! */
395 /* Real implementation uses a ROM table. Do the same */
396 /* Extremely decomposed:
397 * exponent = -arg / 256
398 * return 256 * log10(10^exponent + 1.0) + 0.5
400 #if !defined(CONFIG_USER_ONLY)
401 target_ulong helper_602_mfrom(target_ulong arg)
403 if (likely(arg < 602)) {
404 #include "mfrom_table.c"
405 return mfrom_ROM_table[arg];
406 } else {
407 return 0;
410 #endif
412 /*****************************************************************************/
413 /* Altivec extension helpers */
414 #if defined(HOST_WORDS_BIGENDIAN)
415 #define HI_IDX 0
416 #define LO_IDX 1
417 #define AVRB(i) u8[i]
418 #define AVRW(i) u32[i]
419 #else
420 #define HI_IDX 1
421 #define LO_IDX 0
422 #define AVRB(i) u8[15-(i)]
423 #define AVRW(i) u32[3-(i)]
424 #endif
426 #if defined(HOST_WORDS_BIGENDIAN)
427 #define VECTOR_FOR_INORDER_I(index, element) \
428 for (index = 0; index < ARRAY_SIZE(r->element); index++)
429 #else
430 #define VECTOR_FOR_INORDER_I(index, element) \
431 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
432 #endif
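/* HI_IDX/LO_IDX pick the architecturally high/low doubleword of a ppc_avr_t
 * on this host, and VECTOR_FOR_INORDER_I iterates vector elements in PowerPC
 * element order (element 0 first) regardless of host byte order, which is
 * why the little-endian variant walks the array backwards.
 */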
434 /* Saturating arithmetic helpers. */
435 #define SATCVT(from, to, from_type, to_type, min, max) \
436 static inline to_type cvt##from##to(from_type x, int *sat) \
438 to_type r; \
440 if (x < (from_type)min) { \
441 r = min; \
442 *sat = 1; \
443 } else if (x > (from_type)max) { \
444 r = max; \
445 *sat = 1; \
446 } else { \
447 r = x; \
449 return r; \
451 #define SATCVTU(from, to, from_type, to_type, min, max) \
452 static inline to_type cvt##from##to(from_type x, int *sat) \
454 to_type r; \
456 if (x > (from_type)max) { \
457 r = max; \
458 *sat = 1; \
459 } else { \
460 r = x; \
462 return r; \
464 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
465 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
466 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
468 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
469 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
470 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
471 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
472 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
473 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
474 #undef SATCVT
475 #undef SATCVTU
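/* The generated converters are named cvt<src><dst>: e.g. cvtshsb clamps a
 * signed halfword into a signed byte and cvtsduw clamps a signed doubleword
 * into an unsigned word, setting *sat whenever the value had to be clamped.
 */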
477 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
479 int i, j = (sh & 0xf);
481 VECTOR_FOR_INORDER_I(i, u8) {
482 r->u8[i] = j++;
486 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
488 int i, j = 0x10 - (sh & 0xf);
490 VECTOR_FOR_INORDER_I(i, u8) {
491 r->u8[i] = j++;
495 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
497 #if defined(HOST_WORDS_BIGENDIAN)
498 env->vscr = r->u32[3];
499 #else
500 env->vscr = r->u32[0];
501 #endif
502 set_flush_to_zero(vscr_nj, &env->vec_status);
505 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
507 int i;
509 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
510 r->u32[i] = ~a->u32[i] < b->u32[i];
514 #define VARITH_DO(name, op, element) \
515 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
517 int i; \
519 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
520 r->element[i] = a->element[i] op b->element[i]; \
523 #define VARITH(suffix, element) \
524 VARITH_DO(add##suffix, +, element) \
525 VARITH_DO(sub##suffix, -, element)
526 VARITH(ubm, u8)
527 VARITH(uhm, u16)
528 VARITH(uwm, u32)
529 VARITH(udm, u64)
530 VARITH_DO(muluwm, *, u32)
531 #undef VARITH_DO
532 #undef VARITH
534 #define VARITHFP(suffix, func) \
535 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
536 ppc_avr_t *b) \
538 int i; \
540 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
541 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
544 VARITHFP(addfp, float32_add)
545 VARITHFP(subfp, float32_sub)
546 VARITHFP(minfp, float32_min)
547 VARITHFP(maxfp, float32_max)
548 #undef VARITHFP
550 #define VARITHFPFMA(suffix, type) \
551 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
552 ppc_avr_t *b, ppc_avr_t *c) \
554 int i; \
555 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
556 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
557 type, &env->vec_status); \
560 VARITHFPFMA(maddfp, 0);
561 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
562 #undef VARITHFPFMA
564 #define VARITHSAT_CASE(type, op, cvt, element) \
566 type result = (type)a->element[i] op (type)b->element[i]; \
567 r->element[i] = cvt(result, &sat); \
570 #define VARITHSAT_DO(name, op, optype, cvt, element) \
571 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
572 ppc_avr_t *b) \
574 int sat = 0; \
575 int i; \
577 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
578 switch (sizeof(r->element[0])) { \
579 case 1: \
580 VARITHSAT_CASE(optype, op, cvt, element); \
581 break; \
582 case 2: \
583 VARITHSAT_CASE(optype, op, cvt, element); \
584 break; \
585 case 4: \
586 VARITHSAT_CASE(optype, op, cvt, element); \
587 break; \
590 if (sat) { \
591 env->vscr |= (1 << VSCR_SAT); \
594 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
595 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
596 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
597 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
598 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
599 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
600 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
601 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
602 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
603 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
604 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
605 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
606 #undef VARITHSAT_CASE
607 #undef VARITHSAT_DO
608 #undef VARITHSAT_SIGNED
609 #undef VARITHSAT_UNSIGNED
611 #define VAVG_DO(name, element, etype) \
612 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
614 int i; \
616 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
617 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
618 r->element[i] = x >> 1; \
622 #define VAVG(type, signed_element, signed_type, unsigned_element, \
623 unsigned_type) \
624 VAVG_DO(avgs##type, signed_element, signed_type) \
625 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
626 VAVG(b, s8, int16_t, u8, uint16_t)
627 VAVG(h, s16, int32_t, u16, uint32_t)
628 VAVG(w, s32, int64_t, u32, uint64_t)
629 #undef VAVG_DO
630 #undef VAVG
632 #define VABSDU_DO(name, element) \
633 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
635 int i; \
637 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
638 r->element[i] = (a->element[i] > b->element[i]) ? \
639 (a->element[i] - b->element[i]) : \
640 (b->element[i] - a->element[i]); \
644 /* VABSDU - Vector absolute difference unsigned
645 * type - instruction mnemonic suffix (b: byte, h: halfword, w: word)
646 * element - element type to access from vector
648 #define VABSDU(type, element) \
649 VABSDU_DO(absdu##type, element)
650 VABSDU(b, u8)
651 VABSDU(h, u16)
652 VABSDU(w, u32)
653 #undef VABSDU_DO
654 #undef VABSDU
656 #define VCF(suffix, cvt, element) \
657 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
658 ppc_avr_t *b, uint32_t uim) \
660 int i; \
662 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
663 float32 t = cvt(b->element[i], &env->vec_status); \
664 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
667 VCF(ux, uint32_to_float32, u32)
668 VCF(sx, int32_to_float32, s32)
669 #undef VCF
671 #define VCMP_DO(suffix, compare, element, record) \
672 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
673 ppc_avr_t *a, ppc_avr_t *b) \
675 uint64_t ones = (uint64_t)-1; \
676 uint64_t all = ones; \
677 uint64_t none = 0; \
678 int i; \
680 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
681 uint64_t result = (a->element[i] compare b->element[i] ? \
682 ones : 0x0); \
683 switch (sizeof(a->element[0])) { \
684 case 8: \
685 r->u64[i] = result; \
686 break; \
687 case 4: \
688 r->u32[i] = result; \
689 break; \
690 case 2: \
691 r->u16[i] = result; \
692 break; \
693 case 1: \
694 r->u8[i] = result; \
695 break; \
697 all &= result; \
698 none |= result; \
700 if (record) { \
701 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
704 #define VCMP(suffix, compare, element) \
705 VCMP_DO(suffix, compare, element, 0) \
706 VCMP_DO(suffix##_dot, compare, element, 1)
707 VCMP(equb, ==, u8)
708 VCMP(equh, ==, u16)
709 VCMP(equw, ==, u32)
710 VCMP(equd, ==, u64)
711 VCMP(gtub, >, u8)
712 VCMP(gtuh, >, u16)
713 VCMP(gtuw, >, u32)
714 VCMP(gtud, >, u64)
715 VCMP(gtsb, >, s8)
716 VCMP(gtsh, >, s16)
717 VCMP(gtsw, >, s32)
718 VCMP(gtsd, >, s64)
719 #undef VCMP_DO
720 #undef VCMP
722 #define VCMPNEZ_DO(suffix, element, etype, record) \
723 void helper_vcmpnez##suffix(CPUPPCState *env, ppc_avr_t *r, \
724 ppc_avr_t *a, ppc_avr_t *b) \
726 etype ones = (etype)-1; \
727 etype all = ones; \
728 etype none = 0; \
729 int i; \
731 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
732 etype result = ((a->element[i] == 0) \
733 || (b->element[i] == 0) \
734 || (a->element[i] != b->element[i]) ? \
735 ones : 0x0); \
736 r->element[i] = result; \
737 all &= result; \
738 none |= result; \
740 if (record) { \
741 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
745 /* VCMPNEZ - Vector compare not equal to zero
746 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
747 * element - element type to access from vector
749 #define VCMPNEZ(suffix, element, etype) \
750 VCMPNEZ_DO(suffix, element, etype, 0) \
751 VCMPNEZ_DO(suffix##_dot, element, etype, 1)
752 VCMPNEZ(b, u8, uint8_t)
753 VCMPNEZ(h, u16, uint16_t)
754 VCMPNEZ(w, u32, uint32_t)
755 #undef VCMPNEZ_DO
756 #undef VCMPNEZ
758 #define VCMPFP_DO(suffix, compare, order, record) \
759 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
760 ppc_avr_t *a, ppc_avr_t *b) \
762 uint32_t ones = (uint32_t)-1; \
763 uint32_t all = ones; \
764 uint32_t none = 0; \
765 int i; \
767 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
768 uint32_t result; \
769 int rel = float32_compare_quiet(a->f[i], b->f[i], \
770 &env->vec_status); \
771 if (rel == float_relation_unordered) { \
772 result = 0; \
773 } else if (rel compare order) { \
774 result = ones; \
775 } else { \
776 result = 0; \
778 r->u32[i] = result; \
779 all &= result; \
780 none |= result; \
782 if (record) { \
783 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
786 #define VCMPFP(suffix, compare, order) \
787 VCMPFP_DO(suffix, compare, order, 0) \
788 VCMPFP_DO(suffix##_dot, compare, order, 1)
789 VCMPFP(eqfp, ==, float_relation_equal)
790 VCMPFP(gefp, !=, float_relation_less)
791 VCMPFP(gtfp, ==, float_relation_greater)
792 #undef VCMPFP_DO
793 #undef VCMPFP
795 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
796 ppc_avr_t *a, ppc_avr_t *b, int record)
798 int i;
799 int all_in = 0;
801 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
802 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
803 if (le_rel == float_relation_unordered) {
804 r->u32[i] = 0xc0000000;
805 all_in = 1;
806 } else {
807 float32 bneg = float32_chs(b->f[i]);
808 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
809 int le = le_rel != float_relation_greater;
810 int ge = ge_rel != float_relation_less;
812 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
813 all_in |= (!le | !ge);
816 if (record) {
817 env->crf[6] = (all_in == 0) << 1;
821 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
823 vcmpbfp_internal(env, r, a, b, 0);
826 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
827 ppc_avr_t *b)
829 vcmpbfp_internal(env, r, a, b, 1);
832 #define VCT(suffix, satcvt, element) \
833 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
834 ppc_avr_t *b, uint32_t uim) \
836 int i; \
837 int sat = 0; \
838 float_status s = env->vec_status; \
840 set_float_rounding_mode(float_round_to_zero, &s); \
841 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
842 if (float32_is_any_nan(b->f[i])) { \
843 r->element[i] = 0; \
844 } else { \
845 float64 t = float32_to_float64(b->f[i], &s); \
846 int64_t j; \
848 t = float64_scalbn(t, uim, &s); \
849 j = float64_to_int64(t, &s); \
850 r->element[i] = satcvt(j, &sat); \
853 if (sat) { \
854 env->vscr |= (1 << VSCR_SAT); \
857 VCT(uxs, cvtsduw, u32)
858 VCT(sxs, cvtsdsw, s32)
859 #undef VCT
861 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
862 ppc_avr_t *b, ppc_avr_t *c)
864 int sat = 0;
865 int i;
867 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
868 int32_t prod = a->s16[i] * b->s16[i];
869 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
871 r->s16[i] = cvtswsh(t, &sat);
874 if (sat) {
875 env->vscr |= (1 << VSCR_SAT);
879 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
880 ppc_avr_t *b, ppc_avr_t *c)
882 int sat = 0;
883 int i;
885 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
886 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
887 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
888 r->s16[i] = cvtswsh(t, &sat);
891 if (sat) {
892 env->vscr |= (1 << VSCR_SAT);
896 #define VMINMAX_DO(name, compare, element) \
897 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
899 int i; \
901 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
902 if (a->element[i] compare b->element[i]) { \
903 r->element[i] = b->element[i]; \
904 } else { \
905 r->element[i] = a->element[i]; \
909 #define VMINMAX(suffix, element) \
910 VMINMAX_DO(min##suffix, >, element) \
911 VMINMAX_DO(max##suffix, <, element)
912 VMINMAX(sb, s8)
913 VMINMAX(sh, s16)
914 VMINMAX(sw, s32)
915 VMINMAX(sd, s64)
916 VMINMAX(ub, u8)
917 VMINMAX(uh, u16)
918 VMINMAX(uw, u32)
919 VMINMAX(ud, u64)
920 #undef VMINMAX_DO
921 #undef VMINMAX
923 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
925 int i;
927 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
928 int32_t prod = a->s16[i] * b->s16[i];
929 r->s16[i] = (int16_t) (prod + c->s16[i]);
933 #define VMRG_DO(name, element, highp) \
934 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
936 ppc_avr_t result; \
937 int i; \
938 size_t n_elems = ARRAY_SIZE(r->element); \
940 for (i = 0; i < n_elems / 2; i++) { \
941 if (highp) { \
942 result.element[i*2+HI_IDX] = a->element[i]; \
943 result.element[i*2+LO_IDX] = b->element[i]; \
944 } else { \
945 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
946 b->element[n_elems - i - 1]; \
947 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
948 a->element[n_elems - i - 1]; \
951 *r = result; \
953 #if defined(HOST_WORDS_BIGENDIAN)
954 #define MRGHI 0
955 #define MRGLO 1
956 #else
957 #define MRGHI 1
958 #define MRGLO 0
959 #endif
960 #define VMRG(suffix, element) \
961 VMRG_DO(mrgl##suffix, element, MRGHI) \
962 VMRG_DO(mrgh##suffix, element, MRGLO)
963 VMRG(b, u8)
964 VMRG(h, u16)
965 VMRG(w, u32)
966 #undef VMRG_DO
967 #undef VMRG
968 #undef MRGHI
969 #undef MRGLO
971 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
972 ppc_avr_t *b, ppc_avr_t *c)
974 int32_t prod[16];
975 int i;
977 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
978 prod[i] = (int32_t)a->s8[i] * b->u8[i];
981 VECTOR_FOR_INORDER_I(i, s32) {
982 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
983 prod[4 * i + 2] + prod[4 * i + 3];
987 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
988 ppc_avr_t *b, ppc_avr_t *c)
990 int32_t prod[8];
991 int i;
993 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
994 prod[i] = a->s16[i] * b->s16[i];
997 VECTOR_FOR_INORDER_I(i, s32) {
998 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1002 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1003 ppc_avr_t *b, ppc_avr_t *c)
1005 int32_t prod[8];
1006 int i;
1007 int sat = 0;
1009 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1010 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1013 VECTOR_FOR_INORDER_I(i, s32) {
1014 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1016 r->u32[i] = cvtsdsw(t, &sat);
1019 if (sat) {
1020 env->vscr |= (1 << VSCR_SAT);
1024 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1025 ppc_avr_t *b, ppc_avr_t *c)
1027 uint16_t prod[16];
1028 int i;
1030 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1031 prod[i] = a->u8[i] * b->u8[i];
1034 VECTOR_FOR_INORDER_I(i, u32) {
1035 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1036 prod[4 * i + 2] + prod[4 * i + 3];
1040 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1041 ppc_avr_t *b, ppc_avr_t *c)
1043 uint32_t prod[8];
1044 int i;
1046 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1047 prod[i] = a->u16[i] * b->u16[i];
1050 VECTOR_FOR_INORDER_I(i, u32) {
1051 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1055 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1056 ppc_avr_t *b, ppc_avr_t *c)
1058 uint32_t prod[8];
1059 int i;
1060 int sat = 0;
1062 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1063 prod[i] = a->u16[i] * b->u16[i];
1066 VECTOR_FOR_INORDER_I(i, s32) {
1067 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1069 r->u32[i] = cvtuduw(t, &sat);
1072 if (sat) {
1073 env->vscr |= (1 << VSCR_SAT);
1077 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
1078 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1080 int i; \
1082 VECTOR_FOR_INORDER_I(i, prod_element) { \
1083 if (evenp) { \
1084 r->prod_element[i] = \
1085 (cast)a->mul_element[i * 2 + HI_IDX] * \
1086 (cast)b->mul_element[i * 2 + HI_IDX]; \
1087 } else { \
1088 r->prod_element[i] = \
1089 (cast)a->mul_element[i * 2 + LO_IDX] * \
1090 (cast)b->mul_element[i * 2 + LO_IDX]; \
1094 #define VMUL(suffix, mul_element, prod_element, cast) \
1095 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1096 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1097 VMUL(sb, s8, s16, int16_t)
1098 VMUL(sh, s16, s32, int32_t)
1099 VMUL(sw, s32, s64, int64_t)
1100 VMUL(ub, u8, u16, uint16_t)
1101 VMUL(uh, u16, u32, uint32_t)
1102 VMUL(uw, u32, u64, uint64_t)
1103 #undef VMUL_DO
1104 #undef VMUL
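/* vperm: each byte of c selects one of the 32 bytes of the concatenation
 * a || b (selectors 0-15 pick from a, 16-31 from b); only the low 5 bits of
 * each selector byte are used.
 */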
1106 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1107 ppc_avr_t *c)
1109 ppc_avr_t result;
1110 int i;
1112 VECTOR_FOR_INORDER_I(i, u8) {
1113 int s = c->u8[i] & 0x1f;
1114 #if defined(HOST_WORDS_BIGENDIAN)
1115 int index = s & 0xf;
1116 #else
1117 int index = 15 - (s & 0xf);
1118 #endif
1120 if (s & 0x10) {
1121 result.u8[i] = b->u8[index];
1122 } else {
1123 result.u8[i] = a->u8[index];
1126 *r = result;
1129 #if defined(HOST_WORDS_BIGENDIAN)
1130 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1131 #define VBPERMD_INDEX(i) (i)
1132 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1133 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1134 #else
1135 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1136 #define VBPERMD_INDEX(i) (1 - i)
1137 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1138 #define EXTRACT_BIT(avr, i, index) \
1139 (extract64((avr)->u64[1 - i], 63 - index, 1))
1140 #endif
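/* vbpermd (the instruction this commit adds): for each doubleword of a, the
 * eight index bytes taken from the corresponding doubleword of b each select
 * one bit of that doubleword (indices >= 64 select 0); the gathered bits are
 * collected into the low byte of the corresponding result doubleword.
 */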
1142 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1144 int i, j;
1145 ppc_avr_t result = { .u64 = { 0, 0 } };
1146 VECTOR_FOR_INORDER_I(i, u64) {
1147 for (j = 0; j < 8; j++) {
1148 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1149 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1150 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1154 *r = result;
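/* vbpermq: same idea as vbpermd, but the 16 index bytes select bits from the
 * full 128-bit value of a; the 16 gathered bits are returned in the high
 * doubleword of the result and the low doubleword is cleared.
 */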
1157 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1159 int i;
1160 uint64_t perm = 0;
1162 VECTOR_FOR_INORDER_I(i, u8) {
1163 int index = VBPERMQ_INDEX(b, i);
1165 if (index < 128) {
1166 uint64_t mask = (1ull << (63-(index & 0x3F)));
1167 if (a->u64[VBPERMQ_DW(index)] & mask) {
1168 perm |= (0x8000 >> i);
1173 r->u64[HI_IDX] = perm;
1174 r->u64[LO_IDX] = 0;
1177 #undef VBPERMQ_INDEX
1178 #undef VBPERMQ_DW
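/* vgbbd (Vector Gather Bits by Bytes by Doubleword) lookup table: entry x
 * spreads the eight bits of x across the most-significant bit of each byte
 * of a 64-bit value (bit k of x becomes bit 7 of byte k), so helper_vgbbd()
 * only needs to shift each byte's mask into its bit column and OR them.
 */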
1180 static const uint64_t VGBBD_MASKS[256] = {
1181 0x0000000000000000ull, /* 00 */
1182 0x0000000000000080ull, /* 01 */
1183 0x0000000000008000ull, /* 02 */
1184 0x0000000000008080ull, /* 03 */
1185 0x0000000000800000ull, /* 04 */
1186 0x0000000000800080ull, /* 05 */
1187 0x0000000000808000ull, /* 06 */
1188 0x0000000000808080ull, /* 07 */
1189 0x0000000080000000ull, /* 08 */
1190 0x0000000080000080ull, /* 09 */
1191 0x0000000080008000ull, /* 0A */
1192 0x0000000080008080ull, /* 0B */
1193 0x0000000080800000ull, /* 0C */
1194 0x0000000080800080ull, /* 0D */
1195 0x0000000080808000ull, /* 0E */
1196 0x0000000080808080ull, /* 0F */
1197 0x0000008000000000ull, /* 10 */
1198 0x0000008000000080ull, /* 11 */
1199 0x0000008000008000ull, /* 12 */
1200 0x0000008000008080ull, /* 13 */
1201 0x0000008000800000ull, /* 14 */
1202 0x0000008000800080ull, /* 15 */
1203 0x0000008000808000ull, /* 16 */
1204 0x0000008000808080ull, /* 17 */
1205 0x0000008080000000ull, /* 18 */
1206 0x0000008080000080ull, /* 19 */
1207 0x0000008080008000ull, /* 1A */
1208 0x0000008080008080ull, /* 1B */
1209 0x0000008080800000ull, /* 1C */
1210 0x0000008080800080ull, /* 1D */
1211 0x0000008080808000ull, /* 1E */
1212 0x0000008080808080ull, /* 1F */
1213 0x0000800000000000ull, /* 20 */
1214 0x0000800000000080ull, /* 21 */
1215 0x0000800000008000ull, /* 22 */
1216 0x0000800000008080ull, /* 23 */
1217 0x0000800000800000ull, /* 24 */
1218 0x0000800000800080ull, /* 25 */
1219 0x0000800000808000ull, /* 26 */
1220 0x0000800000808080ull, /* 27 */
1221 0x0000800080000000ull, /* 28 */
1222 0x0000800080000080ull, /* 29 */
1223 0x0000800080008000ull, /* 2A */
1224 0x0000800080008080ull, /* 2B */
1225 0x0000800080800000ull, /* 2C */
1226 0x0000800080800080ull, /* 2D */
1227 0x0000800080808000ull, /* 2E */
1228 0x0000800080808080ull, /* 2F */
1229 0x0000808000000000ull, /* 30 */
1230 0x0000808000000080ull, /* 31 */
1231 0x0000808000008000ull, /* 32 */
1232 0x0000808000008080ull, /* 33 */
1233 0x0000808000800000ull, /* 34 */
1234 0x0000808000800080ull, /* 35 */
1235 0x0000808000808000ull, /* 36 */
1236 0x0000808000808080ull, /* 37 */
1237 0x0000808080000000ull, /* 38 */
1238 0x0000808080000080ull, /* 39 */
1239 0x0000808080008000ull, /* 3A */
1240 0x0000808080008080ull, /* 3B */
1241 0x0000808080800000ull, /* 3C */
1242 0x0000808080800080ull, /* 3D */
1243 0x0000808080808000ull, /* 3E */
1244 0x0000808080808080ull, /* 3F */
1245 0x0080000000000000ull, /* 40 */
1246 0x0080000000000080ull, /* 41 */
1247 0x0080000000008000ull, /* 42 */
1248 0x0080000000008080ull, /* 43 */
1249 0x0080000000800000ull, /* 44 */
1250 0x0080000000800080ull, /* 45 */
1251 0x0080000000808000ull, /* 46 */
1252 0x0080000000808080ull, /* 47 */
1253 0x0080000080000000ull, /* 48 */
1254 0x0080000080000080ull, /* 49 */
1255 0x0080000080008000ull, /* 4A */
1256 0x0080000080008080ull, /* 4B */
1257 0x0080000080800000ull, /* 4C */
1258 0x0080000080800080ull, /* 4D */
1259 0x0080000080808000ull, /* 4E */
1260 0x0080000080808080ull, /* 4F */
1261 0x0080008000000000ull, /* 50 */
1262 0x0080008000000080ull, /* 51 */
1263 0x0080008000008000ull, /* 52 */
1264 0x0080008000008080ull, /* 53 */
1265 0x0080008000800000ull, /* 54 */
1266 0x0080008000800080ull, /* 55 */
1267 0x0080008000808000ull, /* 56 */
1268 0x0080008000808080ull, /* 57 */
1269 0x0080008080000000ull, /* 58 */
1270 0x0080008080000080ull, /* 59 */
1271 0x0080008080008000ull, /* 5A */
1272 0x0080008080008080ull, /* 5B */
1273 0x0080008080800000ull, /* 5C */
1274 0x0080008080800080ull, /* 5D */
1275 0x0080008080808000ull, /* 5E */
1276 0x0080008080808080ull, /* 5F */
1277 0x0080800000000000ull, /* 60 */
1278 0x0080800000000080ull, /* 61 */
1279 0x0080800000008000ull, /* 62 */
1280 0x0080800000008080ull, /* 63 */
1281 0x0080800000800000ull, /* 64 */
1282 0x0080800000800080ull, /* 65 */
1283 0x0080800000808000ull, /* 66 */
1284 0x0080800000808080ull, /* 67 */
1285 0x0080800080000000ull, /* 68 */
1286 0x0080800080000080ull, /* 69 */
1287 0x0080800080008000ull, /* 6A */
1288 0x0080800080008080ull, /* 6B */
1289 0x0080800080800000ull, /* 6C */
1290 0x0080800080800080ull, /* 6D */
1291 0x0080800080808000ull, /* 6E */
1292 0x0080800080808080ull, /* 6F */
1293 0x0080808000000000ull, /* 70 */
1294 0x0080808000000080ull, /* 71 */
1295 0x0080808000008000ull, /* 72 */
1296 0x0080808000008080ull, /* 73 */
1297 0x0080808000800000ull, /* 74 */
1298 0x0080808000800080ull, /* 75 */
1299 0x0080808000808000ull, /* 76 */
1300 0x0080808000808080ull, /* 77 */
1301 0x0080808080000000ull, /* 78 */
1302 0x0080808080000080ull, /* 79 */
1303 0x0080808080008000ull, /* 7A */
1304 0x0080808080008080ull, /* 7B */
1305 0x0080808080800000ull, /* 7C */
1306 0x0080808080800080ull, /* 7D */
1307 0x0080808080808000ull, /* 7E */
1308 0x0080808080808080ull, /* 7F */
1309 0x8000000000000000ull, /* 80 */
1310 0x8000000000000080ull, /* 81 */
1311 0x8000000000008000ull, /* 82 */
1312 0x8000000000008080ull, /* 83 */
1313 0x8000000000800000ull, /* 84 */
1314 0x8000000000800080ull, /* 85 */
1315 0x8000000000808000ull, /* 86 */
1316 0x8000000000808080ull, /* 87 */
1317 0x8000000080000000ull, /* 88 */
1318 0x8000000080000080ull, /* 89 */
1319 0x8000000080008000ull, /* 8A */
1320 0x8000000080008080ull, /* 8B */
1321 0x8000000080800000ull, /* 8C */
1322 0x8000000080800080ull, /* 8D */
1323 0x8000000080808000ull, /* 8E */
1324 0x8000000080808080ull, /* 8F */
1325 0x8000008000000000ull, /* 90 */
1326 0x8000008000000080ull, /* 91 */
1327 0x8000008000008000ull, /* 92 */
1328 0x8000008000008080ull, /* 93 */
1329 0x8000008000800000ull, /* 94 */
1330 0x8000008000800080ull, /* 95 */
1331 0x8000008000808000ull, /* 96 */
1332 0x8000008000808080ull, /* 97 */
1333 0x8000008080000000ull, /* 98 */
1334 0x8000008080000080ull, /* 99 */
1335 0x8000008080008000ull, /* 9A */
1336 0x8000008080008080ull, /* 9B */
1337 0x8000008080800000ull, /* 9C */
1338 0x8000008080800080ull, /* 9D */
1339 0x8000008080808000ull, /* 9E */
1340 0x8000008080808080ull, /* 9F */
1341 0x8000800000000000ull, /* A0 */
1342 0x8000800000000080ull, /* A1 */
1343 0x8000800000008000ull, /* A2 */
1344 0x8000800000008080ull, /* A3 */
1345 0x8000800000800000ull, /* A4 */
1346 0x8000800000800080ull, /* A5 */
1347 0x8000800000808000ull, /* A6 */
1348 0x8000800000808080ull, /* A7 */
1349 0x8000800080000000ull, /* A8 */
1350 0x8000800080000080ull, /* A9 */
1351 0x8000800080008000ull, /* AA */
1352 0x8000800080008080ull, /* AB */
1353 0x8000800080800000ull, /* AC */
1354 0x8000800080800080ull, /* AD */
1355 0x8000800080808000ull, /* AE */
1356 0x8000800080808080ull, /* AF */
1357 0x8000808000000000ull, /* B0 */
1358 0x8000808000000080ull, /* B1 */
1359 0x8000808000008000ull, /* B2 */
1360 0x8000808000008080ull, /* B3 */
1361 0x8000808000800000ull, /* B4 */
1362 0x8000808000800080ull, /* B5 */
1363 0x8000808000808000ull, /* B6 */
1364 0x8000808000808080ull, /* B7 */
1365 0x8000808080000000ull, /* B8 */
1366 0x8000808080000080ull, /* B9 */
1367 0x8000808080008000ull, /* BA */
1368 0x8000808080008080ull, /* BB */
1369 0x8000808080800000ull, /* BC */
1370 0x8000808080800080ull, /* BD */
1371 0x8000808080808000ull, /* BE */
1372 0x8000808080808080ull, /* BF */
1373 0x8080000000000000ull, /* C0 */
1374 0x8080000000000080ull, /* C1 */
1375 0x8080000000008000ull, /* C2 */
1376 0x8080000000008080ull, /* C3 */
1377 0x8080000000800000ull, /* C4 */
1378 0x8080000000800080ull, /* C5 */
1379 0x8080000000808000ull, /* C6 */
1380 0x8080000000808080ull, /* C7 */
1381 0x8080000080000000ull, /* C8 */
1382 0x8080000080000080ull, /* C9 */
1383 0x8080000080008000ull, /* CA */
1384 0x8080000080008080ull, /* CB */
1385 0x8080000080800000ull, /* CC */
1386 0x8080000080800080ull, /* CD */
1387 0x8080000080808000ull, /* CE */
1388 0x8080000080808080ull, /* CF */
1389 0x8080008000000000ull, /* D0 */
1390 0x8080008000000080ull, /* D1 */
1391 0x8080008000008000ull, /* D2 */
1392 0x8080008000008080ull, /* D3 */
1393 0x8080008000800000ull, /* D4 */
1394 0x8080008000800080ull, /* D5 */
1395 0x8080008000808000ull, /* D6 */
1396 0x8080008000808080ull, /* D7 */
1397 0x8080008080000000ull, /* D8 */
1398 0x8080008080000080ull, /* D9 */
1399 0x8080008080008000ull, /* DA */
1400 0x8080008080008080ull, /* DB */
1401 0x8080008080800000ull, /* DC */
1402 0x8080008080800080ull, /* DD */
1403 0x8080008080808000ull, /* DE */
1404 0x8080008080808080ull, /* DF */
1405 0x8080800000000000ull, /* E0 */
1406 0x8080800000000080ull, /* E1 */
1407 0x8080800000008000ull, /* E2 */
1408 0x8080800000008080ull, /* E3 */
1409 0x8080800000800000ull, /* E4 */
1410 0x8080800000800080ull, /* E5 */
1411 0x8080800000808000ull, /* E6 */
1412 0x8080800000808080ull, /* E7 */
1413 0x8080800080000000ull, /* E8 */
1414 0x8080800080000080ull, /* E9 */
1415 0x8080800080008000ull, /* EA */
1416 0x8080800080008080ull, /* EB */
1417 0x8080800080800000ull, /* EC */
1418 0x8080800080800080ull, /* ED */
1419 0x8080800080808000ull, /* EE */
1420 0x8080800080808080ull, /* EF */
1421 0x8080808000000000ull, /* F0 */
1422 0x8080808000000080ull, /* F1 */
1423 0x8080808000008000ull, /* F2 */
1424 0x8080808000008080ull, /* F3 */
1425 0x8080808000800000ull, /* F4 */
1426 0x8080808000800080ull, /* F5 */
1427 0x8080808000808000ull, /* F6 */
1428 0x8080808000808080ull, /* F7 */
1429 0x8080808080000000ull, /* F8 */
1430 0x8080808080000080ull, /* F9 */
1431 0x8080808080008000ull, /* FA */
1432 0x8080808080008080ull, /* FB */
1433 0x8080808080800000ull, /* FC */
1434 0x8080808080800080ull, /* FD */
1435 0x8080808080808000ull, /* FE */
1436 0x8080808080808080ull, /* FF */
1439 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1441 int i;
1442 uint64_t t[2] = { 0, 0 };
1444 VECTOR_FOR_INORDER_I(i, u8) {
1445 #if defined(HOST_WORDS_BIGENDIAN)
1446 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1447 #else
1448 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1449 #endif
1452 r->u64[0] = t[0];
1453 r->u64[1] = t[1];
1456 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1457 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1459 int i, j; \
1460 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1462 VECTOR_FOR_INORDER_I(i, srcfld) { \
1463 prod[i] = 0; \
1464 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1465 if (a->srcfld[i] & (1ull<<j)) { \
1466 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1471 VECTOR_FOR_INORDER_I(i, trgfld) { \
1472 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1476 PMSUM(vpmsumb, u8, u16, uint16_t)
1477 PMSUM(vpmsumh, u16, u32, uint32_t)
1478 PMSUM(vpmsumw, u32, u64, uint64_t)
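/* vpmsumd: carry-less (polynomial) multiply of each doubleword of a by the
 * corresponding doubleword of b, XOR-summing the two 128-bit products.  The
 * non-CONFIG_INT128 path builds each shifted partial product by hand as a
 * high/low doubleword pair.
 */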
1480 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1483 #ifdef CONFIG_INT128
1484 int i, j;
1485 __uint128_t prod[2];
1487 VECTOR_FOR_INORDER_I(i, u64) {
1488 prod[i] = 0;
1489 for (j = 0; j < 64; j++) {
1490 if (a->u64[i] & (1ull<<j)) {
1491 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1496 r->u128 = prod[0] ^ prod[1];
1498 #else
1499 int i, j;
1500 ppc_avr_t prod[2];
1502 VECTOR_FOR_INORDER_I(i, u64) {
1503 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1504 for (j = 0; j < 64; j++) {
1505 if (a->u64[i] & (1ull<<j)) {
1506 ppc_avr_t bshift;
1507 if (j == 0) {
1508 bshift.u64[HI_IDX] = 0;
1509 bshift.u64[LO_IDX] = b->u64[i];
1510 } else {
1511 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1512 bshift.u64[LO_IDX] = b->u64[i] << j;
1514 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1515 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1520 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1521 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1522 #endif
1526 #if defined(HOST_WORDS_BIGENDIAN)
1527 #define PKBIG 1
1528 #else
1529 #define PKBIG 0
1530 #endif
1531 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1533 int i, j;
1534 ppc_avr_t result;
1535 #if defined(HOST_WORDS_BIGENDIAN)
1536 const ppc_avr_t *x[2] = { a, b };
1537 #else
1538 const ppc_avr_t *x[2] = { b, a };
1539 #endif
1541 VECTOR_FOR_INORDER_I(i, u64) {
1542 VECTOR_FOR_INORDER_I(j, u32) {
1543 uint32_t e = x[i]->u32[j];
1545 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1546 ((e >> 6) & 0x3e0) |
1547 ((e >> 3) & 0x1f));
1550 *r = result;
1553 #define VPK(suffix, from, to, cvt, dosat) \
1554 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1555 ppc_avr_t *a, ppc_avr_t *b) \
1557 int i; \
1558 int sat = 0; \
1559 ppc_avr_t result; \
1560 ppc_avr_t *a0 = PKBIG ? a : b; \
1561 ppc_avr_t *a1 = PKBIG ? b : a; \
1563 VECTOR_FOR_INORDER_I(i, from) { \
1564 result.to[i] = cvt(a0->from[i], &sat); \
1565 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1567 *r = result; \
1568 if (dosat && sat) { \
1569 env->vscr |= (1 << VSCR_SAT); \
1572 #define I(x, y) (x)
1573 VPK(shss, s16, s8, cvtshsb, 1)
1574 VPK(shus, s16, u8, cvtshub, 1)
1575 VPK(swss, s32, s16, cvtswsh, 1)
1576 VPK(swus, s32, u16, cvtswuh, 1)
1577 VPK(sdss, s64, s32, cvtsdsw, 1)
1578 VPK(sdus, s64, u32, cvtsduw, 1)
1579 VPK(uhus, u16, u8, cvtuhub, 1)
1580 VPK(uwus, u32, u16, cvtuwuh, 1)
1581 VPK(udus, u64, u32, cvtuduw, 1)
1582 VPK(uhum, u16, u8, I, 0)
1583 VPK(uwum, u32, u16, I, 0)
1584 VPK(udum, u64, u32, I, 0)
1585 #undef I
1586 #undef VPK
1587 #undef PKBIG
1589 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1591 int i;
1593 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1594 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1598 #define VRFI(suffix, rounding) \
1599 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1600 ppc_avr_t *b) \
1602 int i; \
1603 float_status s = env->vec_status; \
1605 set_float_rounding_mode(rounding, &s); \
1606 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1607 r->f[i] = float32_round_to_int (b->f[i], &s); \
1610 VRFI(n, float_round_nearest_even)
1611 VRFI(m, float_round_down)
1612 VRFI(p, float_round_up)
1613 VRFI(z, float_round_to_zero)
1614 #undef VRFI
1616 #define VROTATE(suffix, element, mask) \
1617 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1619 int i; \
1621 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1622 unsigned int shift = b->element[i] & mask; \
1623 r->element[i] = (a->element[i] << shift) | \
1624 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1627 VROTATE(b, u8, 0x7)
1628 VROTATE(h, u16, 0xF)
1629 VROTATE(w, u32, 0x1F)
1630 VROTATE(d, u64, 0x3F)
1631 #undef VROTATE
1633 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1635 int i;
1637 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1638 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1640 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1644 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1645 ppc_avr_t *c)
1647 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1648 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1651 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1653 int i;
1655 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1656 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1660 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1662 int i;
1664 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1665 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1669 /* The specification says that the results are undefined if all of the
1670 * shift counts are not identical. We check that they are, to conform
1671 * to what real hardware appears to do. */
1672 #define VSHIFT(suffix, leftp) \
1673 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1675 int shift = b->u8[LO_IDX*15] & 0x7; \
1676 int doit = 1; \
1677 int i; \
1679 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1680 doit = doit && ((b->u8[i] & 0x7) == shift); \
1682 if (doit) { \
1683 if (shift == 0) { \
1684 *r = *a; \
1685 } else if (leftp) { \
1686 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1688 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1689 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1690 } else { \
1691 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1693 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1694 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1698 VSHIFT(l, 1)
1699 VSHIFT(r, 0)
1700 #undef VSHIFT
1702 #define VSL(suffix, element, mask) \
1703 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1705 int i; \
1707 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1708 unsigned int shift = b->element[i] & mask; \
1710 r->element[i] = a->element[i] << shift; \
1713 VSL(b, u8, 0x7)
1714 VSL(h, u16, 0x0F)
1715 VSL(w, u32, 0x1F)
1716 VSL(d, u64, 0x3F)
1717 #undef VSL
1719 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1721 int i;
1722 unsigned int shift, bytes, size;
1724 size = ARRAY_SIZE(r->u8);
1725 for (i = 0; i < size; i++) {
1726 shift = b->u8[i] & 0x7; /* extract shift value */
1727 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */
1728 (((i + 1) < size) ? a->u8[i + 1] : 0);
1729 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */
1733 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1735 int i;
1736 unsigned int shift, bytes;
1738 /* Use reverse order, as the destination and source registers can be the
1739 * same. Since the register is modified in place (saving a temporary),
1740 * reverse order guarantees that the computed result is not fed back.
1742 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1743 shift = b->u8[i] & 0x7; /* extract shift value */
1744 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
1745 /* extract adjacent bytes */
1746 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */
1750 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1752 int sh = shift & 0xf;
1753 int i;
1754 ppc_avr_t result;
1756 #if defined(HOST_WORDS_BIGENDIAN)
1757 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1758 int index = sh + i;
1759 if (index > 0xf) {
1760 result.u8[i] = b->u8[index - 0x10];
1761 } else {
1762 result.u8[i] = a->u8[index];
1765 #else
1766 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1767 int index = (16 - sh) + i;
1768 if (index > 0xf) {
1769 result.u8[i] = a->u8[index - 0x10];
1770 } else {
1771 result.u8[i] = b->u8[index];
1774 #endif
1775 *r = result;
1778 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1780 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1782 #if defined(HOST_WORDS_BIGENDIAN)
1783 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1784 memset(&r->u8[16-sh], 0, sh);
1785 #else
1786 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1787 memset(&r->u8[0], 0, sh);
1788 #endif
1791 /* Experimental testing shows that hardware masks the immediate. */
1792 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1793 #if defined(HOST_WORDS_BIGENDIAN)
1794 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1795 #else
1796 #define SPLAT_ELEMENT(element) \
1797 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1798 #endif
1799 #define VSPLT(suffix, element) \
1800 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1802 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1803 int i; \
1805 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1806 r->element[i] = s; \
1809 VSPLT(b, u8)
1810 VSPLT(h, u16)
1811 VSPLT(w, u32)
1812 #undef VSPLT
1813 #undef SPLAT_ELEMENT
1814 #undef _SPLAT_MASKED
1815 #if defined(HOST_WORDS_BIGENDIAN)
1816 #define VINSERT(suffix, element) \
1817 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1819 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element)], \
1820 sizeof(r->element[0])); \
1822 #else
1823 #define VINSERT(suffix, element) \
1824 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1826 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1827 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1829 #endif
1830 VINSERT(b, u8)
1831 VINSERT(h, u16)
1832 VINSERT(w, u32)
1833 VINSERT(d, u64)
1834 #undef VINSERT
1835 #if defined(HOST_WORDS_BIGENDIAN)
1836 #define VEXTRACT(suffix, element) \
1837 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1839 uint32_t es = sizeof(r->element[0]); \
1840 memmove(&r->u8[8 - es], &b->u8[index], es); \
1841 memset(&r->u8[8], 0, 8); \
1842 memset(&r->u8[0], 0, 8 - es); \
1844 #else
1845 #define VEXTRACT(suffix, element) \
1846 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1848 uint32_t es = sizeof(r->element[0]); \
1849 uint32_t s = (16 - index) - es; \
1850 memmove(&r->u8[8], &b->u8[s], es); \
1851 memset(&r->u8[0], 0, 8); \
1852 memset(&r->u8[8 + es], 0, 8 - es); \
1854 #endif
1855 VEXTRACT(ub, u8)
1856 VEXTRACT(uh, u16)
1857 VEXTRACT(uw, u32)
1858 VEXTRACT(d, u64)
1859 #undef VEXTRACT
1861 #define VSPLTI(suffix, element, splat_type) \
1862 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1864 splat_type x = (int8_t)(splat << 3) >> 3; \
1865 int i; \
1867 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1868 r->element[i] = x; \
1871 VSPLTI(b, s8, int8_t)
1872 VSPLTI(h, s16, int16_t)
1873 VSPLTI(w, s32, int32_t)
1874 #undef VSPLTI
1876 #define VSR(suffix, element, mask) \
1877 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1879 int i; \
1881 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1882 unsigned int shift = b->element[i] & mask; \
1883 r->element[i] = a->element[i] >> shift; \
1886 VSR(ab, s8, 0x7)
1887 VSR(ah, s16, 0xF)
1888 VSR(aw, s32, 0x1F)
1889 VSR(ad, s64, 0x3F)
1890 VSR(b, u8, 0x7)
1891 VSR(h, u16, 0xF)
1892 VSR(w, u32, 0x1F)
1893 VSR(d, u64, 0x3F)
1894 #undef VSR
1896 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1898 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1900 #if defined(HOST_WORDS_BIGENDIAN)
1901 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1902 memset(&r->u8[0], 0, sh);
1903 #else
1904 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1905 memset(&r->u8[16 - sh], 0, sh);
1906 #endif
1909 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1911 int i;
1913 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1914 r->u32[i] = a->u32[i] >= b->u32[i];
1918 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1920 int64_t t;
1921 int i, upper;
1922 ppc_avr_t result;
1923 int sat = 0;
1925 #if defined(HOST_WORDS_BIGENDIAN)
1926 upper = ARRAY_SIZE(r->s32)-1;
1927 #else
1928 upper = 0;
1929 #endif
1930 t = (int64_t)b->s32[upper];
1931 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1932 t += a->s32[i];
1933 result.s32[i] = 0;
1935 result.s32[upper] = cvtsdsw(t, &sat);
1936 *r = result;
1938 if (sat) {
1939 env->vscr |= (1 << VSCR_SAT);
1943 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1945 int i, j, upper;
1946 ppc_avr_t result;
1947 int sat = 0;
1949 #if defined(HOST_WORDS_BIGENDIAN)
1950 upper = 1;
1951 #else
1952 upper = 0;
1953 #endif
1954 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1955 int64_t t = (int64_t)b->s32[upper + i * 2];
1957 result.u64[i] = 0;
1958 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1959 t += a->s32[2 * i + j];
1961 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
1964 *r = result;
1965 if (sat) {
1966 env->vscr |= (1 << VSCR_SAT);
1970 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1972 int i, j;
1973 int sat = 0;
1975 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1976 int64_t t = (int64_t)b->s32[i];
1978 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1979 t += a->s8[4 * i + j];
1981 r->s32[i] = cvtsdsw(t, &sat);
1984 if (sat) {
1985 env->vscr |= (1 << VSCR_SAT);
1989 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1991 int sat = 0;
1992 int i;
1994 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1995 int64_t t = (int64_t)b->s32[i];
1997 t += a->s16[2 * i] + a->s16[2 * i + 1];
1998 r->s32[i] = cvtsdsw(t, &sat);
2001 if (sat) {
2002 env->vscr |= (1 << VSCR_SAT);
2006 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2008 int i, j;
2009 int sat = 0;
2011 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2012 uint64_t t = (uint64_t)b->u32[i];
2014 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2015 t += a->u8[4 * i + j];
2017 r->u32[i] = cvtuduw(t, &sat);
2020 if (sat) {
2021 env->vscr |= (1 << VSCR_SAT);
2025 #if defined(HOST_WORDS_BIGENDIAN)
2026 #define UPKHI 1
2027 #define UPKLO 0
2028 #else
2029 #define UPKHI 0
2030 #define UPKLO 1
2031 #endif
2032 #define VUPKPX(suffix, hi) \
2033 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2035 int i; \
2036 ppc_avr_t result; \
2038 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2039 uint16_t e = b->u16[hi ? i : i+4]; \
2040 uint8_t a = (e >> 15) ? 0xff : 0; \
2041 uint8_t r = (e >> 10) & 0x1f; \
2042 uint8_t g = (e >> 5) & 0x1f; \
2043 uint8_t b = e & 0x1f; \
2045 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2047 *r = result; \
2049 VUPKPX(lpx, UPKLO)
2050 VUPKPX(hpx, UPKHI)
2051 #undef VUPKPX
2053 #define VUPK(suffix, unpacked, packee, hi) \
2054 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2056 int i; \
2057 ppc_avr_t result; \
2059 if (hi) { \
2060 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2061 result.unpacked[i] = b->packee[i]; \
2063 } else { \
2064 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2065 i++) { \
2066 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2069 *r = result; \
2071 VUPK(hsb, s16, s8, UPKHI)
2072 VUPK(hsh, s32, s16, UPKHI)
2073 VUPK(hsw, s64, s32, UPKHI)
2074 VUPK(lsb, s16, s8, UPKLO)
2075 VUPK(lsh, s32, s16, UPKLO)
2076 VUPK(lsw, s64, s32, UPKLO)
2077 #undef VUPK
2078 #undef UPKHI
2079 #undef UPKLO
2081 #define VGENERIC_DO(name, element) \
2082 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2084 int i; \
2086 VECTOR_FOR_INORDER_I(i, element) { \
2087 r->element[i] = name(b->element[i]); \
2091 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2092 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2093 #define clzw(v) clz32((v))
2094 #define clzd(v) clz64((v))
2096 VGENERIC_DO(clzb, u8)
2097 VGENERIC_DO(clzh, u16)
2098 VGENERIC_DO(clzw, u32)
2099 VGENERIC_DO(clzd, u64)
2101 #undef clzb
2102 #undef clzh
2103 #undef clzw
2104 #undef clzd
2106 #define ctzb(v) ((v) ? ctz32(v) : 8)
2107 #define ctzh(v) ((v) ? ctz32(v) : 16)
2108 #define ctzw(v) ctz32((v))
2109 #define ctzd(v) ctz64((v))
2111 VGENERIC_DO(ctzb, u8)
2112 VGENERIC_DO(ctzh, u16)
2113 VGENERIC_DO(ctzw, u32)
2114 VGENERIC_DO(ctzd, u64)
2116 #undef ctzb
2117 #undef ctzh
2118 #undef ctzw
2119 #undef ctzd
2121 #define popcntb(v) ctpop8(v)
2122 #define popcnth(v) ctpop16(v)
2123 #define popcntw(v) ctpop32(v)
2124 #define popcntd(v) ctpop64(v)
2126 VGENERIC_DO(popcntb, u8)
2127 VGENERIC_DO(popcnth, u16)
2128 VGENERIC_DO(popcntw, u32)
2129 VGENERIC_DO(popcntd, u64)
2131 #undef popcntb
2132 #undef popcnth
2133 #undef popcntw
2134 #undef popcntd
2136 #undef VGENERIC_DO
2138 #if defined(HOST_WORDS_BIGENDIAN)
2139 #define QW_ONE { .u64 = { 0, 1 } }
2140 #else
2141 #define QW_ONE { .u64 = { 1, 0 } }
2142 #endif
2144 #ifndef CONFIG_INT128
2146 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2148 t->u64[0] = ~a.u64[0];
2149 t->u64[1] = ~a.u64[1];
2152 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2154 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
2155 return -1;
2156 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
2157 return 1;
2158 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
2159 return -1;
2160 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
2161 return 1;
2162 } else {
2163 return 0;
2167 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2169 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2170 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2171 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2174 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2176 ppc_avr_t not_a;
2177 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2178 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2179 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2180 avr_qw_not(&not_a, a);
2181 return avr_qw_cmpu(not_a, b) < 0;
2184 #endif
2186 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2188 #ifdef CONFIG_INT128
2189 r->u128 = a->u128 + b->u128;
2190 #else
2191 avr_qw_add(r, *a, *b);
2192 #endif
2195 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2197 #ifdef CONFIG_INT128
2198 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2199 #else
2201 if (c->u64[LO_IDX] & 1) {
2202 ppc_avr_t tmp;
2204 tmp.u64[HI_IDX] = 0;
2205 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2206 avr_qw_add(&tmp, *a, tmp);
2207 avr_qw_add(r, tmp, *b);
2208 } else {
2209 avr_qw_add(r, *a, *b);
2211 #endif
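/*
 * "Write carry" forms: vaddcuq returns only the carry out of the 128-bit
 * addition (0 or 1) in the low doubleword of the result; vaddecuq also
 * accounts for a carry-in supplied in the least-significant bit of c.
 */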
2214 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2216 #ifdef CONFIG_INT128
2217 r->u128 = (~a->u128 < b->u128);
2218 #else
2219 ppc_avr_t not_a;
2221 avr_qw_not(&not_a, *a);
2223 r->u64[HI_IDX] = 0;
2224 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2225 #endif
2228 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2230 #ifdef CONFIG_INT128
2231 int carry_out = (~a->u128 < b->u128);
2232 if (!carry_out && (c->u128 & 1)) {
2233 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2234 ((a->u128 != 0) || (b->u128 != 0));
2236 r->u128 = carry_out;
2237 #else
2239 int carry_in = c->u64[LO_IDX] & 1;
2240 int carry_out = 0;
2241 ppc_avr_t tmp;
2243 carry_out = avr_qw_addc(&tmp, *a, *b);
2245 if (!carry_out && carry_in) {
2246 ppc_avr_t one = QW_ONE;
2247 carry_out = avr_qw_addc(&tmp, tmp, one);
2249 r->u64[HI_IDX] = 0;
2250 r->u64[LO_IDX] = carry_out;
2251 #endif
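/*
 * Quadword subtraction is done as two's-complement addition, a + ~b + 1;
 * the vsubcuq/vsubecuq forms return the resulting carry (the borrow
 * complement) instead of the difference.
 */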
2254 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2256 #ifdef CONFIG_INT128
2257 r->u128 = a->u128 - b->u128;
2258 #else
2259 ppc_avr_t tmp;
2260 ppc_avr_t one = QW_ONE;
2262 avr_qw_not(&tmp, *b);
2263 avr_qw_add(&tmp, *a, tmp);
2264 avr_qw_add(r, tmp, one);
2265 #endif
2268 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2270 #ifdef CONFIG_INT128
2271 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2272 #else
2273 ppc_avr_t tmp, sum;
2275 avr_qw_not(&tmp, *b);
2276 avr_qw_add(&sum, *a, tmp);
2278 tmp.u64[HI_IDX] = 0;
2279 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2280 avr_qw_add(r, sum, tmp);
2281 #endif
2284 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2286 #ifdef CONFIG_INT128
2287 r->u128 = (~a->u128 < ~b->u128) ||
2288 (a->u128 + ~b->u128 == (__uint128_t)-1);
2289 #else
2290 int carry = (avr_qw_cmpu(*a, *b) > 0);
2291 if (!carry) {
2292 ppc_avr_t tmp;
2293 avr_qw_not(&tmp, *b);
2294 avr_qw_add(&tmp, *a, tmp);
2295 carry = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2297 r->u64[HI_IDX] = 0;
2298 r->u64[LO_IDX] = carry;
2299 #endif
2302 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2304 #ifdef CONFIG_INT128
2305 r->u128 =
2306 (~a->u128 < ~b->u128) ||
2307 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2308 #else
2309 int carry_in = c->u64[LO_IDX] & 1;
2310 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2311 if (!carry_out && carry_in) {
2312 ppc_avr_t tmp;
2313 avr_qw_not(&tmp, *b);
2314 avr_qw_add(&tmp, *a, tmp);
2315 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2318 r->u64[HI_IDX] = 0;
2319 r->u64[LO_IDX] = carry_out;
2320 #endif
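/*
 * Signed packed-decimal (BCD) helpers.  A quadword holds 31 decimal digits
 * in nibbles plus a sign code in the nibble at digit position 0 (the
 * low-order nibble of the least-significant byte).  0xA-0xF are sign codes;
 * 0x0-0x9 in the sign position, or any digit nibble above 9, marks the
 * operand invalid.  BCD_DIG_BYTE maps a digit index to the byte containing
 * it for either host byte order.
 */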
2323 #define BCD_PLUS_PREF_1 0xC
2324 #define BCD_PLUS_PREF_2 0xF
2325 #define BCD_PLUS_ALT_1 0xA
2326 #define BCD_NEG_PREF 0xD
2327 #define BCD_NEG_ALT 0xB
2328 #define BCD_PLUS_ALT_2 0xE
2330 #if defined(HOST_WORDS_BIGENDIAN)
2331 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2332 #else
2333 #define BCD_DIG_BYTE(n) ((n) / 2)
2334 #endif
2336 static int bcd_get_sgn(ppc_avr_t *bcd)
2338 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2339 case BCD_PLUS_PREF_1:
2340 case BCD_PLUS_PREF_2:
2341 case BCD_PLUS_ALT_1:
2342 case BCD_PLUS_ALT_2:
2344 return 1;
2347 case BCD_NEG_PREF:
2348 case BCD_NEG_ALT:
2350 return -1;
2353 default:
2355 return 0;
2360 static int bcd_preferred_sgn(int sgn, int ps)
2362 if (sgn >= 0) {
2363 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2364 } else {
2365 return BCD_NEG_PREF;
2369 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2371 uint8_t result;
2372 if (n & 1) {
2373 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2374 } else {
2375 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2378 if (unlikely(result > 9)) {
2379 *invalid = true;
2381 return result;
2384 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2386 if (n & 1) {
2387 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2388 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2389 } else {
2390 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2391 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
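/* Compare magnitudes only, scanning from the most-significant digit (31)
 * down to digit 1; the sign nibble at position 0 is ignored. */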
2395 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2397 int i;
2398 int invalid = 0;
2399 for (i = 31; i > 0; i--) {
2400 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2401 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2402 if (unlikely(invalid)) {
2403 return 0; /* doesn't matter */
2404 } else if (dig_a > dig_b) {
2405 return 1;
2406 } else if (dig_a < dig_b) {
2407 return -1;
2411 return 0;
2414 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2415 int *overflow)
2417 int carry = 0;
2418 int i;
2419 int is_zero = 1;
2420 for (i = 1; i <= 31; i++) {
2421 uint8_t digit = bcd_get_digit(a, i, invalid) +
2422 bcd_get_digit(b, i, invalid) + carry;
2423 is_zero &= (digit == 0);
2424 if (digit > 9) {
2425 carry = 1;
2426 digit -= 10;
2427 } else {
2428 carry = 0;
2431 bcd_put_digit(t, digit, i);
2433 if (unlikely(*invalid)) {
2434 return -1;
2438 *overflow = carry;
2439 return is_zero;
2442 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2443 int *overflow)
2445 int carry = 0;
2446 int i;
2447 int is_zero = 1;
2448 for (i = 1; i <= 31; i++) {
2449 uint8_t digit = bcd_get_digit(a, i, invalid) -
2450 bcd_get_digit(b, i, invalid) + carry;
2451 is_zero &= (digit == 0);
2452 if (digit & 0x80) {
2453 carry = -1;
2454 digit += 10;
2455 } else {
2456 carry = 0;
2459 bcd_put_digit(t, digit, i);
2461 if (unlikely(*invalid)) {
2462 return -1;
2466 *overflow = carry;
2467 return is_zero;
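/*
 * bcdadd: if the signs match, the magnitudes are added and the common sign
 * kept (re-encoded to the preferred sign code selected by ps); if they
 * differ, the smaller magnitude is subtracted from the larger and the sign
 * of the larger operand is used.  The returned CR field flags GT for a
 * positive result, LT for a negative one, EQ for zero, and SO for an
 * invalid operand or decimal overflow.
 */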
2470 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2473 int sgna = bcd_get_sgn(a);
2474 int sgnb = bcd_get_sgn(b);
2475 int invalid = (sgna == 0) || (sgnb == 0);
2476 int overflow = 0;
2477 int zero = 0;
2478 uint32_t cr = 0;
2479 ppc_avr_t result = { .u64 = { 0, 0 } };
2481 if (!invalid) {
2482 if (sgna == sgnb) {
2483 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2484 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2485 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2486 } else if (bcd_cmp_mag(a, b) > 0) {
2487 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2488 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2489 cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2490 } else {
2491 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2492 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2493 cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2497 if (unlikely(invalid)) {
2498 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2499 cr = 1 << CRF_SO;
2500 } else if (overflow) {
2501 cr |= 1 << CRF_SO;
2502 } else if (zero) {
2503 cr = 1 << CRF_EQ;
2506 *r = result;
2508 return cr;
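/* bcdsub flips the sign code of b and reuses the bcdadd logic; an invalid
 * sign in b is left untouched so that bcdadd reports it. */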
2511 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2513 ppc_avr_t bcopy = *b;
2514 int sgnb = bcd_get_sgn(b);
2515 if (sgnb < 0) {
2516 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2517 } else if (sgnb > 0) {
2518 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2520 /* else invalid ... defer to bcdadd code for proper handling */
2522 return helper_bcdadd(r, a, &bcopy, ps);
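/*
 * AES acceleration helpers.  vsbox applies the AES S-box to every byte.
 * vcipher performs one encryption round (ShiftRows, SubBytes and MixColumns
 * folded into the AES_Te0..AES_Te3 table lookups) and XORs in the round key
 * from b; vcipherlast is the final round, which omits MixColumns.  The
 * vncipher/vncipherlast helpers are the decryption counterparts built on
 * the inverse S-box and the AES_imc inverse-MixColumns table.
 */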
2525 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2527 int i;
2528 VECTOR_FOR_INORDER_I(i, u8) {
2529 r->u8[i] = AES_sbox[a->u8[i]];
2533 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2535 ppc_avr_t result;
2536 int i;
2538 VECTOR_FOR_INORDER_I(i, u32) {
2539 result.AVRW(i) = b->AVRW(i) ^
2540 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2541 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2542 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2543 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2545 *r = result;
2548 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2550 ppc_avr_t result;
2551 int i;
2553 VECTOR_FOR_INORDER_I(i, u8) {
2554 result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
2556 *r = result;
2559 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2561 /* This differs from what is written in ISA V2.07. The RTL is */
2562 /* incorrect and will be fixed in V2.07B. */
2563 int i;
2564 ppc_avr_t tmp;
2566 VECTOR_FOR_INORDER_I(i, u8) {
2567 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2570 VECTOR_FOR_INORDER_I(i, u32) {
2571 r->AVRW(i) =
2572 AES_imc[tmp.AVRB(4*i + 0)][0] ^
2573 AES_imc[tmp.AVRB(4*i + 1)][1] ^
2574 AES_imc[tmp.AVRB(4*i + 2)][2] ^
2575 AES_imc[tmp.AVRB(4*i + 3)][3];
2579 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2581 ppc_avr_t result;
2582 int i;
2584 VECTOR_FOR_INORDER_I(i, u8) {
2585 result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
2587 *r = result;
2590 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
2591 #if defined(HOST_WORDS_BIGENDIAN)
2592 #define EL_IDX(i) (i)
2593 #else
2594 #define EL_IDX(i) (3 - (i))
2595 #endif
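/*
 * vshasigmaw computes the SHA-256 sigma functions.  Bit 4 of st_six selects
 * between the lower-case (message schedule) and upper-case (compression)
 * functions, and each of the low four bits picks sigma0 (bit clear) or
 * sigma1 (bit set) for the corresponding word element.
 */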
2597 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2599 int st = (st_six & 0x10) != 0;
2600 int six = st_six & 0xF;
2601 int i;
2603 VECTOR_FOR_INORDER_I(i, u32) {
2604 if (st == 0) {
2605 if ((six & (0x8 >> i)) == 0) {
2606 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2607 ROTRu32(a->u32[EL_IDX(i)], 18) ^
2608 (a->u32[EL_IDX(i)] >> 3);
2609 } else { /* six.bit[i] == 1 */
2610 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2611 ROTRu32(a->u32[EL_IDX(i)], 19) ^
2612 (a->u32[EL_IDX(i)] >> 10);
2614 } else { /* st == 1 */
2615 if ((six & (0x8 >> i)) == 0) {
2616 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2617 ROTRu32(a->u32[EL_IDX(i)], 13) ^
2618 ROTRu32(a->u32[EL_IDX(i)], 22);
2619 } else { /* six.bit[i] == 1 */
2620 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2621 ROTRu32(a->u32[EL_IDX(i)], 11) ^
2622 ROTRu32(a->u32[EL_IDX(i)], 25);
2628 #undef ROTRu32
2629 #undef EL_IDX
2631 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
2632 #if defined(HOST_WORDS_BIGENDIAN)
2633 #define EL_IDX(i) (i)
2634 #else
2635 #define EL_IDX(i) (1 - (i))
2636 #endif
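/*
 * vshasigmad is the SHA-512 counterpart of vshasigmaw, operating on the two
 * doubleword elements with the SHA-512 rotation and shift amounts; bits 3
 * and 1 of the "six" field select sigma0 or sigma1 for each element.
 */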
2638 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2640 int st = (st_six & 0x10) != 0;
2641 int six = st_six & 0xF;
2642 int i;
2644 VECTOR_FOR_INORDER_I(i, u64) {
2645 if (st == 0) {
2646 if ((six & (0x8 >> (2*i))) == 0) {
2647 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2648 ROTRu64(a->u64[EL_IDX(i)], 8) ^
2649 (a->u64[EL_IDX(i)] >> 7);
2650 } else { /* six.bit[2*i] == 1 */
2651 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2652 ROTRu64(a->u64[EL_IDX(i)], 61) ^
2653 (a->u64[EL_IDX(i)] >> 6);
2655 } else { /* st == 1 */
2656 if ((six & (0x8 >> (2*i))) == 0) {
2657 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2658 ROTRu64(a->u64[EL_IDX(i)], 34) ^
2659 ROTRu64(a->u64[EL_IDX(i)], 39);
2660 } else { /* six.bit[2*i] == 1 */
2661 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2662 ROTRu64(a->u64[EL_IDX(i)], 18) ^
2663 ROTRu64(a->u64[EL_IDX(i)], 41);
2669 #undef ROTRu64
2670 #undef EL_IDX
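/*
 * vpermxor: for each result byte, the high nibble of the corresponding byte
 * of c indexes a byte of a and the low nibble a byte of b, and the two are
 * XORed.  The indices follow the ISA's big-endian byte numbering, hence the
 * 15 - index mirroring on little-endian hosts.
 */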
2672 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2674 ppc_avr_t result;
2675 int i;
2677 VECTOR_FOR_INORDER_I(i, u8) {
2678 int indexA = c->u8[i] >> 4;
2679 int indexB = c->u8[i] & 0xF;
2680 #if defined(HOST_WORDS_BIGENDIAN)
2681 result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
2682 #else
2683 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
2684 #endif
2686 *r = result;
2689 #undef VECTOR_FOR_INORDER_I
2690 #undef HI_IDX
2691 #undef LO_IDX
2693 /*****************************************************************************/
2694 /* SPE extension helpers */
2695 /* Use a table to make this quicker */
2696 static const uint8_t hbrev[16] = {
2697 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
2698 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
2701 static inline uint8_t byte_reverse(uint8_t val)
2703 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
2706 static inline uint32_t word_reverse(uint32_t val)
2708 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
2709 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
2712 #define MASKBITS 16 /* Arbitrary value, to be fixed: the architected width is implementation dependent */
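/*
 * brinc performs a bit-reversed increment, used for FFT-style addressing:
 * the index bits of arg1 selected by arg2 (within the low MASKBITS bits)
 * are incremented as if their bit order were reversed, and the remaining
 * bits of arg1 pass through unchanged.  ORing in ~b before the increment
 * lets the carry propagate across bits that are outside the active mask.
 */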
2713 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2715 uint32_t a, b, d, mask;
2717 mask = UINT32_MAX >> (32 - MASKBITS);
2718 a = arg1 & mask;
2719 b = arg2 & mask;
2720 d = word_reverse(1 + word_reverse(a | ~b));
2721 return (arg1 & ~mask) | (d & b);
2724 uint32_t helper_cntlsw32(uint32_t val)
2726 if (val & 0x80000000) {
2727 return clz32(~val);
2728 } else {
2729 return clz32(val);
2733 uint32_t helper_cntlzw32(uint32_t val)
2735 return clz32(val);
2738 /* 440 specific */
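/*
 * dlmzb scans the eight bytes formed by high:low from the most significant
 * byte down, stopping at the first zero byte.  The count (the 1-based
 * position of the zero byte, or 8 if none is found) is written to the low
 * seven bits of XER and returned; when update_Rc is set, CR0 records
 * whether the zero byte was found in high, in low, or not at all, together
 * with the SO bit.
 */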
2739 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2740 target_ulong low, uint32_t update_Rc)
2742 target_ulong mask;
2743 int i;
2745 i = 1;
2746 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2747 if ((high & mask) == 0) {
2748 if (update_Rc) {
2749 env->crf[0] = 0x4;
2751 goto done;
2753 i++;
2755 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2756 if ((low & mask) == 0) {
2757 if (update_Rc) {
2758 env->crf[0] = 0x8;
2760 goto done;
2762 i++;
2764 i = 8;
2765 if (update_Rc) {
2766 env->crf[0] = 0x2;
2768 done:
2769 env->xer = (env->xer & ~0x7F) | i;
2770 if (update_Rc) {
2771 env->crf[0] |= xer_so;
2773 return i;