target/ppc/int_helper.c
1 /*
2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "cpu.h"
21 #include "internal.h"
22 #include "exec/exec-all.h"
23 #include "qemu/host-utils.h"
24 #include "exec/helper-proto.h"
25 #include "crypto/aes.h"
27 #include "helper_regs.h"
28 /*****************************************************************************/
29 /* Fixed point operations helpers */
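/*
 * divweu/divwe (Divide Word Extended): the dividend is (RA) followed by
 * 32 zero bits.  If the divisor is zero or the quotient does not fit in
 * 32 bits, the result is undefined and the OE forms set OV and SO.
 */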
31 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
32 uint32_t oe)
34 uint64_t rt = 0;
35 int overflow = 0;
37 uint64_t dividend = (uint64_t)ra << 32;
38 uint64_t divisor = (uint32_t)rb;
40 if (unlikely(divisor == 0)) {
41 overflow = 1;
42 } else {
43 rt = dividend / divisor;
44 overflow = rt > UINT32_MAX;
47 if (unlikely(overflow)) {
48 rt = 0; /* Undefined */
51 if (oe) {
52 if (unlikely(overflow)) {
53 env->so = env->ov = 1;
54 } else {
55 env->ov = 0;
59 return (target_ulong)rt;
62 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
63 uint32_t oe)
65 int64_t rt = 0;
66 int overflow = 0;
68 int64_t dividend = (int64_t)ra << 32;
69 int64_t divisor = (int64_t)((int32_t)rb);
71 if (unlikely((divisor == 0) ||
72 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
73 overflow = 1;
74 } else {
75 rt = dividend / divisor;
76 overflow = rt != (int32_t)rt;
79 if (unlikely(overflow)) {
80 rt = 0; /* Undefined */
83 if (oe) {
84 if (unlikely(overflow)) {
85 env->so = env->ov = 1;
86 } else {
87 env->ov = 0;
91 return (target_ulong)rt;
94 #if defined(TARGET_PPC64)
96 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
98 uint64_t rt = 0;
99 int overflow = 0;
101 overflow = divu128(&rt, &ra, rb);
103 if (unlikely(overflow)) {
104 rt = 0; /* Undefined */
107 if (oe) {
108 if (unlikely(overflow)) {
109 env->so = env->ov = 1;
110 } else {
111 env->ov = 0;
115 return rt;
118 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
120 int64_t rt = 0;
121 int64_t ra = (int64_t)rau;
122 int64_t rb = (int64_t)rbu;
123 int overflow = divs128(&rt, &ra, rb);
125 if (unlikely(overflow)) {
126 rt = 0; /* Undefined */
129 if (oe) {
131 if (unlikely(overflow)) {
132 env->so = env->ov = 1;
133 } else {
134 env->ov = 0;
138 return rt;
141 #endif
144 #if defined(TARGET_PPC64)
145 /* if x = 0xab, returns 0xabababababababab */
146 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
148 /* subtract 1 from each byte, AND with the inverse, and check whether the MSB
149 * is set in each byte,
150 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
151 * = (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
153 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
155 /* When you XOR the pattern and there is a match, that byte will be zero */
156 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
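/* cmpeqb: set the CR field's GT bit if any byte of rb equals the low byte of ra */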
158 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
160 return hasvalue(rb, ra) ? CRF_GT : 0;
163 #undef pattern
164 #undef haszero
165 #undef hasvalue
167 /* Return an invalid random number.
169 * FIXME: Add an rng backend or other mechanism to get a cryptographically
170 * suitable random number.
172 target_ulong helper_darn32(void)
174 return -1;
177 target_ulong helper_darn64(void)
179 return -1;
182 #endif
184 #if defined(TARGET_PPC64)
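/*
 * bpermd: each of the eight index bytes in rs selects one bit of rb
 * (numbered from the most significant bit); the selected bits are
 * gathered into the low byte of the result.
 */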
186 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
188 int i;
189 uint64_t ra = 0;
191 for (i = 0; i < 8; i++) {
192 int index = (rs >> (i*8)) & 0xFF;
193 if (index < 64) {
194 if (rb & (1ull << (63-index))) {
195 ra |= 1 << i;
199 return ra;
202 #endif
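/* cmpb: each result byte is 0xff where the corresponding bytes of rs and rb
 * match, 0x00 otherwise */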
204 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
206 target_ulong mask = 0xff;
207 target_ulong ra = 0;
208 int i;
210 for (i = 0; i < sizeof(target_ulong); i++) {
211 if ((rs & mask) == (rb & mask)) {
212 ra |= mask;
214 mask <<= 8;
216 return ra;
219 /* shift right arithmetic helper */
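/* CA is set when the source is negative and non-zero bits are shifted out,
 * cleared otherwise */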
220 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
221 target_ulong shift)
223 int32_t ret;
225 if (likely(!(shift & 0x20))) {
226 if (likely((uint32_t)shift != 0)) {
227 shift &= 0x1f;
228 ret = (int32_t)value >> shift;
229 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
230 env->ca = 0;
231 } else {
232 env->ca = 1;
234 } else {
235 ret = (int32_t)value;
236 env->ca = 0;
238 } else {
239 ret = (int32_t)value >> 31;
240 env->ca = (ret != 0);
242 return (target_long)ret;
245 #if defined(TARGET_PPC64)
246 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
247 target_ulong shift)
249 int64_t ret;
251 if (likely(!(shift & 0x40))) {
252 if (likely((uint64_t)shift != 0)) {
253 shift &= 0x3f;
254 ret = (int64_t)value >> shift;
255 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
256 env->ca = 0;
257 } else {
258 env->ca = 1;
260 } else {
261 ret = (int64_t)value;
262 env->ca = 0;
264 } else {
265 ret = (int64_t)value >> 63;
266 env->ca = (ret != 0);
268 return ret;
270 #endif
272 #if defined(TARGET_PPC64)
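/*
 * popcntb/popcntw: SWAR population count; bit pairs are summed, then
 * nibbles, then (for popcntw) bytes and halfwords, never carrying across
 * the byte or word whose bits are being counted.
 */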
273 target_ulong helper_popcntb(target_ulong val)
275 /* Note that we don't fold past bytes */
276 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
277 0x5555555555555555ULL);
278 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
279 0x3333333333333333ULL);
280 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
281 0x0f0f0f0f0f0f0f0fULL);
282 return val;
285 target_ulong helper_popcntw(target_ulong val)
287 /* Note that we don't fold past words. */
288 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
289 0x5555555555555555ULL);
290 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
291 0x3333333333333333ULL);
292 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
293 0x0f0f0f0f0f0f0f0fULL);
294 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
295 0x00ff00ff00ff00ffULL);
296 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
297 0x0000ffff0000ffffULL);
298 return val;
300 #else
301 target_ulong helper_popcntb(target_ulong val)
303 /* Note that we don't fold past bytes */
304 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
305 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
306 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
307 return val;
309 #endif
311 /*****************************************************************************/
312 /* PowerPC 601 specific instructions (POWER bridge) */
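/*
 * div/divo form the 64-bit dividend from RA and the MQ SPR and leave the
 * remainder in MQ; divs/divso divide the 32-bit operands directly, again
 * leaving the remainder in MQ.
 */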
313 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
315 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
317 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
318 (int32_t)arg2 == 0) {
319 env->spr[SPR_MQ] = 0;
320 return INT32_MIN;
321 } else {
322 env->spr[SPR_MQ] = tmp % arg2;
323 return tmp / (int32_t)arg2;
327 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
328 target_ulong arg2)
330 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
332 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
333 (int32_t)arg2 == 0) {
334 env->so = env->ov = 1;
335 env->spr[SPR_MQ] = 0;
336 return INT32_MIN;
337 } else {
338 env->spr[SPR_MQ] = tmp % arg2;
339 tmp /= (int32_t)arg2;
340 if ((int32_t)tmp != tmp) {
341 env->so = env->ov = 1;
342 } else {
343 env->ov = 0;
345 return tmp;
349 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
350 target_ulong arg2)
352 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
353 (int32_t)arg2 == 0) {
354 env->spr[SPR_MQ] = 0;
355 return INT32_MIN;
356 } else {
357 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
358 return (int32_t)arg1 / (int32_t)arg2;
362 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
363 target_ulong arg2)
365 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
366 (int32_t)arg2 == 0) {
367 env->so = env->ov = 1;
368 env->spr[SPR_MQ] = 0;
369 return INT32_MIN;
370 } else {
371 env->ov = 0;
372 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
373 return (int32_t)arg1 / (int32_t)arg2;
377 /*****************************************************************************/
378 /* 602 specific instructions */
379 /* mfrom is the most crazy instruction ever seen, imho ! */
380 /* Real implementation uses a ROM table. Do the same */
381 /* Extremely decomposed:
382 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
385 #if !defined(CONFIG_USER_ONLY)
386 target_ulong helper_602_mfrom(target_ulong arg)
388 if (likely(arg < 602)) {
389 #include "mfrom_table.c"
390 return mfrom_ROM_table[arg];
391 } else {
392 return 0;
395 #endif
397 /*****************************************************************************/
398 /* Altivec extension helpers */
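/*
 * HI_IDX/LO_IDX select the u64 element holding the architecturally most/least
 * significant half of a vector, and AVRB/AVRW index bytes/words in
 * architectural (big-endian) order, independent of host endianness.
 */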
399 #if defined(HOST_WORDS_BIGENDIAN)
400 #define HI_IDX 0
401 #define LO_IDX 1
402 #define AVRB(i) u8[i]
403 #define AVRW(i) u32[i]
404 #else
405 #define HI_IDX 1
406 #define LO_IDX 0
407 #define AVRB(i) u8[15-(i)]
408 #define AVRW(i) u32[3-(i)]
409 #endif
411 #if defined(HOST_WORDS_BIGENDIAN)
412 #define VECTOR_FOR_INORDER_I(index, element) \
413 for (index = 0; index < ARRAY_SIZE(r->element); index++)
414 #else
415 #define VECTOR_FOR_INORDER_I(index, element) \
416 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
417 #endif
419 /* Saturating arithmetic helpers. */
420 #define SATCVT(from, to, from_type, to_type, min, max) \
421 static inline to_type cvt##from##to(from_type x, int *sat) \
423 to_type r; \
425 if (x < (from_type)min) { \
426 r = min; \
427 *sat = 1; \
428 } else if (x > (from_type)max) { \
429 r = max; \
430 *sat = 1; \
431 } else { \
432 r = x; \
434 return r; \
436 #define SATCVTU(from, to, from_type, to_type, min, max) \
437 static inline to_type cvt##from##to(from_type x, int *sat) \
439 to_type r; \
441 if (x > (from_type)max) { \
442 r = max; \
443 *sat = 1; \
444 } else { \
445 r = x; \
447 return r; \
449 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
450 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
451 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
453 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
454 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
455 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
456 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
457 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
458 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
459 #undef SATCVT
460 #undef SATCVTU
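/*
 * lvsl/lvsr build the permute control vector used by unaligned load/store
 * sequences: sixteen consecutive byte indices starting at sh (lvsl) or at
 * 16 - sh (lvsr).
 */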
462 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
464 int i, j = (sh & 0xf);
466 VECTOR_FOR_INORDER_I(i, u8) {
467 r->u8[i] = j++;
471 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
473 int i, j = 0x10 - (sh & 0xf);
475 VECTOR_FOR_INORDER_I(i, u8) {
476 r->u8[i] = j++;
480 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
482 #if defined(HOST_WORDS_BIGENDIAN)
483 env->vscr = r->u32[3];
484 #else
485 env->vscr = r->u32[0];
486 #endif
487 set_flush_to_zero(vscr_nj, &env->vec_status);
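/* vaddcuw: the unsigned carry out of a + b is 1 exactly when b > ~a */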
490 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
492 int i;
494 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
495 r->u32[i] = ~a->u32[i] < b->u32[i];
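/* The vprtyb[wdq] helpers XOR together the least significant bit of every
 * byte in each word/doubleword/quadword. */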
499 /* vprtybw */
500 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
502 int i;
503 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
504 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
505 res ^= res >> 8;
506 r->u32[i] = res & 1;
510 /* vprtybd */
511 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
513 int i;
514 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
515 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
516 res ^= res >> 16;
517 res ^= res >> 8;
518 r->u64[i] = res & 1;
522 /* vprtybq */
523 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
525 uint64_t res = b->u64[0] ^ b->u64[1];
526 res ^= res >> 32;
527 res ^= res >> 16;
528 res ^= res >> 8;
529 r->u64[LO_IDX] = res & 1;
530 r->u64[HI_IDX] = 0;
533 #define VARITH_DO(name, op, element) \
534 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
536 int i; \
538 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
539 r->element[i] = a->element[i] op b->element[i]; \
542 #define VARITH(suffix, element) \
543 VARITH_DO(add##suffix, +, element) \
544 VARITH_DO(sub##suffix, -, element)
545 VARITH(ubm, u8)
546 VARITH(uhm, u16)
547 VARITH(uwm, u32)
548 VARITH(udm, u64)
549 VARITH_DO(muluwm, *, u32)
550 #undef VARITH_DO
551 #undef VARITH
553 #define VARITHFP(suffix, func) \
554 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
555 ppc_avr_t *b) \
557 int i; \
559 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
560 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
563 VARITHFP(addfp, float32_add)
564 VARITHFP(subfp, float32_sub)
565 VARITHFP(minfp, float32_min)
566 VARITHFP(maxfp, float32_max)
567 #undef VARITHFP
569 #define VARITHFPFMA(suffix, type) \
570 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
571 ppc_avr_t *b, ppc_avr_t *c) \
573 int i; \
574 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
575 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
576 type, &env->vec_status); \
579 VARITHFPFMA(maddfp, 0);
580 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
581 #undef VARITHFPFMA
583 #define VARITHSAT_CASE(type, op, cvt, element) \
585 type result = (type)a->element[i] op (type)b->element[i]; \
586 r->element[i] = cvt(result, &sat); \
589 #define VARITHSAT_DO(name, op, optype, cvt, element) \
590 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
591 ppc_avr_t *b) \
593 int sat = 0; \
594 int i; \
596 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
597 switch (sizeof(r->element[0])) { \
598 case 1: \
599 VARITHSAT_CASE(optype, op, cvt, element); \
600 break; \
601 case 2: \
602 VARITHSAT_CASE(optype, op, cvt, element); \
603 break; \
604 case 4: \
605 VARITHSAT_CASE(optype, op, cvt, element); \
606 break; \
609 if (sat) { \
610 env->vscr |= (1 << VSCR_SAT); \
613 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
614 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
615 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
616 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
617 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
618 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
619 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
620 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
621 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
622 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
623 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
624 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
625 #undef VARITHSAT_CASE
626 #undef VARITHSAT_DO
627 #undef VARITHSAT_SIGNED
628 #undef VARITHSAT_UNSIGNED
630 #define VAVG_DO(name, element, etype) \
631 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
633 int i; \
635 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
636 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
637 r->element[i] = x >> 1; \
641 #define VAVG(type, signed_element, signed_type, unsigned_element, \
642 unsigned_type) \
643 VAVG_DO(avgs##type, signed_element, signed_type) \
644 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
645 VAVG(b, s8, int16_t, u8, uint16_t)
646 VAVG(h, s16, int32_t, u16, uint32_t)
647 VAVG(w, s32, int64_t, u32, uint64_t)
648 #undef VAVG_DO
649 #undef VAVG
651 #define VABSDU_DO(name, element) \
652 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
654 int i; \
656 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
657 r->element[i] = (a->element[i] > b->element[i]) ? \
658 (a->element[i] - b->element[i]) : \
659 (b->element[i] - a->element[i]); \
663 /* VABSDU - Vector absolute difference unsigned
664 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
665 * element - element type to access from vector
667 #define VABSDU(type, element) \
668 VABSDU_DO(absdu##type, element)
669 VABSDU(b, u8)
670 VABSDU(h, u16)
671 VABSDU(w, u32)
672 #undef VABSDU_DO
673 #undef VABSDU
675 #define VCF(suffix, cvt, element) \
676 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
677 ppc_avr_t *b, uint32_t uim) \
679 int i; \
681 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
682 float32 t = cvt(b->element[i], &env->vec_status); \
683 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
686 VCF(ux, uint32_to_float32, u32)
687 VCF(sx, int32_to_float32, s32)
688 #undef VCF
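/*
 * VCMP/VCMPNE: element-wise compares producing all-ones/all-zeros masks.
 * The record (dot) forms set CR6 bit 3 when every element compared true
 * and CR6 bit 1 when none did.
 */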
690 #define VCMP_DO(suffix, compare, element, record) \
691 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
692 ppc_avr_t *a, ppc_avr_t *b) \
694 uint64_t ones = (uint64_t)-1; \
695 uint64_t all = ones; \
696 uint64_t none = 0; \
697 int i; \
699 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
700 uint64_t result = (a->element[i] compare b->element[i] ? \
701 ones : 0x0); \
702 switch (sizeof(a->element[0])) { \
703 case 8: \
704 r->u64[i] = result; \
705 break; \
706 case 4: \
707 r->u32[i] = result; \
708 break; \
709 case 2: \
710 r->u16[i] = result; \
711 break; \
712 case 1: \
713 r->u8[i] = result; \
714 break; \
716 all &= result; \
717 none |= result; \
719 if (record) { \
720 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
723 #define VCMP(suffix, compare, element) \
724 VCMP_DO(suffix, compare, element, 0) \
725 VCMP_DO(suffix##_dot, compare, element, 1)
726 VCMP(equb, ==, u8)
727 VCMP(equh, ==, u16)
728 VCMP(equw, ==, u32)
729 VCMP(equd, ==, u64)
730 VCMP(gtub, >, u8)
731 VCMP(gtuh, >, u16)
732 VCMP(gtuw, >, u32)
733 VCMP(gtud, >, u64)
734 VCMP(gtsb, >, s8)
735 VCMP(gtsh, >, s16)
736 VCMP(gtsw, >, s32)
737 VCMP(gtsd, >, s64)
738 #undef VCMP_DO
739 #undef VCMP
741 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
742 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
743 ppc_avr_t *a, ppc_avr_t *b) \
745 etype ones = (etype)-1; \
746 etype all = ones; \
747 etype result, none = 0; \
748 int i; \
750 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
751 if (cmpzero) { \
752 result = ((a->element[i] == 0) \
753 || (b->element[i] == 0) \
754 || (a->element[i] != b->element[i]) ? \
755 ones : 0x0); \
756 } else { \
757 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
759 r->element[i] = result; \
760 all &= result; \
761 none |= result; \
763 if (record) { \
764 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
768 /* VCMPNEZ - Vector compare not equal to zero
769 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
770 * element - element type to access from vector
772 #define VCMPNE(suffix, element, etype, cmpzero) \
773 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
774 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
775 VCMPNE(zb, u8, uint8_t, 1)
776 VCMPNE(zh, u16, uint16_t, 1)
777 VCMPNE(zw, u32, uint32_t, 1)
778 VCMPNE(b, u8, uint8_t, 0)
779 VCMPNE(h, u16, uint16_t, 0)
780 VCMPNE(w, u32, uint32_t, 0)
781 #undef VCMPNE_DO
782 #undef VCMPNE
784 #define VCMPFP_DO(suffix, compare, order, record) \
785 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
786 ppc_avr_t *a, ppc_avr_t *b) \
788 uint32_t ones = (uint32_t)-1; \
789 uint32_t all = ones; \
790 uint32_t none = 0; \
791 int i; \
793 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
794 uint32_t result; \
795 int rel = float32_compare_quiet(a->f[i], b->f[i], \
796 &env->vec_status); \
797 if (rel == float_relation_unordered) { \
798 result = 0; \
799 } else if (rel compare order) { \
800 result = ones; \
801 } else { \
802 result = 0; \
804 r->u32[i] = result; \
805 all &= result; \
806 none |= result; \
808 if (record) { \
809 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
812 #define VCMPFP(suffix, compare, order) \
813 VCMPFP_DO(suffix, compare, order, 0) \
814 VCMPFP_DO(suffix##_dot, compare, order, 1)
815 VCMPFP(eqfp, ==, float_relation_equal)
816 VCMPFP(gefp, !=, float_relation_less)
817 VCMPFP(gtfp, ==, float_relation_greater)
818 #undef VCMPFP_DO
819 #undef VCMPFP
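/*
 * vcmpbfp (bounds compare): bit 31 of each result element is set when
 * a > b and bit 30 when a < -b (a NaN sets both); the record form sets
 * CR6 bit 1 only when every element is within bounds.
 */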
821 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
822 ppc_avr_t *a, ppc_avr_t *b, int record)
824 int i;
825 int all_in = 0;
827 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
828 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
829 if (le_rel == float_relation_unordered) {
830 r->u32[i] = 0xc0000000;
831 all_in = 1;
832 } else {
833 float32 bneg = float32_chs(b->f[i]);
834 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
835 int le = le_rel != float_relation_greater;
836 int ge = ge_rel != float_relation_less;
838 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
839 all_in |= (!le | !ge);
842 if (record) {
843 env->crf[6] = (all_in == 0) << 1;
847 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
849 vcmpbfp_internal(env, r, a, b, 0);
852 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
853 ppc_avr_t *b)
855 vcmpbfp_internal(env, r, a, b, 1);
858 #define VCT(suffix, satcvt, element) \
859 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
860 ppc_avr_t *b, uint32_t uim) \
862 int i; \
863 int sat = 0; \
864 float_status s = env->vec_status; \
866 set_float_rounding_mode(float_round_to_zero, &s); \
867 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
868 if (float32_is_any_nan(b->f[i])) { \
869 r->element[i] = 0; \
870 } else { \
871 float64 t = float32_to_float64(b->f[i], &s); \
872 int64_t j; \
874 t = float64_scalbn(t, uim, &s); \
875 j = float64_to_int64(t, &s); \
876 r->element[i] = satcvt(j, &sat); \
879 if (sat) { \
880 env->vscr |= (1 << VSCR_SAT); \
883 VCT(uxs, cvtsduw, u32)
884 VCT(sxs, cvtsdsw, s32)
885 #undef VCT
887 target_ulong helper_vclzlsbb(ppc_avr_t *r)
889 target_ulong count = 0;
890 int i;
891 VECTOR_FOR_INORDER_I(i, u8) {
892 if (r->u8[i] & 0x01) {
893 break;
895 count++;
897 return count;
900 target_ulong helper_vctzlsbb(ppc_avr_t *r)
902 target_ulong count = 0;
903 int i;
904 #if defined(HOST_WORDS_BIGENDIAN)
905 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
906 #else
907 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
908 #endif
909 if (r->u8[i] & 0x01) {
910 break;
912 count++;
914 return count;
917 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
918 ppc_avr_t *b, ppc_avr_t *c)
920 int sat = 0;
921 int i;
923 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
924 int32_t prod = a->s16[i] * b->s16[i];
925 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
927 r->s16[i] = cvtswsh(t, &sat);
930 if (sat) {
931 env->vscr |= (1 << VSCR_SAT);
935 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
936 ppc_avr_t *b, ppc_avr_t *c)
938 int sat = 0;
939 int i;
941 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
942 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
943 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
944 r->s16[i] = cvtswsh(t, &sat);
947 if (sat) {
948 env->vscr |= (1 << VSCR_SAT);
952 #define VMINMAX_DO(name, compare, element) \
953 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
955 int i; \
957 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
958 if (a->element[i] compare b->element[i]) { \
959 r->element[i] = b->element[i]; \
960 } else { \
961 r->element[i] = a->element[i]; \
965 #define VMINMAX(suffix, element) \
966 VMINMAX_DO(min##suffix, >, element) \
967 VMINMAX_DO(max##suffix, <, element)
968 VMINMAX(sb, s8)
969 VMINMAX(sh, s16)
970 VMINMAX(sw, s32)
971 VMINMAX(sd, s64)
972 VMINMAX(ub, u8)
973 VMINMAX(uh, u16)
974 VMINMAX(uw, u32)
975 VMINMAX(ud, u64)
976 #undef VMINMAX_DO
977 #undef VMINMAX
979 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
981 int i;
983 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
984 int32_t prod = a->s16[i] * b->s16[i];
985 r->s16[i] = (int16_t) (prod + c->s16[i]);
989 #define VMRG_DO(name, element, highp) \
990 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
992 ppc_avr_t result; \
993 int i; \
994 size_t n_elems = ARRAY_SIZE(r->element); \
996 for (i = 0; i < n_elems / 2; i++) { \
997 if (highp) { \
998 result.element[i*2+HI_IDX] = a->element[i]; \
999 result.element[i*2+LO_IDX] = b->element[i]; \
1000 } else { \
1001 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
1002 b->element[n_elems - i - 1]; \
1003 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
1004 a->element[n_elems - i - 1]; \
1007 *r = result; \
1009 #if defined(HOST_WORDS_BIGENDIAN)
1010 #define MRGHI 0
1011 #define MRGLO 1
1012 #else
1013 #define MRGHI 1
1014 #define MRGLO 0
1015 #endif
1016 #define VMRG(suffix, element) \
1017 VMRG_DO(mrgl##suffix, element, MRGHI) \
1018 VMRG_DO(mrgh##suffix, element, MRGLO)
1019 VMRG(b, u8)
1020 VMRG(h, u16)
1021 VMRG(w, u32)
1022 #undef VMRG_DO
1023 #undef VMRG
1024 #undef MRGHI
1025 #undef MRGLO
1027 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1028 ppc_avr_t *b, ppc_avr_t *c)
1030 int32_t prod[16];
1031 int i;
1033 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1034 prod[i] = (int32_t)a->s8[i] * b->u8[i];
1037 VECTOR_FOR_INORDER_I(i, s32) {
1038 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1039 prod[4 * i + 2] + prod[4 * i + 3];
1043 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1044 ppc_avr_t *b, ppc_avr_t *c)
1046 int32_t prod[8];
1047 int i;
1049 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1050 prod[i] = a->s16[i] * b->s16[i];
1053 VECTOR_FOR_INORDER_I(i, s32) {
1054 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1058 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1059 ppc_avr_t *b, ppc_avr_t *c)
1061 int32_t prod[8];
1062 int i;
1063 int sat = 0;
1065 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1066 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1069 VECTOR_FOR_INORDER_I(i, s32) {
1070 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1072 r->u32[i] = cvtsdsw(t, &sat);
1075 if (sat) {
1076 env->vscr |= (1 << VSCR_SAT);
1080 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1081 ppc_avr_t *b, ppc_avr_t *c)
1083 uint16_t prod[16];
1084 int i;
1086 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1087 prod[i] = a->u8[i] * b->u8[i];
1090 VECTOR_FOR_INORDER_I(i, u32) {
1091 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1092 prod[4 * i + 2] + prod[4 * i + 3];
1096 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1097 ppc_avr_t *b, ppc_avr_t *c)
1099 uint32_t prod[8];
1100 int i;
1102 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1103 prod[i] = a->u16[i] * b->u16[i];
1106 VECTOR_FOR_INORDER_I(i, u32) {
1107 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1111 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1112 ppc_avr_t *b, ppc_avr_t *c)
1114 uint32_t prod[8];
1115 int i;
1116 int sat = 0;
1118 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1119 prod[i] = a->u16[i] * b->u16[i];
1122 VECTOR_FOR_INORDER_I(i, s32) {
1123 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1125 r->u32[i] = cvtuduw(t, &sat);
1128 if (sat) {
1129 env->vscr |= (1 << VSCR_SAT);
1133 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
1134 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1136 int i; \
1138 VECTOR_FOR_INORDER_I(i, prod_element) { \
1139 if (evenp) { \
1140 r->prod_element[i] = \
1141 (cast)a->mul_element[i * 2 + HI_IDX] * \
1142 (cast)b->mul_element[i * 2 + HI_IDX]; \
1143 } else { \
1144 r->prod_element[i] = \
1145 (cast)a->mul_element[i * 2 + LO_IDX] * \
1146 (cast)b->mul_element[i * 2 + LO_IDX]; \
1150 #define VMUL(suffix, mul_element, prod_element, cast) \
1151 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1152 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1153 VMUL(sb, s8, s16, int16_t)
1154 VMUL(sh, s16, s32, int32_t)
1155 VMUL(sw, s32, s64, int64_t)
1156 VMUL(ub, u8, u16, uint16_t)
1157 VMUL(uh, u16, u32, uint32_t)
1158 VMUL(uw, u32, u64, uint64_t)
1159 #undef VMUL_DO
1160 #undef VMUL
1162 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1163 ppc_avr_t *c)
1165 ppc_avr_t result;
1166 int i;
1168 VECTOR_FOR_INORDER_I(i, u8) {
1169 int s = c->u8[i] & 0x1f;
1170 #if defined(HOST_WORDS_BIGENDIAN)
1171 int index = s & 0xf;
1172 #else
1173 int index = 15 - (s & 0xf);
1174 #endif
1176 if (s & 0x10) {
1177 result.u8[i] = b->u8[index];
1178 } else {
1179 result.u8[i] = a->u8[index];
1182 *r = result;
1185 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1186 ppc_avr_t *c)
1188 ppc_avr_t result;
1189 int i;
1191 VECTOR_FOR_INORDER_I(i, u8) {
1192 int s = c->u8[i] & 0x1f;
1193 #if defined(HOST_WORDS_BIGENDIAN)
1194 int index = 15 - (s & 0xf);
1195 #else
1196 int index = s & 0xf;
1197 #endif
1199 if (s & 0x10) {
1200 result.u8[i] = a->u8[index];
1201 } else {
1202 result.u8[i] = b->u8[index];
1205 *r = result;
1208 #if defined(HOST_WORDS_BIGENDIAN)
1209 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1210 #define VBPERMD_INDEX(i) (i)
1211 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1212 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1213 #else
1214 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1215 #define VBPERMD_INDEX(i) (1 - i)
1216 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1217 #define EXTRACT_BIT(avr, i, index) \
1218 (extract64((avr)->u64[1 - i], 63 - index, 1))
1219 #endif
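/*
 * vbpermd/vbpermq: each index byte of b selects one bit (numbered from the
 * most significant bit) of the corresponding doubleword of a (vbpermd) or of
 * the whole quadword of a (vbpermq); the selected bits are gathered into the
 * result.
 */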
1221 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1223 int i, j;
1224 ppc_avr_t result = { .u64 = { 0, 0 } };
1225 VECTOR_FOR_INORDER_I(i, u64) {
1226 for (j = 0; j < 8; j++) {
1227 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1228 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1229 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1233 *r = result;
1236 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1238 int i;
1239 uint64_t perm = 0;
1241 VECTOR_FOR_INORDER_I(i, u8) {
1242 int index = VBPERMQ_INDEX(b, i);
1244 if (index < 128) {
1245 uint64_t mask = (1ull << (63-(index & 0x3F)));
1246 if (a->u64[VBPERMQ_DW(index)] & mask) {
1247 perm |= (0x8000 >> i);
1252 r->u64[HI_IDX] = perm;
1253 r->u64[LO_IDX] = 0;
1256 #undef VBPERMQ_INDEX
1257 #undef VBPERMQ_DW
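/*
 * vgbbd gathers bits by bytes by doubleword, i.e. an 8x8 bit-matrix
 * transpose within each doubleword.  VGBBD_MASKS[v] scatters the bits of
 * the byte value v across the eight byte positions of a doubleword; the
 * per-byte shift in helper_vgbbd then picks the destination bit lane.
 */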
1259 static const uint64_t VGBBD_MASKS[256] = {
1260 0x0000000000000000ull, /* 00 */
1261 0x0000000000000080ull, /* 01 */
1262 0x0000000000008000ull, /* 02 */
1263 0x0000000000008080ull, /* 03 */
1264 0x0000000000800000ull, /* 04 */
1265 0x0000000000800080ull, /* 05 */
1266 0x0000000000808000ull, /* 06 */
1267 0x0000000000808080ull, /* 07 */
1268 0x0000000080000000ull, /* 08 */
1269 0x0000000080000080ull, /* 09 */
1270 0x0000000080008000ull, /* 0A */
1271 0x0000000080008080ull, /* 0B */
1272 0x0000000080800000ull, /* 0C */
1273 0x0000000080800080ull, /* 0D */
1274 0x0000000080808000ull, /* 0E */
1275 0x0000000080808080ull, /* 0F */
1276 0x0000008000000000ull, /* 10 */
1277 0x0000008000000080ull, /* 11 */
1278 0x0000008000008000ull, /* 12 */
1279 0x0000008000008080ull, /* 13 */
1280 0x0000008000800000ull, /* 14 */
1281 0x0000008000800080ull, /* 15 */
1282 0x0000008000808000ull, /* 16 */
1283 0x0000008000808080ull, /* 17 */
1284 0x0000008080000000ull, /* 18 */
1285 0x0000008080000080ull, /* 19 */
1286 0x0000008080008000ull, /* 1A */
1287 0x0000008080008080ull, /* 1B */
1288 0x0000008080800000ull, /* 1C */
1289 0x0000008080800080ull, /* 1D */
1290 0x0000008080808000ull, /* 1E */
1291 0x0000008080808080ull, /* 1F */
1292 0x0000800000000000ull, /* 20 */
1293 0x0000800000000080ull, /* 21 */
1294 0x0000800000008000ull, /* 22 */
1295 0x0000800000008080ull, /* 23 */
1296 0x0000800000800000ull, /* 24 */
1297 0x0000800000800080ull, /* 25 */
1298 0x0000800000808000ull, /* 26 */
1299 0x0000800000808080ull, /* 27 */
1300 0x0000800080000000ull, /* 28 */
1301 0x0000800080000080ull, /* 29 */
1302 0x0000800080008000ull, /* 2A */
1303 0x0000800080008080ull, /* 2B */
1304 0x0000800080800000ull, /* 2C */
1305 0x0000800080800080ull, /* 2D */
1306 0x0000800080808000ull, /* 2E */
1307 0x0000800080808080ull, /* 2F */
1308 0x0000808000000000ull, /* 30 */
1309 0x0000808000000080ull, /* 31 */
1310 0x0000808000008000ull, /* 32 */
1311 0x0000808000008080ull, /* 33 */
1312 0x0000808000800000ull, /* 34 */
1313 0x0000808000800080ull, /* 35 */
1314 0x0000808000808000ull, /* 36 */
1315 0x0000808000808080ull, /* 37 */
1316 0x0000808080000000ull, /* 38 */
1317 0x0000808080000080ull, /* 39 */
1318 0x0000808080008000ull, /* 3A */
1319 0x0000808080008080ull, /* 3B */
1320 0x0000808080800000ull, /* 3C */
1321 0x0000808080800080ull, /* 3D */
1322 0x0000808080808000ull, /* 3E */
1323 0x0000808080808080ull, /* 3F */
1324 0x0080000000000000ull, /* 40 */
1325 0x0080000000000080ull, /* 41 */
1326 0x0080000000008000ull, /* 42 */
1327 0x0080000000008080ull, /* 43 */
1328 0x0080000000800000ull, /* 44 */
1329 0x0080000000800080ull, /* 45 */
1330 0x0080000000808000ull, /* 46 */
1331 0x0080000000808080ull, /* 47 */
1332 0x0080000080000000ull, /* 48 */
1333 0x0080000080000080ull, /* 49 */
1334 0x0080000080008000ull, /* 4A */
1335 0x0080000080008080ull, /* 4B */
1336 0x0080000080800000ull, /* 4C */
1337 0x0080000080800080ull, /* 4D */
1338 0x0080000080808000ull, /* 4E */
1339 0x0080000080808080ull, /* 4F */
1340 0x0080008000000000ull, /* 50 */
1341 0x0080008000000080ull, /* 51 */
1342 0x0080008000008000ull, /* 52 */
1343 0x0080008000008080ull, /* 53 */
1344 0x0080008000800000ull, /* 54 */
1345 0x0080008000800080ull, /* 55 */
1346 0x0080008000808000ull, /* 56 */
1347 0x0080008000808080ull, /* 57 */
1348 0x0080008080000000ull, /* 58 */
1349 0x0080008080000080ull, /* 59 */
1350 0x0080008080008000ull, /* 5A */
1351 0x0080008080008080ull, /* 5B */
1352 0x0080008080800000ull, /* 5C */
1353 0x0080008080800080ull, /* 5D */
1354 0x0080008080808000ull, /* 5E */
1355 0x0080008080808080ull, /* 5F */
1356 0x0080800000000000ull, /* 60 */
1357 0x0080800000000080ull, /* 61 */
1358 0x0080800000008000ull, /* 62 */
1359 0x0080800000008080ull, /* 63 */
1360 0x0080800000800000ull, /* 64 */
1361 0x0080800000800080ull, /* 65 */
1362 0x0080800000808000ull, /* 66 */
1363 0x0080800000808080ull, /* 67 */
1364 0x0080800080000000ull, /* 68 */
1365 0x0080800080000080ull, /* 69 */
1366 0x0080800080008000ull, /* 6A */
1367 0x0080800080008080ull, /* 6B */
1368 0x0080800080800000ull, /* 6C */
1369 0x0080800080800080ull, /* 6D */
1370 0x0080800080808000ull, /* 6E */
1371 0x0080800080808080ull, /* 6F */
1372 0x0080808000000000ull, /* 70 */
1373 0x0080808000000080ull, /* 71 */
1374 0x0080808000008000ull, /* 72 */
1375 0x0080808000008080ull, /* 73 */
1376 0x0080808000800000ull, /* 74 */
1377 0x0080808000800080ull, /* 75 */
1378 0x0080808000808000ull, /* 76 */
1379 0x0080808000808080ull, /* 77 */
1380 0x0080808080000000ull, /* 78 */
1381 0x0080808080000080ull, /* 79 */
1382 0x0080808080008000ull, /* 7A */
1383 0x0080808080008080ull, /* 7B */
1384 0x0080808080800000ull, /* 7C */
1385 0x0080808080800080ull, /* 7D */
1386 0x0080808080808000ull, /* 7E */
1387 0x0080808080808080ull, /* 7F */
1388 0x8000000000000000ull, /* 80 */
1389 0x8000000000000080ull, /* 81 */
1390 0x8000000000008000ull, /* 82 */
1391 0x8000000000008080ull, /* 83 */
1392 0x8000000000800000ull, /* 84 */
1393 0x8000000000800080ull, /* 85 */
1394 0x8000000000808000ull, /* 86 */
1395 0x8000000000808080ull, /* 87 */
1396 0x8000000080000000ull, /* 88 */
1397 0x8000000080000080ull, /* 89 */
1398 0x8000000080008000ull, /* 8A */
1399 0x8000000080008080ull, /* 8B */
1400 0x8000000080800000ull, /* 8C */
1401 0x8000000080800080ull, /* 8D */
1402 0x8000000080808000ull, /* 8E */
1403 0x8000000080808080ull, /* 8F */
1404 0x8000008000000000ull, /* 90 */
1405 0x8000008000000080ull, /* 91 */
1406 0x8000008000008000ull, /* 92 */
1407 0x8000008000008080ull, /* 93 */
1408 0x8000008000800000ull, /* 94 */
1409 0x8000008000800080ull, /* 95 */
1410 0x8000008000808000ull, /* 96 */
1411 0x8000008000808080ull, /* 97 */
1412 0x8000008080000000ull, /* 98 */
1413 0x8000008080000080ull, /* 99 */
1414 0x8000008080008000ull, /* 9A */
1415 0x8000008080008080ull, /* 9B */
1416 0x8000008080800000ull, /* 9C */
1417 0x8000008080800080ull, /* 9D */
1418 0x8000008080808000ull, /* 9E */
1419 0x8000008080808080ull, /* 9F */
1420 0x8000800000000000ull, /* A0 */
1421 0x8000800000000080ull, /* A1 */
1422 0x8000800000008000ull, /* A2 */
1423 0x8000800000008080ull, /* A3 */
1424 0x8000800000800000ull, /* A4 */
1425 0x8000800000800080ull, /* A5 */
1426 0x8000800000808000ull, /* A6 */
1427 0x8000800000808080ull, /* A7 */
1428 0x8000800080000000ull, /* A8 */
1429 0x8000800080000080ull, /* A9 */
1430 0x8000800080008000ull, /* AA */
1431 0x8000800080008080ull, /* AB */
1432 0x8000800080800000ull, /* AC */
1433 0x8000800080800080ull, /* AD */
1434 0x8000800080808000ull, /* AE */
1435 0x8000800080808080ull, /* AF */
1436 0x8000808000000000ull, /* B0 */
1437 0x8000808000000080ull, /* B1 */
1438 0x8000808000008000ull, /* B2 */
1439 0x8000808000008080ull, /* B3 */
1440 0x8000808000800000ull, /* B4 */
1441 0x8000808000800080ull, /* B5 */
1442 0x8000808000808000ull, /* B6 */
1443 0x8000808000808080ull, /* B7 */
1444 0x8000808080000000ull, /* B8 */
1445 0x8000808080000080ull, /* B9 */
1446 0x8000808080008000ull, /* BA */
1447 0x8000808080008080ull, /* BB */
1448 0x8000808080800000ull, /* BC */
1449 0x8000808080800080ull, /* BD */
1450 0x8000808080808000ull, /* BE */
1451 0x8000808080808080ull, /* BF */
1452 0x8080000000000000ull, /* C0 */
1453 0x8080000000000080ull, /* C1 */
1454 0x8080000000008000ull, /* C2 */
1455 0x8080000000008080ull, /* C3 */
1456 0x8080000000800000ull, /* C4 */
1457 0x8080000000800080ull, /* C5 */
1458 0x8080000000808000ull, /* C6 */
1459 0x8080000000808080ull, /* C7 */
1460 0x8080000080000000ull, /* C8 */
1461 0x8080000080000080ull, /* C9 */
1462 0x8080000080008000ull, /* CA */
1463 0x8080000080008080ull, /* CB */
1464 0x8080000080800000ull, /* CC */
1465 0x8080000080800080ull, /* CD */
1466 0x8080000080808000ull, /* CE */
1467 0x8080000080808080ull, /* CF */
1468 0x8080008000000000ull, /* D0 */
1469 0x8080008000000080ull, /* D1 */
1470 0x8080008000008000ull, /* D2 */
1471 0x8080008000008080ull, /* D3 */
1472 0x8080008000800000ull, /* D4 */
1473 0x8080008000800080ull, /* D5 */
1474 0x8080008000808000ull, /* D6 */
1475 0x8080008000808080ull, /* D7 */
1476 0x8080008080000000ull, /* D8 */
1477 0x8080008080000080ull, /* D9 */
1478 0x8080008080008000ull, /* DA */
1479 0x8080008080008080ull, /* DB */
1480 0x8080008080800000ull, /* DC */
1481 0x8080008080800080ull, /* DD */
1482 0x8080008080808000ull, /* DE */
1483 0x8080008080808080ull, /* DF */
1484 0x8080800000000000ull, /* E0 */
1485 0x8080800000000080ull, /* E1 */
1486 0x8080800000008000ull, /* E2 */
1487 0x8080800000008080ull, /* E3 */
1488 0x8080800000800000ull, /* E4 */
1489 0x8080800000800080ull, /* E5 */
1490 0x8080800000808000ull, /* E6 */
1491 0x8080800000808080ull, /* E7 */
1492 0x8080800080000000ull, /* E8 */
1493 0x8080800080000080ull, /* E9 */
1494 0x8080800080008000ull, /* EA */
1495 0x8080800080008080ull, /* EB */
1496 0x8080800080800000ull, /* EC */
1497 0x8080800080800080ull, /* ED */
1498 0x8080800080808000ull, /* EE */
1499 0x8080800080808080ull, /* EF */
1500 0x8080808000000000ull, /* F0 */
1501 0x8080808000000080ull, /* F1 */
1502 0x8080808000008000ull, /* F2 */
1503 0x8080808000008080ull, /* F3 */
1504 0x8080808000800000ull, /* F4 */
1505 0x8080808000800080ull, /* F5 */
1506 0x8080808000808000ull, /* F6 */
1507 0x8080808000808080ull, /* F7 */
1508 0x8080808080000000ull, /* F8 */
1509 0x8080808080000080ull, /* F9 */
1510 0x8080808080008000ull, /* FA */
1511 0x8080808080008080ull, /* FB */
1512 0x8080808080800000ull, /* FC */
1513 0x8080808080800080ull, /* FD */
1514 0x8080808080808000ull, /* FE */
1515 0x8080808080808080ull, /* FF */
1518 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1520 int i;
1521 uint64_t t[2] = { 0, 0 };
1523 VECTOR_FOR_INORDER_I(i, u8) {
1524 #if defined(HOST_WORDS_BIGENDIAN)
1525 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1526 #else
1527 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1528 #endif
1531 r->u64[0] = t[0];
1532 r->u64[1] = t[1];
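/*
 * vpmsum[bhwd]: carry-less (polynomial, GF(2)) multiplication; each pair of
 * adjacent source elements is multiplied without carries and the two products
 * are XORed into the corresponding double-width result element.
 */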
1535 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1536 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1538 int i, j; \
1539 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1541 VECTOR_FOR_INORDER_I(i, srcfld) { \
1542 prod[i] = 0; \
1543 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1544 if (a->srcfld[i] & (1ull<<j)) { \
1545 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1550 VECTOR_FOR_INORDER_I(i, trgfld) { \
1551 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1555 PMSUM(vpmsumb, u8, u16, uint16_t)
1556 PMSUM(vpmsumh, u16, u32, uint32_t)
1557 PMSUM(vpmsumw, u32, u64, uint64_t)
1559 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1562 #ifdef CONFIG_INT128
1563 int i, j;
1564 __uint128_t prod[2];
1566 VECTOR_FOR_INORDER_I(i, u64) {
1567 prod[i] = 0;
1568 for (j = 0; j < 64; j++) {
1569 if (a->u64[i] & (1ull<<j)) {
1570 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1575 r->u128 = prod[0] ^ prod[1];
1577 #else
1578 int i, j;
1579 ppc_avr_t prod[2];
1581 VECTOR_FOR_INORDER_I(i, u64) {
1582 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1583 for (j = 0; j < 64; j++) {
1584 if (a->u64[i] & (1ull<<j)) {
1585 ppc_avr_t bshift;
1586 if (j == 0) {
1587 bshift.u64[HI_IDX] = 0;
1588 bshift.u64[LO_IDX] = b->u64[i];
1589 } else {
1590 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1591 bshift.u64[LO_IDX] = b->u64[i] << j;
1593 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1594 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1599 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1600 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1601 #endif
1605 #if defined(HOST_WORDS_BIGENDIAN)
1606 #define PKBIG 1
1607 #else
1608 #define PKBIG 0
1609 #endif
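/* vpkpx packs 8:8:8:8 pixels into 1:5:5:5 halfwords, keeping the low alpha
 * bit and the top five bits of each colour channel. */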
1610 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1612 int i, j;
1613 ppc_avr_t result;
1614 #if defined(HOST_WORDS_BIGENDIAN)
1615 const ppc_avr_t *x[2] = { a, b };
1616 #else
1617 const ppc_avr_t *x[2] = { b, a };
1618 #endif
1620 VECTOR_FOR_INORDER_I(i, u64) {
1621 VECTOR_FOR_INORDER_I(j, u32) {
1622 uint32_t e = x[i]->u32[j];
1624 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1625 ((e >> 6) & 0x3e0) |
1626 ((e >> 3) & 0x1f));
1629 *r = result;
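/*
 * VPK: pack two source vectors into one by narrowing every element with the
 * given conversion; the saturating variants set VSCR[SAT] when any element
 * had to be clamped.
 */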
1632 #define VPK(suffix, from, to, cvt, dosat) \
1633 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1634 ppc_avr_t *a, ppc_avr_t *b) \
1636 int i; \
1637 int sat = 0; \
1638 ppc_avr_t result; \
1639 ppc_avr_t *a0 = PKBIG ? a : b; \
1640 ppc_avr_t *a1 = PKBIG ? b : a; \
1642 VECTOR_FOR_INORDER_I(i, from) { \
1643 result.to[i] = cvt(a0->from[i], &sat); \
1644 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1646 *r = result; \
1647 if (dosat && sat) { \
1648 env->vscr |= (1 << VSCR_SAT); \
1651 #define I(x, y) (x)
1652 VPK(shss, s16, s8, cvtshsb, 1)
1653 VPK(shus, s16, u8, cvtshub, 1)
1654 VPK(swss, s32, s16, cvtswsh, 1)
1655 VPK(swus, s32, u16, cvtswuh, 1)
1656 VPK(sdss, s64, s32, cvtsdsw, 1)
1657 VPK(sdus, s64, u32, cvtsduw, 1)
1658 VPK(uhus, u16, u8, cvtuhub, 1)
1659 VPK(uwus, u32, u16, cvtuwuh, 1)
1660 VPK(udus, u64, u32, cvtuduw, 1)
1661 VPK(uhum, u16, u8, I, 0)
1662 VPK(uwum, u32, u16, I, 0)
1663 VPK(udum, u64, u32, I, 0)
1664 #undef I
1665 #undef VPK
1666 #undef PKBIG
1668 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1670 int i;
1672 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1673 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1677 #define VRFI(suffix, rounding) \
1678 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1679 ppc_avr_t *b) \
1681 int i; \
1682 float_status s = env->vec_status; \
1684 set_float_rounding_mode(rounding, &s); \
1685 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1686 r->f[i] = float32_round_to_int (b->f[i], &s); \
1689 VRFI(n, float_round_nearest_even)
1690 VRFI(m, float_round_down)
1691 VRFI(p, float_round_up)
1692 VRFI(z, float_round_to_zero)
1693 #undef VRFI
1695 #define VROTATE(suffix, element, mask) \
1696 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1698 int i; \
1700 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1701 unsigned int shift = b->element[i] & mask; \
1702 r->element[i] = (a->element[i] << shift) | \
1703 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1706 VROTATE(b, u8, 0x7)
1707 VROTATE(h, u16, 0xF)
1708 VROTATE(w, u32, 0x1F)
1709 VROTATE(d, u64, 0x3F)
1710 #undef VROTATE
1712 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1714 int i;
1716 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1717 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1719 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1723 #define VRLMI(name, size, element, insert) \
1724 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1726 int i; \
1727 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1728 uint##size##_t src1 = a->element[i]; \
1729 uint##size##_t src2 = b->element[i]; \
1730 uint##size##_t src3 = r->element[i]; \
1731 uint##size##_t begin, end, shift, mask, rot_val; \
1733 shift = extract##size(src2, 0, 6); \
1734 end = extract##size(src2, 8, 6); \
1735 begin = extract##size(src2, 16, 6); \
1736 rot_val = rol##size(src1, shift); \
1737 mask = mask_u##size(begin, end); \
1738 if (insert) { \
1739 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1740 } else { \
1741 r->element[i] = (rot_val & mask); \
1746 VRLMI(vrldmi, 64, u64, 1);
1747 VRLMI(vrlwmi, 32, u32, 1);
1748 VRLMI(vrldnm, 64, u64, 0);
1749 VRLMI(vrlwnm, 32, u32, 0);
1751 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1752 ppc_avr_t *c)
1754 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1755 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1758 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1760 int i;
1762 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1763 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1767 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1769 int i;
1771 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1772 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1776 #if defined(HOST_WORDS_BIGENDIAN)
1777 #define VEXTU_X_DO(name, size, left) \
1778 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1780 int index; \
1781 if (left) { \
1782 index = (a & 0xf) * 8; \
1783 } else { \
1784 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1786 return int128_getlo(int128_rshift(b->s128, index)) & \
1787 MAKE_64BIT_MASK(0, size); \
1789 #else
1790 #define VEXTU_X_DO(name, size, left) \
1791 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1793 int index; \
1794 if (left) { \
1795 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1796 } else { \
1797 index = (a & 0xf) * 8; \
1799 return int128_getlo(int128_rshift(b->s128, index)) & \
1800 MAKE_64BIT_MASK(0, size); \
1802 #endif
1804 VEXTU_X_DO(vextublx, 8, 1)
1805 VEXTU_X_DO(vextuhlx, 16, 1)
1806 VEXTU_X_DO(vextuwlx, 32, 1)
1807 VEXTU_X_DO(vextubrx, 8, 0)
1808 VEXTU_X_DO(vextuhrx, 16, 0)
1809 VEXTU_X_DO(vextuwrx, 32, 0)
1810 #undef VEXTU_X_DO
1812 /* The specification says that the results are undefined if all of the
1813 * shift counts are not identical. We check to make sure that they are
1814 * to conform to what real hardware appears to do. */
1815 #define VSHIFT(suffix, leftp) \
1816 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1818 int shift = b->u8[LO_IDX*15] & 0x7; \
1819 int doit = 1; \
1820 int i; \
1822 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1823 doit = doit && ((b->u8[i] & 0x7) == shift); \
1825 if (doit) { \
1826 if (shift == 0) { \
1827 *r = *a; \
1828 } else if (leftp) { \
1829 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1831 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1832 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1833 } else { \
1834 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1836 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1837 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1841 VSHIFT(l, 1)
1842 VSHIFT(r, 0)
1843 #undef VSHIFT
1845 #define VSL(suffix, element, mask) \
1846 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1848 int i; \
1850 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1851 unsigned int shift = b->element[i] & mask; \
1853 r->element[i] = a->element[i] << shift; \
1856 VSL(b, u8, 0x7)
1857 VSL(h, u16, 0x0F)
1858 VSL(w, u32, 0x1F)
1859 VSL(d, u64, 0x3F)
1860 #undef VSL
1862 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1864 int i;
1865 unsigned int shift, bytes, size;
1867 size = ARRAY_SIZE(r->u8);
1868 for (i = 0; i < size; i++) {
1869 shift = b->u8[i] & 0x7; /* extract shift value */
1870 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */
1871 (((i + 1) < size) ? a->u8[i + 1] : 0);
1872 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */
1876 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1878 int i;
1879 unsigned int shift, bytes;
1881 /* Use reverse order, as destination and source register can be the same.
1882 * Since it is modified in place (saving a temporary), iterating in reverse
1883 * order guarantees that the computed result is not fed back.
1885 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1886 shift = b->u8[i] & 0x7; /* extract shift value */
1887 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
1888 /* extract adjacent bytes */
1889 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */
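/* vsldoi: treat a:b as a 32-byte value and extract the 16 bytes starting at
 * byte offset 'shift' */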
1893 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1895 int sh = shift & 0xf;
1896 int i;
1897 ppc_avr_t result;
1899 #if defined(HOST_WORDS_BIGENDIAN)
1900 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1901 int index = sh + i;
1902 if (index > 0xf) {
1903 result.u8[i] = b->u8[index - 0x10];
1904 } else {
1905 result.u8[i] = a->u8[index];
1908 #else
1909 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1910 int index = (16 - sh) + i;
1911 if (index > 0xf) {
1912 result.u8[i] = a->u8[index - 0x10];
1913 } else {
1914 result.u8[i] = b->u8[index];
1917 #endif
1918 *r = result;
1921 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1923 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1925 #if defined(HOST_WORDS_BIGENDIAN)
1926 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1927 memset(&r->u8[16-sh], 0, sh);
1928 #else
1929 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1930 memset(&r->u8[0], 0, sh);
1931 #endif
1934 /* Experimental testing shows that hardware masks the immediate. */
1935 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1936 #if defined(HOST_WORDS_BIGENDIAN)
1937 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1938 #else
1939 #define SPLAT_ELEMENT(element) \
1940 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1941 #endif
1942 #define VSPLT(suffix, element) \
1943 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1945 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1946 int i; \
1948 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1949 r->element[i] = s; \
1952 VSPLT(b, u8)
1953 VSPLT(h, u16)
1954 VSPLT(w, u32)
1955 #undef VSPLT
1956 #undef SPLAT_ELEMENT
1957 #undef _SPLAT_MASKED
1958 #if defined(HOST_WORDS_BIGENDIAN)
1959 #define VINSERT(suffix, element) \
1960 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1962 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
1963 sizeof(r->element[0])); \
1965 #else
1966 #define VINSERT(suffix, element) \
1967 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1969 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1970 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1972 #endif
1973 VINSERT(b, u8)
1974 VINSERT(h, u16)
1975 VINSERT(w, u32)
1976 VINSERT(d, u64)
1977 #undef VINSERT
1978 #if defined(HOST_WORDS_BIGENDIAN)
1979 #define VEXTRACT(suffix, element) \
1980 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1982 uint32_t es = sizeof(r->element[0]); \
1983 memmove(&r->u8[8 - es], &b->u8[index], es); \
1984 memset(&r->u8[8], 0, 8); \
1985 memset(&r->u8[0], 0, 8 - es); \
1987 #else
1988 #define VEXTRACT(suffix, element) \
1989 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1991 uint32_t es = sizeof(r->element[0]); \
1992 uint32_t s = (16 - index) - es; \
1993 memmove(&r->u8[8], &b->u8[s], es); \
1994 memset(&r->u8[0], 0, 8); \
1995 memset(&r->u8[8 + es], 0, 8 - es); \
1997 #endif
1998 VEXTRACT(ub, u8)
1999 VEXTRACT(uh, u16)
2000 VEXTRACT(uw, u32)
2001 VEXTRACT(d, u64)
2002 #undef VEXTRACT
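/*
 * xxextractuw/xxinsertw: VSX extract/insert of a word element at a byte
 * offset; getVSR/putVSR copy the operand between the VSR register file and
 * a local ppc_vsr_t.
 */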
2004 void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
2005 target_ulong xbn, uint32_t index)
2007 ppc_vsr_t xt, xb;
2008 size_t es = sizeof(uint32_t);
2009 uint32_t ext_index;
2010 int i;
2012 getVSR(xbn, &xb, env);
2013 memset(&xt, 0, sizeof(xt));
2015 #if defined(HOST_WORDS_BIGENDIAN)
2016 ext_index = index;
2017 for (i = 0; i < es; i++, ext_index++) {
2018 xt.u8[8 - es + i] = xb.u8[ext_index % 16];
2020 #else
2021 ext_index = 15 - index;
2022 for (i = es - 1; i >= 0; i--, ext_index--) {
2023 xt.u8[8 + i] = xb.u8[ext_index % 16];
2025 #endif
2027 putVSR(xtn, &xt, env);
2030 void helper_xxinsertw(CPUPPCState *env, target_ulong xtn,
2031 target_ulong xbn, uint32_t index)
2033 ppc_vsr_t xt, xb;
2034 size_t es = sizeof(uint32_t);
2035 int ins_index, i = 0;
2037 getVSR(xbn, &xb, env);
2038 getVSR(xtn, &xt, env);
2040 #if defined(HOST_WORDS_BIGENDIAN)
2041 ins_index = index;
2042 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
2043 xt.u8[ins_index] = xb.u8[8 - es + i];
2045 #else
2046 ins_index = 15 - index;
2047 for (i = es - 1; i >= 0 && ins_index >= 0; i--, ins_index--) {
2048 xt.u8[ins_index] = xb.u8[8 + i];
2050 #endif
2052 putVSR(xtn, &xt, env);
2055 #define VEXT_SIGNED(name, element, mask, cast, recast) \
2056 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
2058 int i; \
2059 VECTOR_FOR_INORDER_I(i, element) { \
2060 r->element[i] = (recast)((cast)(b->element[i] & mask)); \
2063 VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
2064 VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
2065 VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
2066 VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
2067 VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
2068 #undef VEXT_SIGNED
2070 #define VNEG(name, element) \
2071 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
2073 int i; \
2074 VECTOR_FOR_INORDER_I(i, element) { \
2075 r->element[i] = -b->element[i]; \
2078 VNEG(vnegw, s32)
2079 VNEG(vnegd, s64)
2080 #undef VNEG
2082 #define VSPLTI(suffix, element, splat_type) \
2083 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
2085 splat_type x = (int8_t)(splat << 3) >> 3; \
2086 int i; \
2088 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2089 r->element[i] = x; \
2092 VSPLTI(b, s8, int8_t)
2093 VSPLTI(h, s16, int16_t)
2094 VSPLTI(w, s32, int32_t)
2095 #undef VSPLTI
2097 #define VSR(suffix, element, mask) \
2098 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
2100 int i; \
2102 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2103 unsigned int shift = b->element[i] & mask; \
2104 r->element[i] = a->element[i] >> shift; \
2107 VSR(ab, s8, 0x7)
2108 VSR(ah, s16, 0xF)
2109 VSR(aw, s32, 0x1F)
2110 VSR(ad, s64, 0x3F)
2111 VSR(b, u8, 0x7)
2112 VSR(h, u16, 0xF)
2113 VSR(w, u32, 0x1F)
2114 VSR(d, u64, 0x3F)
2115 #undef VSR
2117 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2119 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
2121 #if defined(HOST_WORDS_BIGENDIAN)
2122 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
2123 memset(&r->u8[0], 0, sh);
2124 #else
2125 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
2126 memset(&r->u8[16 - sh], 0, sh);
2127 #endif
2130 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2132 int i;
2134 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2135 r->u32[i] = a->u32[i] >= b->u32[i];
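/*
 * The vsum* helpers accumulate source elements into saturated 32-bit sums:
 * vsumsws across the whole vector, vsum2sws per doubleword, and
 * vsum4sbs/vsum4shs/vsum4ubs per word, each also adding in a word of b.
 */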
2139 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2141 int64_t t;
2142 int i, upper;
2143 ppc_avr_t result;
2144 int sat = 0;
2146 #if defined(HOST_WORDS_BIGENDIAN)
2147 upper = ARRAY_SIZE(r->s32)-1;
2148 #else
2149 upper = 0;
2150 #endif
2151 t = (int64_t)b->s32[upper];
2152 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2153 t += a->s32[i];
2154 result.s32[i] = 0;
2156 result.s32[upper] = cvtsdsw(t, &sat);
2157 *r = result;
2159 if (sat) {
2160 env->vscr |= (1 << VSCR_SAT);
2164 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2166 int i, j, upper;
2167 ppc_avr_t result;
2168 int sat = 0;
2170 #if defined(HOST_WORDS_BIGENDIAN)
2171 upper = 1;
2172 #else
2173 upper = 0;
2174 #endif
2175 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2176 int64_t t = (int64_t)b->s32[upper + i * 2];
2178 result.u64[i] = 0;
2179 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2180 t += a->s32[2 * i + j];
2182 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
2185 *r = result;
2186 if (sat) {
2187 env->vscr |= (1 << VSCR_SAT);
2191 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2193 int i, j;
2194 int sat = 0;
2196 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2197 int64_t t = (int64_t)b->s32[i];
2199 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2200 t += a->s8[4 * i + j];
2202 r->s32[i] = cvtsdsw(t, &sat);
2205 if (sat) {
2206 env->vscr |= (1 << VSCR_SAT);
2210 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2212 int sat = 0;
2213 int i;
2215 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2216 int64_t t = (int64_t)b->s32[i];
2218 t += a->s16[2 * i] + a->s16[2 * i + 1];
2219 r->s32[i] = cvtsdsw(t, &sat);
2222 if (sat) {
2223 env->vscr |= (1 << VSCR_SAT);
2227 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2229 int i, j;
2230 int sat = 0;
2232 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2233 uint64_t t = (uint64_t)b->u32[i];
2235 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2236 t += a->u8[4 * i + j];
2238 r->u32[i] = cvtuduw(t, &sat);
2241 if (sat) {
2242 env->vscr |= (1 << VSCR_SAT);
2246 #if defined(HOST_WORDS_BIGENDIAN)
2247 #define UPKHI 1
2248 #define UPKLO 0
2249 #else
2250 #define UPKHI 0
2251 #define UPKLO 1
2252 #endif
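/* Pixel unpack: each 16-bit 1/5/5/5 pixel of the high (hpx) or low (lpx)
 * half of b is expanded to a 32-bit pixel.  The single alpha bit becomes
 * 0xff or 0x00 and each 5-bit channel is placed, unscaled, in the low bits
 * of its byte.
 */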
2253 #define VUPKPX(suffix, hi) \
2254 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2256 int i; \
2257 ppc_avr_t result; \
2259 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2260 uint16_t e = b->u16[hi ? i : i+4]; \
2261 uint8_t a = (e >> 15) ? 0xff : 0; \
2262 uint8_t r = (e >> 10) & 0x1f; \
2263 uint8_t g = (e >> 5) & 0x1f; \
2264 uint8_t b = e & 0x1f; \
2266 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2268 *r = result; \
2270 VUPKPX(lpx, UPKLO)
2271 VUPKPX(hpx, UPKHI)
2272 #undef VUPKPX
2274 #define VUPK(suffix, unpacked, packee, hi) \
2275 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2277 int i; \
2278 ppc_avr_t result; \
2280 if (hi) { \
2281 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2282 result.unpacked[i] = b->packee[i]; \
2284 } else { \
2285 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2286 i++) { \
2287 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2290 *r = result; \
2292 VUPK(hsb, s16, s8, UPKHI)
2293 VUPK(hsh, s32, s16, UPKHI)
2294 VUPK(hsw, s64, s32, UPKHI)
2295 VUPK(lsb, s16, s8, UPKLO)
2296 VUPK(lsh, s32, s16, UPKLO)
2297 VUPK(lsw, s64, s32, UPKLO)
2298 #undef VUPK
2299 #undef UPKHI
2300 #undef UPKLO
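/* VGENERIC_DO applies a scalar per-element operation to every element.  For
 * the narrow count-leading-zeros cases the value is first shifted into the
 * top byte/halfword of a 32-bit word so clz32 only counts zeros inside the
 * element, e.g. clzb(0x1f) = clz32(0x1f000000) = 3; zero inputs are
 * special-cased to the element width.
 */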
2302 #define VGENERIC_DO(name, element) \
2303 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2305 int i; \
2307 VECTOR_FOR_INORDER_I(i, element) { \
2308 r->element[i] = name(b->element[i]); \
2312 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2313 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2314 #define clzw(v) clz32((v))
2315 #define clzd(v) clz64((v))
2317 VGENERIC_DO(clzb, u8)
2318 VGENERIC_DO(clzh, u16)
2319 VGENERIC_DO(clzw, u32)
2320 VGENERIC_DO(clzd, u64)
2322 #undef clzb
2323 #undef clzh
2324 #undef clzw
2325 #undef clzd
2327 #define ctzb(v) ((v) ? ctz32(v) : 8)
2328 #define ctzh(v) ((v) ? ctz32(v) : 16)
2329 #define ctzw(v) ctz32((v))
2330 #define ctzd(v) ctz64((v))
2332 VGENERIC_DO(ctzb, u8)
2333 VGENERIC_DO(ctzh, u16)
2334 VGENERIC_DO(ctzw, u32)
2335 VGENERIC_DO(ctzd, u64)
2337 #undef ctzb
2338 #undef ctzh
2339 #undef ctzw
2340 #undef ctzd
2342 #define popcntb(v) ctpop8(v)
2343 #define popcnth(v) ctpop16(v)
2344 #define popcntw(v) ctpop32(v)
2345 #define popcntd(v) ctpop64(v)
2347 VGENERIC_DO(popcntb, u8)
2348 VGENERIC_DO(popcnth, u16)
2349 VGENERIC_DO(popcntw, u32)
2350 VGENERIC_DO(popcntd, u64)
2352 #undef popcntb
2353 #undef popcnth
2354 #undef popcntw
2355 #undef popcntd
2357 #undef VGENERIC_DO
2359 #if defined(HOST_WORDS_BIGENDIAN)
2360 #define QW_ONE { .u64 = { 0, 1 } }
2361 #else
2362 #define QW_ONE { .u64 = { 1, 0 } }
2363 #endif
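/* Without a native 128-bit integer type, quadword arithmetic is done on the
 * two 64-bit halves.  The carry out of the low halves' addition is detected
 * with "~a.lo < b.lo": a.lo + b.lo overflows 64 bits exactly when
 * b.lo > UINT64_MAX - a.lo, i.e. when b.lo > ~a.lo.
 */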
2365 #ifndef CONFIG_INT128
2367 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2369 t->u64[0] = ~a.u64[0];
2370 t->u64[1] = ~a.u64[1];
2373 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2375 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
2376 return -1;
2377 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
2378 return 1;
2379 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
2380 return -1;
2381 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
2382 return 1;
2383 } else {
2384 return 0;
2388 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2390 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2391 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2392 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2395 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2397 ppc_avr_t not_a;
2398 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2399 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2400 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2401 avr_qw_not(&not_a, a);
2402 return avr_qw_cmpu(not_a, b) < 0;
2405 #endif
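/* Quadword add/subtract helpers: the *uqm forms compute the 128-bit result
 * modulo 2^128, the *cuq forms only the carry/borrow out, and the e* forms
 * fold in an extended carry taken from bit 0 of c.  The carry-out test
 * "~a < b" is the 128-bit analogue of the trick described above.
 */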
2407 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2409 #ifdef CONFIG_INT128
2410 r->u128 = a->u128 + b->u128;
2411 #else
2412 avr_qw_add(r, *a, *b);
2413 #endif
2416 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2418 #ifdef CONFIG_INT128
2419 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2420 #else
2422 if (c->u64[LO_IDX] & 1) {
2423 ppc_avr_t tmp;
2425 tmp.u64[HI_IDX] = 0;
2426 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2427 avr_qw_add(&tmp, *a, tmp);
2428 avr_qw_add(r, tmp, *b);
2429 } else {
2430 avr_qw_add(r, *a, *b);
2432 #endif
2435 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2437 #ifdef CONFIG_INT128
2438 r->u128 = (~a->u128 < b->u128);
2439 #else
2440 ppc_avr_t not_a;
2442 avr_qw_not(&not_a, *a);
2444 r->u64[HI_IDX] = 0;
2445 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2446 #endif
2449 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2451 #ifdef CONFIG_INT128
2452 int carry_out = (~a->u128 < b->u128);
2453 if (!carry_out && (c->u128 & 1)) {
2454 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2455 ((a->u128 != 0) || (b->u128 != 0));
2457 r->u128 = carry_out;
2458 #else
2460 int carry_in = c->u64[LO_IDX] & 1;
2461 int carry_out = 0;
2462 ppc_avr_t tmp;
2464 carry_out = avr_qw_addc(&tmp, *a, *b);
2466 if (!carry_out && carry_in) {
2467 ppc_avr_t one = QW_ONE;
2468 carry_out = avr_qw_addc(&tmp, tmp, one);
2470 r->u64[HI_IDX] = 0;
2471 r->u64[LO_IDX] = carry_out;
2472 #endif
2475 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2477 #ifdef CONFIG_INT128
2478 r->u128 = a->u128 - b->u128;
2479 #else
2480 ppc_avr_t tmp;
2481 ppc_avr_t one = QW_ONE;
2483 avr_qw_not(&tmp, *b);
2484 avr_qw_add(&tmp, *a, tmp);
2485 avr_qw_add(r, tmp, one);
2486 #endif
2489 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2491 #ifdef CONFIG_INT128
2492 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2493 #else
2494 ppc_avr_t tmp, sum;
2496 avr_qw_not(&tmp, *b);
2497 avr_qw_add(&sum, *a, tmp);
2499 tmp.u64[HI_IDX] = 0;
2500 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2501 avr_qw_add(r, sum, tmp);
2502 #endif
2505 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2507 #ifdef CONFIG_INT128
2508 r->u128 = (~a->u128 < ~b->u128) ||
2509 (a->u128 + ~b->u128 == (__uint128_t)-1);
2510 #else
2511 int carry = (avr_qw_cmpu(*a, *b) > 0);
2512 if (!carry) {
2513 ppc_avr_t tmp;
2514 avr_qw_not(&tmp, *b);
2515 avr_qw_add(&tmp, *a, tmp);
2516 carry = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2518 r->u64[HI_IDX] = 0;
2519 r->u64[LO_IDX] = carry;
2520 #endif
2523 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2525 #ifdef CONFIG_INT128
2526 r->u128 =
2527 (~a->u128 < ~b->u128) ||
2528 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2529 #else
2530 int carry_in = c->u64[LO_IDX] & 1;
2531 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2532 if (!carry_out && carry_in) {
2533 ppc_avr_t tmp;
2534 avr_qw_not(&tmp, *b);
2535 avr_qw_add(&tmp, *a, tmp);
2536 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2539 r->u64[HI_IDX] = 0;
2540 r->u64[LO_IDX] = carry_out;
2541 #endif
2544 #define BCD_PLUS_PREF_1 0xC
2545 #define BCD_PLUS_PREF_2 0xF
2546 #define BCD_PLUS_ALT_1 0xA
2547 #define BCD_NEG_PREF 0xD
2548 #define BCD_NEG_ALT 0xB
2549 #define BCD_PLUS_ALT_2 0xE
2550 #define NATIONAL_PLUS 0x2B
2551 #define NATIONAL_NEG 0x2D
2553 #if defined(HOST_WORDS_BIGENDIAN)
2554 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2555 #else
2556 #define BCD_DIG_BYTE(n) ((n) / 2)
2557 #endif
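/* Packed BCD values hold 31 decimal digits in 4-bit nibbles plus a sign code
 * in nibble 0.  BCD_DIG_BYTE(n) maps digit n to its byte for the host byte
 * order; even digits live in the low nibble of that byte, odd digits in the
 * high nibble.  Sign nibbles 0xA/0xC/0xE/0xF mean positive, 0xB/0xD negative
 * (see the defines above).
 */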
2559 static int bcd_get_sgn(ppc_avr_t *bcd)
2561 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2562 case BCD_PLUS_PREF_1:
2563 case BCD_PLUS_PREF_2:
2564 case BCD_PLUS_ALT_1:
2565 case BCD_PLUS_ALT_2:
2567 return 1;
2570 case BCD_NEG_PREF:
2571 case BCD_NEG_ALT:
2573 return -1;
2576 default:
2578 return 0;
2583 static int bcd_preferred_sgn(int sgn, int ps)
2585 if (sgn >= 0) {
2586 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2587 } else {
2588 return BCD_NEG_PREF;
2592 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2594 uint8_t result;
2595 if (n & 1) {
2596 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2597 } else {
2598 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2601 if (unlikely(result > 9)) {
2602 *invalid = true;
2604 return result;
2607 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2609 if (n & 1) {
2610 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2611 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2612 } else {
2613 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2614 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2618 static bool bcd_is_valid(ppc_avr_t *bcd)
2620 int i;
2621 int invalid = 0;
2623 if (bcd_get_sgn(bcd) == 0) {
2624 return false;
2627 for (i = 1; i < 32; i++) {
2628 bcd_get_digit(bcd, i, &invalid);
2629 if (unlikely(invalid)) {
2630 return false;
2633 return true;
2636 static int bcd_cmp_zero(ppc_avr_t *bcd)
2638 if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) {
2639 return CRF_EQ;
2640 } else {
2641 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2645 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2647 #if defined(HOST_WORDS_BIGENDIAN)
2648 return reg->u16[7 - n];
2649 #else
2650 return reg->u16[n];
2651 #endif
2654 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2656 #if defined(HOST_WORDS_BIGENDIAN)
2657 reg->u16[7 - n] = val;
2658 #else
2659 reg->u16[n] = val;
2660 #endif
2663 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2665 int i;
2666 int invalid = 0;
2667 for (i = 31; i > 0; i--) {
2668 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2669 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2670 if (unlikely(invalid)) {
2671 return 0; /* doesn't matter */
2672 } else if (dig_a > dig_b) {
2673 return 1;
2674 } else if (dig_a < dig_b) {
2675 return -1;
2679 return 0;
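/* bcd_add_mag/bcd_sub_mag add or subtract magnitudes digit by digit
 * (positions 1..31, skipping the sign nibble) with decimal carry/borrow
 * propagation.  They return a zero flag (1 when the result is all zeroes),
 * -1 if an input digit was invalid, and report the final carry through
 * *overflow; callers arrange for the larger magnitude to be the first
 * operand of a subtract.
 */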
2682 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2683 int *overflow)
2685 int carry = 0;
2686 int i;
2687 int is_zero = 1;
2688 for (i = 1; i <= 31; i++) {
2689 uint8_t digit = bcd_get_digit(a, i, invalid) +
2690 bcd_get_digit(b, i, invalid) + carry;
2691 is_zero &= (digit == 0);
2692 if (digit > 9) {
2693 carry = 1;
2694 digit -= 10;
2695 } else {
2696 carry = 0;
2699 bcd_put_digit(t, digit, i);
2701 if (unlikely(*invalid)) {
2702 return -1;
2706 *overflow = carry;
2707 return is_zero;
2710 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2711 int *overflow)
2713 int carry = 0;
2714 int i;
2715 int is_zero = 1;
2716 for (i = 1; i <= 31; i++) {
2717 uint8_t digit = bcd_get_digit(a, i, invalid) -
2718 bcd_get_digit(b, i, invalid) + carry;
2719 is_zero &= (digit == 0);
2720 if (digit & 0x80) {
2721 carry = -1;
2722 digit += 10;
2723 } else {
2724 carry = 0;
2727 bcd_put_digit(t, digit, i);
2729 if (unlikely(*invalid)) {
2730 return -1;
2734 *overflow = carry;
2735 return is_zero;
2738 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2741 int sgna = bcd_get_sgn(a);
2742 int sgnb = bcd_get_sgn(b);
2743 int invalid = (sgna == 0) || (sgnb == 0);
2744 int overflow = 0;
2745 int zero = 0;
2746 uint32_t cr = 0;
2747 ppc_avr_t result = { .u64 = { 0, 0 } };
2749 if (!invalid) {
2750 if (sgna == sgnb) {
2751 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2752 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2753 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2754 } else if (bcd_cmp_mag(a, b) > 0) {
2755 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2756 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2757 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2758 } else {
2759 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2760 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2761 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2765 if (unlikely(invalid)) {
2766 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2767 cr = CRF_SO;
2768 } else if (overflow) {
2769 cr |= CRF_SO;
2770 } else if (zero) {
2771 cr = CRF_EQ;
2774 *r = result;
2776 return cr;
2779 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2781 ppc_avr_t bcopy = *b;
2782 int sgnb = bcd_get_sgn(b);
2783 if (sgnb < 0) {
2784 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2785 } else if (sgnb > 0) {
2786 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2788 /* else invalid ... defer to bcdadd code for proper handling */
2790 return helper_bcdadd(r, a, &bcopy, ps);
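/* bcdcfn/bcdctn convert between packed BCD and the "national" decimal
 * format: seven 16-bit characters '0'..'9' (0x30..0x39) plus a sign
 * character in position 0, 0x2B ('+') or 0x2D ('-').
 */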
2793 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2795 int i;
2796 int cr = 0;
2797 uint16_t national = 0;
2798 uint16_t sgnb = get_national_digit(b, 0);
2799 ppc_avr_t ret = { .u64 = { 0, 0 } };
2800 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2802 for (i = 1; i < 8; i++) {
2803 national = get_national_digit(b, i);
2804 if (unlikely(national < 0x30 || national > 0x39)) {
2805 invalid = 1;
2806 break;
2809 bcd_put_digit(&ret, national & 0xf, i);
2812 if (sgnb == NATIONAL_PLUS) {
2813 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2814 } else {
2815 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2818 cr = bcd_cmp_zero(&ret);
2820 if (unlikely(invalid)) {
2821 cr = CRF_SO;
2824 *r = ret;
2826 return cr;
2829 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2831 int i;
2832 int cr = 0;
2833 int sgnb = bcd_get_sgn(b);
2834 int invalid = (sgnb == 0);
2835 ppc_avr_t ret = { .u64 = { 0, 0 } };
2837 int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0);
2839 for (i = 1; i < 8; i++) {
2840 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2842 if (unlikely(invalid)) {
2843 break;
2846 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2848 cr = bcd_cmp_zero(b);
2850 if (ox_flag) {
2851 cr |= CRF_SO;
2854 if (unlikely(invalid)) {
2855 cr = CRF_SO;
2858 *r = ret;
2860 return cr;
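/* bcdcfz/bcdctz convert between packed BCD and the zoned format: 16 bytes,
 * each holding a zone nibble (0x3, or 0xF when ps is set) and one decimal
 * digit, with the sign encoded in the zone nibble of the low-order byte.
 */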
2863 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2865 int i;
2866 int cr = 0;
2867 int invalid = 0;
2868 int zone_digit = 0;
2869 int zone_lead = ps ? 0xF : 0x3;
2870 int digit = 0;
2871 ppc_avr_t ret = { .u64 = { 0, 0 } };
2872 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;
2874 if (unlikely((sgnb < 0xA) && ps)) {
2875 invalid = 1;
2878 for (i = 0; i < 16; i++) {
2879 zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
2880 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
2881 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2882 invalid = 1;
2883 break;
2886 bcd_put_digit(&ret, digit, i + 1);
2889 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2890 (!ps && (sgnb & 0x4))) {
2891 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2892 } else {
2893 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2896 cr = bcd_cmp_zero(&ret);
2898 if (unlikely(invalid)) {
2899 cr = CRF_SO;
2902 *r = ret;
2904 return cr;
2907 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2909 int i;
2910 int cr = 0;
2911 uint8_t digit = 0;
2912 int sgnb = bcd_get_sgn(b);
2913 int zone_lead = (ps) ? 0xF0 : 0x30;
2914 int invalid = (sgnb == 0);
2915 ppc_avr_t ret = { .u64 = { 0, 0 } };
2917 int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);
2919 for (i = 0; i < 16; i++) {
2920 digit = bcd_get_digit(b, i + 1, &invalid);
2922 if (unlikely(invalid)) {
2923 break;
2926 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
2929 if (ps) {
2930 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2931 } else {
2932 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2935 cr = bcd_cmp_zero(b);
2937 if (ox_flag) {
2938 cr |= CRF_SO;
2941 if (unlikely(invalid)) {
2942 cr = CRF_SO;
2945 *r = ret;
2947 return cr;
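/* bcdcfsq converts a signed 128-bit binary integer to BCD: the magnitude is
 * divided by 10^15 with divu128, the remainder supplying digits 1..15 and
 * the quotient digits 16..31; CRF_SO is raised if the value needs more than
 * 31 digits.  bcdctsq below is the inverse, accumulating digits with mulu64.
 */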
2950 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2952 int i;
2953 int cr = 0;
2954 uint64_t lo_value;
2955 uint64_t hi_value;
2956 ppc_avr_t ret = { .u64 = { 0, 0 } };
2958 if (b->s64[HI_IDX] < 0) {
2959 lo_value = -b->s64[LO_IDX];
2960 hi_value = ~b->u64[HI_IDX] + !lo_value;
2961 bcd_put_digit(&ret, 0xD, 0);
2962 } else {
2963 lo_value = b->u64[LO_IDX];
2964 hi_value = b->u64[HI_IDX];
2965 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2968 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
2969 lo_value > 9999999999999999ULL) {
2970 cr = CRF_SO;
2973 for (i = 1; i < 16; hi_value /= 10, i++) {
2974 bcd_put_digit(&ret, hi_value % 10, i);
2977 for (; i < 32; lo_value /= 10, i++) {
2978 bcd_put_digit(&ret, lo_value % 10, i);
2981 cr |= bcd_cmp_zero(&ret);
2983 *r = ret;
2985 return cr;
2988 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2990 uint8_t i;
2991 int cr;
2992 uint64_t carry;
2993 uint64_t unused;
2994 uint64_t lo_value;
2995 uint64_t hi_value = 0;
2996 int sgnb = bcd_get_sgn(b);
2997 int invalid = (sgnb == 0);
2999 lo_value = bcd_get_digit(b, 31, &invalid);
3000 for (i = 30; i > 0; i--) {
3001 mulu64(&lo_value, &carry, lo_value, 10ULL);
3002 mulu64(&hi_value, &unused, hi_value, 10ULL);
3003 lo_value += bcd_get_digit(b, i, &invalid);
3004 hi_value += carry;
3006 if (unlikely(invalid)) {
3007 break;
3011 if (sgnb == -1) {
3012 r->s64[LO_IDX] = -lo_value;
3013 r->s64[HI_IDX] = ~hi_value + !r->s64[LO_IDX];
3014 } else {
3015 r->s64[LO_IDX] = lo_value;
3016 r->s64[HI_IDX] = hi_value;
3019 cr = bcd_cmp_zero(b);
3021 if (unlikely(invalid)) {
3022 cr = CRF_SO;
3025 return cr;
3028 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3030 int i;
3031 int invalid = 0;
3033 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
3034 return CRF_SO;
3037 *r = *a;
3038 bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0);
3040 for (i = 1; i < 32; i++) {
3041 bcd_get_digit(a, i, &invalid);
3042 bcd_get_digit(b, i, &invalid);
3043 if (unlikely(invalid)) {
3044 return CRF_SO;
3048 return bcd_cmp_zero(r);
3051 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
3053 int sgnb = bcd_get_sgn(b);
3055 *r = *b;
3056 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
3058 if (bcd_is_valid(b) == false) {
3059 return CRF_SO;
3062 return bcd_cmp_zero(r);
3065 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3067 int cr;
3068 #if defined(HOST_WORDS_BIGENDIAN)
3069 int i = a->s8[7];
3070 #else
3071 int i = a->s8[8];
3072 #endif
3073 bool ox_flag = false;
3074 int sgnb = bcd_get_sgn(b);
3075 ppc_avr_t ret = *b;
3076 ret.u64[LO_IDX] &= ~0xf;
3078 if (bcd_is_valid(b) == false) {
3079 return CRF_SO;
3082 if (unlikely(i > 31)) {
3083 i = 31;
3084 } else if (unlikely(i < -31)) {
3085 i = -31;
3088 if (i > 0) {
3089 ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
3090 } else {
3091 urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
3093 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
3095 *r = ret;
3097 cr = bcd_cmp_zero(r);
3098 if (ox_flag) {
3099 cr |= CRF_SO;
3102 return cr;
3105 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3107 int cr;
3108 int i;
3109 int invalid = 0;
3110 bool ox_flag = false;
3111 ppc_avr_t ret = *b;
3113 for (i = 0; i < 32; i++) {
3114 bcd_get_digit(b, i, &invalid);
3116 if (unlikely(invalid)) {
3117 return CRF_SO;
3121 #if defined(HOST_WORDS_BIGENDIAN)
3122 i = a->s8[7];
3123 #else
3124 i = a->s8[8];
3125 #endif
3126 if (i >= 32) {
3127 ox_flag = true;
3128 ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0;
3129 } else if (i <= -32) {
3130 ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0;
3131 } else if (i > 0) {
3132 ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
3133 } else {
3134 urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
3136 *r = ret;
3138 cr = bcd_cmp_zero(r);
3139 if (ox_flag) {
3140 cr |= CRF_SO;
3143 return cr;
3146 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3148 int cr;
3149 int unused = 0;
3150 int invalid = 0;
3151 bool ox_flag = false;
3152 int sgnb = bcd_get_sgn(b);
3153 ppc_avr_t ret = *b;
3154 ret.u64[LO_IDX] &= ~0xf;
3156 #if defined(HOST_WORDS_BIGENDIAN)
3157 int i = a->s8[7];
3158 ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } };
3159 #else
3160 int i = a->s8[8];
3161 ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } };
3162 #endif
3164 if (bcd_is_valid(b) == false) {
3165 return CRF_SO;
3168 if (unlikely(i > 31)) {
3169 i = 31;
3170 } else if (unlikely(i < -31)) {
3171 i = -31;
3174 if (i > 0) {
3175 ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
3176 } else {
3177 urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
3179 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
3180 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
3183 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
3185 cr = bcd_cmp_zero(&ret);
3186 if (ox_flag) {
3187 cr |= CRF_SO;
3189 *r = ret;
3191 return cr;
3194 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3196 uint64_t mask;
3197 uint32_t ox_flag = 0;
3198 #if defined(HOST_WORDS_BIGENDIAN)
3199 int i = a->s16[3] + 1;
3200 #else
3201 int i = a->s16[4] + 1;
3202 #endif
3203 ppc_avr_t ret = *b;
3205 if (bcd_is_valid(b) == false) {
3206 return CRF_SO;
3209 if (i > 16 && i < 32) {
3210 mask = (uint64_t)-1 >> (128 - i * 4);
3211 if (ret.u64[HI_IDX] & ~mask) {
3212 ox_flag = CRF_SO;
3215 ret.u64[HI_IDX] &= mask;
3216 } else if (i >= 0 && i <= 16) {
3217 mask = (uint64_t)-1 >> (64 - i * 4);
3218 if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) {
3219 ox_flag = CRF_SO;
3222 ret.u64[LO_IDX] &= mask;
3223 ret.u64[HI_IDX] = 0;
3225 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
3226 *r = ret;
3228 return bcd_cmp_zero(&ret) | ox_flag;
3231 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3233 int i;
3234 uint64_t mask;
3235 uint32_t ox_flag = 0;
3236 int invalid = 0;
3237 ppc_avr_t ret = *b;
3239 for (i = 0; i < 32; i++) {
3240 bcd_get_digit(b, i, &invalid);
3242 if (unlikely(invalid)) {
3243 return CRF_SO;
3247 #if defined(HOST_WORDS_BIGENDIAN)
3248 i = a->s16[3];
3249 #else
3250 i = a->s16[4];
3251 #endif
3252 if (i > 16 && i < 33) {
3253 mask = (uint64_t)-1 >> (128 - i * 4);
3254 if (ret.u64[HI_IDX] & ~mask) {
3255 ox_flag = CRF_SO;
3258 ret.u64[HI_IDX] &= mask;
3259 } else if (i > 0 && i <= 16) {
3260 mask = (uint64_t)-1 >> (64 - i * 4);
3261 if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) {
3262 ox_flag = CRF_SO;
3265 ret.u64[LO_IDX] &= mask;
3266 ret.u64[HI_IDX] = 0;
3267 } else if (i == 0) {
3268 if (ret.u64[HI_IDX] || ret.u64[LO_IDX]) {
3269 ox_flag = CRF_SO;
3271 ret.u64[HI_IDX] = ret.u64[LO_IDX] = 0;
3274 *r = ret;
3275 if (r->u64[HI_IDX] == 0 && r->u64[LO_IDX] == 0) {
3276 return ox_flag | CRF_EQ;
3279 return ox_flag | CRF_GT;
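/* AES acceleration helpers: vsbox applies the AES S-box to every byte;
 * vcipher performs one full encryption round (SubBytes/ShiftRows/MixColumns
 * via the combined AES_Te tables, then XOR with b as the round key) and
 * vcipherlast the final round without MixColumns.  vncipher/vncipherlast
 * are the corresponding inverse-cipher rounds.
 */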
3282 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
3284 int i;
3285 VECTOR_FOR_INORDER_I(i, u8) {
3286 r->u8[i] = AES_sbox[a->u8[i]];
3290 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3292 ppc_avr_t result;
3293 int i;
3295 VECTOR_FOR_INORDER_I(i, u32) {
3296 result.AVRW(i) = b->AVRW(i) ^
3297 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
3298 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
3299 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
3300 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
3302 *r = result;
3305 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3307 ppc_avr_t result;
3308 int i;
3310 VECTOR_FOR_INORDER_I(i, u8) {
3311 result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
3313 *r = result;
3316 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3318 /* This differs from what is written in ISA V2.07. The RTL is */
3319 /* incorrect and will be fixed in V2.07B. */
3320 int i;
3321 ppc_avr_t tmp;
3323 VECTOR_FOR_INORDER_I(i, u8) {
3324 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
3327 VECTOR_FOR_INORDER_I(i, u32) {
3328 r->AVRW(i) =
3329 AES_imc[tmp.AVRB(4*i + 0)][0] ^
3330 AES_imc[tmp.AVRB(4*i + 1)][1] ^
3331 AES_imc[tmp.AVRB(4*i + 2)][2] ^
3332 AES_imc[tmp.AVRB(4*i + 3)][3];
3336 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3338 ppc_avr_t result;
3339 int i;
3341 VECTOR_FOR_INORDER_I(i, u8) {
3342 result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
3344 *r = result;
3347 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
3348 #if defined(HOST_WORDS_BIGENDIAN)
3349 #define EL_IDX(i) (i)
3350 #else
3351 #define EL_IDX(i) (3 - (i))
3352 #endif
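/* vshasigmaw evaluates, per word element, one of the four SHA-256 sigma
 * functions selected by st (bit 4 of st_six) and the element's bit in six:
 * the message-schedule functions use rotates 7/18 plus shift 3 and rotates
 * 17/19 plus shift 10, the compression functions rotates 2/13/22 and
 * 6/11/25.  vshasigmad below does the same per doubleword for SHA-512.
 */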
3354 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3356 int st = (st_six & 0x10) != 0;
3357 int six = st_six & 0xF;
3358 int i;
3360 VECTOR_FOR_INORDER_I(i, u32) {
3361 if (st == 0) {
3362 if ((six & (0x8 >> i)) == 0) {
3363 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
3364 ROTRu32(a->u32[EL_IDX(i)], 18) ^
3365 (a->u32[EL_IDX(i)] >> 3);
3366 } else { /* six.bit[i] == 1 */
3367 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
3368 ROTRu32(a->u32[EL_IDX(i)], 19) ^
3369 (a->u32[EL_IDX(i)] >> 10);
3371 } else { /* st == 1 */
3372 if ((six & (0x8 >> i)) == 0) {
3373 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
3374 ROTRu32(a->u32[EL_IDX(i)], 13) ^
3375 ROTRu32(a->u32[EL_IDX(i)], 22);
3376 } else { /* six.bit[i] == 1 */
3377 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
3378 ROTRu32(a->u32[EL_IDX(i)], 11) ^
3379 ROTRu32(a->u32[EL_IDX(i)], 25);
3385 #undef ROTRu32
3386 #undef EL_IDX
3388 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
3389 #if defined(HOST_WORDS_BIGENDIAN)
3390 #define EL_IDX(i) (i)
3391 #else
3392 #define EL_IDX(i) (1 - (i))
3393 #endif
3395 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3397 int st = (st_six & 0x10) != 0;
3398 int six = st_six & 0xF;
3399 int i;
3401 VECTOR_FOR_INORDER_I(i, u64) {
3402 if (st == 0) {
3403 if ((six & (0x8 >> (2*i))) == 0) {
3404 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
3405 ROTRu64(a->u64[EL_IDX(i)], 8) ^
3406 (a->u64[EL_IDX(i)] >> 7);
3407 } else { /* six.bit[2*i] == 1 */
3408 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
3409 ROTRu64(a->u64[EL_IDX(i)], 61) ^
3410 (a->u64[EL_IDX(i)] >> 6);
3412 } else { /* st == 1 */
3413 if ((six & (0x8 >> (2*i))) == 0) {
3414 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
3415 ROTRu64(a->u64[EL_IDX(i)], 34) ^
3416 ROTRu64(a->u64[EL_IDX(i)], 39);
3417 } else { /* six.bit[2*i] == 1 */
3418 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
3419 ROTRu64(a->u64[EL_IDX(i)], 18) ^
3420 ROTRu64(a->u64[EL_IDX(i)], 41);
3426 #undef ROTRu64
3427 #undef EL_IDX
3429 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3431 ppc_avr_t result;
3432 int i;
3434 VECTOR_FOR_INORDER_I(i, u8) {
3435 int indexA = c->u8[i] >> 4;
3436 int indexB = c->u8[i] & 0xF;
3437 #if defined(HOST_WORDS_BIGENDIAN)
3438 result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
3439 #else
3440 result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
3441 #endif
3443 *r = result;
3446 #undef VECTOR_FOR_INORDER_I
3447 #undef HI_IDX
3448 #undef LO_IDX
3450 /*****************************************************************************/
3451 /* SPE extension helpers */
3452 /* Use a 16-entry nibble-reversal table to make byte reversal quicker */
3453 static const uint8_t hbrev[16] = {
3454 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3455 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3458 static inline uint8_t byte_reverse(uint8_t val)
3460 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3463 static inline uint32_t word_reverse(uint32_t val)
3465 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3466 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
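/* brinc produces a "bit-reversed increment": within the window selected by
 * the mask and arg2, the relevant bits of arg1 are bit-reversed, incremented
 * and reversed back, giving the next index in bit-reversed order (typically
 * used for FFT-style addressing); bits outside the mask pass through
 * unchanged from arg1.
 */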
3469 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
3470 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3472 uint32_t a, b, d, mask;
3474 mask = UINT32_MAX >> (32 - MASKBITS);
3475 a = arg1 & mask;
3476 b = arg2 & mask;
3477 d = word_reverse(1 + word_reverse(a | ~b));
3478 return (arg1 & ~mask) | (d & b);
3481 uint32_t helper_cntlsw32(uint32_t val)
3483 if (val & 0x80000000) {
3484 return clz32(~val);
3485 } else {
3486 return clz32(val);
3490 uint32_t helper_cntlzw32(uint32_t val)
3492 return clz32(val);
3495 /* 440 specific */
3496 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3497 target_ulong low, uint32_t update_Rc)
3499 target_ulong mask;
3500 int i;
3502 i = 1;
3503 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3504 if ((high & mask) == 0) {
3505 if (update_Rc) {
3506 env->crf[0] = 0x4;
3508 goto done;
3510 i++;
3512 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3513 if ((low & mask) == 0) {
3514 if (update_Rc) {
3515 env->crf[0] = 0x8;
3517 goto done;
3519 i++;
3521 i = 8;
3522 if (update_Rc) {
3523 env->crf[0] = 0x2;
3525 done:
3526 env->xer = (env->xer & ~0x7F) | i;
3527 if (update_Rc) {
3528 env->crf[0] |= xer_so;
3530 return i;