/*
 *  PowerPC integer and vector emulation helpers for QEMU.
 *
 *  Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "cpu.h"
#include "qemu/host-utils.h"
#include "helper.h"

#include "helper_regs.h"

/*****************************************************************************/
/* Fixed point operations helpers */

#if defined(TARGET_PPC64)

uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
{
    int64_t th;
    uint64_t tl;

    muls64(&tl, (uint64_t *)&th, arg1, arg2);
    /* If th != 0 && th != -1, then we had an overflow */
    if (likely((uint64_t)(th + 1) <= 1)) {
        env->ov = 0;
    } else {
        env->so = env->ov = 1;
    }
    return (int64_t)tl;
}
#endif

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

#endif

target_ulong helper_cntlzw(target_ulong t)
{
    return clz32(t);
}

#if defined(TARGET_PPC64)
target_ulong helper_cntlzd(target_ulong t)
{
    return clz64(t);
}
#endif

#if defined(TARGET_PPC64)
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i*8)) & 0xFF;
        if (index < 64) {
            if (rb & (1ull << (63-index))) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}
#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}

target_ulong helper_popcntd(target_ulong val)
{
    return ctpop64(val);
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    val = (val & 0x00ff00ff) + ((val >>  8) & 0x00ff00ff);
    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/* Extremely decomposed:
 *   return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#else
#define HI_IDX 1
#define LO_IDX 0
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)            \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)            \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)

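/*
 * For reference: with the SATCVT macro above, an instantiation such as
 * SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX) defines
 *     static inline int8_t cvtshsb(int16_t x, int *sat);
 * which clamps x to [INT8_MIN, INT8_MAX] and sets *sat to 1 whenever the
 * clamping (saturation) actually took place.
 */
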
void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

#define VARITH_DO(name, op, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
    {                                                             \
        int i;                                                    \
                                                                  \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {            \
            r->element[i] = a->element[i] op b->element[i];       \
        }                                                         \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH_DO(muluwm, *, u32)

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
                                     type, &env->vec_status);           \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)

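/*
 * Each average is computed in a type twice as wide as the element (e.g.
 * uint16_t for u8), so the "+ 1" rounding term cannot overflow: for example
 * avgub(0xff, 0xff) evaluates (0xff + 0xff + 1) >> 1 = 0xff instead of
 * wrapping in 8 bits.
 */
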
#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            /* ALL_IN does not need to be updated here.  */
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            if (float32_is_any_nan(b->f[i])) {                          \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f[i], &s);            \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i*2+HI_IDX] = a->element[i];             \
                result.element[i*2+LO_IDX] = b->element[i];             \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + HI_IDX] *              \
                    (cast)b->mul_element[i * 2 + HI_IDX];               \
            } else {                                                    \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + LO_IDX] *              \
                    (cast)b->mul_element[i * 2 + LO_IDX];               \
            }                                                           \
        }                                                               \
    }
#define VMUL(suffix, mul_element, prod_element, cast)            \
    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)    \
    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
VMUL(sb, s8, s16, int16_t)
VMUL(sh, s16, s32, int32_t)
VMUL(sw, s32, s64, int64_t)
VMUL(ub, u8, u16, uint16_t)
VMUL(uh, u16, u32, uint32_t)
VMUL(uw, u32, u64, uint64_t)
#undef VMUL_DO
#undef VMUL

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}

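/*
 * vbpermq gathers one bit of a per index byte of b: each of the 16 bytes of b
 * names a bit position (0..127, counted from the most significant bit) in a,
 * and the selected bits are packed into the high 16 bits of the result
 * doubleword; index values of 128 or more contribute a zero bit.
 */
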
#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#endif

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63-(index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->u64[HI_IDX] = perm;
    r->u64[LO_IDX] = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

static const uint64_t VGBBD_MASKS[256] = {
    /* 256 entries, indexed by byte value 0x00 .. 0xFF in ascending order: */
    0x0000000000000000ull, 0x0000000000000080ull, 0x0000000000008000ull, 0x0000000000008080ull,
    0x0000000000800000ull, 0x0000000000800080ull, 0x0000000000808000ull, 0x0000000000808080ull,
    0x0000000080000000ull, 0x0000000080000080ull, 0x0000000080008000ull, 0x0000000080008080ull,
    0x0000000080800000ull, 0x0000000080800080ull, 0x0000000080808000ull, 0x0000000080808080ull,
    0x0000008000000000ull, 0x0000008000000080ull, 0x0000008000008000ull, 0x0000008000008080ull,
    0x0000008000800000ull, 0x0000008000800080ull, 0x0000008000808000ull, 0x0000008000808080ull,
    0x0000008080000000ull, 0x0000008080000080ull, 0x0000008080008000ull, 0x0000008080008080ull,
    0x0000008080800000ull, 0x0000008080800080ull, 0x0000008080808000ull, 0x0000008080808080ull,
    0x0000800000000000ull, 0x0000800000000080ull, 0x0000800000008000ull, 0x0000800000008080ull,
    0x0000800000800000ull, 0x0000800000800080ull, 0x0000800000808000ull, 0x0000800000808080ull,
    0x0000800080000000ull, 0x0000800080000080ull, 0x0000800080008000ull, 0x0000800080008080ull,
    0x0000800080800000ull, 0x0000800080800080ull, 0x0000800080808000ull, 0x0000800080808080ull,
    0x0000808000000000ull, 0x0000808000000080ull, 0x0000808000008000ull, 0x0000808000008080ull,
    0x0000808000800000ull, 0x0000808000800080ull, 0x0000808000808000ull, 0x0000808000808080ull,
    0x0000808080000000ull, 0x0000808080000080ull, 0x0000808080008000ull, 0x0000808080008080ull,
    0x0000808080800000ull, 0x0000808080800080ull, 0x0000808080808000ull, 0x0000808080808080ull,
    0x0080000000000000ull, 0x0080000000000080ull, 0x0080000000008000ull, 0x0080000000008080ull,
    0x0080000000800000ull, 0x0080000000800080ull, 0x0080000000808000ull, 0x0080000000808080ull,
    0x0080000080000000ull, 0x0080000080000080ull, 0x0080000080008000ull, 0x0080000080008080ull,
    0x0080000080800000ull, 0x0080000080800080ull, 0x0080000080808000ull, 0x0080000080808080ull,
    0x0080008000000000ull, 0x0080008000000080ull, 0x0080008000008000ull, 0x0080008000008080ull,
    0x0080008000800000ull, 0x0080008000800080ull, 0x0080008000808000ull, 0x0080008000808080ull,
    0x0080008080000000ull, 0x0080008080000080ull, 0x0080008080008000ull, 0x0080008080008080ull,
    0x0080008080800000ull, 0x0080008080800080ull, 0x0080008080808000ull, 0x0080008080808080ull,
    0x0080800000000000ull, 0x0080800000000080ull, 0x0080800000008000ull, 0x0080800000008080ull,
    0x0080800000800000ull, 0x0080800000800080ull, 0x0080800000808000ull, 0x0080800000808080ull,
    0x0080800080000000ull, 0x0080800080000080ull, 0x0080800080008000ull, 0x0080800080008080ull,
    0x0080800080800000ull, 0x0080800080800080ull, 0x0080800080808000ull, 0x0080800080808080ull,
    0x0080808000000000ull, 0x0080808000000080ull, 0x0080808000008000ull, 0x0080808000008080ull,
    0x0080808000800000ull, 0x0080808000800080ull, 0x0080808000808000ull, 0x0080808000808080ull,
    0x0080808080000000ull, 0x0080808080000080ull, 0x0080808080008000ull, 0x0080808080008080ull,
    0x0080808080800000ull, 0x0080808080800080ull, 0x0080808080808000ull, 0x0080808080808080ull,
    0x8000000000000000ull, 0x8000000000000080ull, 0x8000000000008000ull, 0x8000000000008080ull,
    0x8000000000800000ull, 0x8000000000800080ull, 0x8000000000808000ull, 0x8000000000808080ull,
    0x8000000080000000ull, 0x8000000080000080ull, 0x8000000080008000ull, 0x8000000080008080ull,
    0x8000000080800000ull, 0x8000000080800080ull, 0x8000000080808000ull, 0x8000000080808080ull,
    0x8000008000000000ull, 0x8000008000000080ull, 0x8000008000008000ull, 0x8000008000008080ull,
    0x8000008000800000ull, 0x8000008000800080ull, 0x8000008000808000ull, 0x8000008000808080ull,
    0x8000008080000000ull, 0x8000008080000080ull, 0x8000008080008000ull, 0x8000008080008080ull,
    0x8000008080800000ull, 0x8000008080800080ull, 0x8000008080808000ull, 0x8000008080808080ull,
    0x8000800000000000ull, 0x8000800000000080ull, 0x8000800000008000ull, 0x8000800000008080ull,
    0x8000800000800000ull, 0x8000800000800080ull, 0x8000800000808000ull, 0x8000800000808080ull,
    0x8000800080000000ull, 0x8000800080000080ull, 0x8000800080008000ull, 0x8000800080008080ull,
    0x8000800080800000ull, 0x8000800080800080ull, 0x8000800080808000ull, 0x8000800080808080ull,
    0x8000808000000000ull, 0x8000808000000080ull, 0x8000808000008000ull, 0x8000808000008080ull,
    0x8000808000800000ull, 0x8000808000800080ull, 0x8000808000808000ull, 0x8000808000808080ull,
    0x8000808080000000ull, 0x8000808080000080ull, 0x8000808080008000ull, 0x8000808080008080ull,
    0x8000808080800000ull, 0x8000808080800080ull, 0x8000808080808000ull, 0x8000808080808080ull,
    0x8080000000000000ull, 0x8080000000000080ull, 0x8080000000008000ull, 0x8080000000008080ull,
    0x8080000000800000ull, 0x8080000000800080ull, 0x8080000000808000ull, 0x8080000000808080ull,
    0x8080000080000000ull, 0x8080000080000080ull, 0x8080000080008000ull, 0x8080000080008080ull,
    0x8080000080800000ull, 0x8080000080800080ull, 0x8080000080808000ull, 0x8080000080808080ull,
    0x8080008000000000ull, 0x8080008000000080ull, 0x8080008000008000ull, 0x8080008000008080ull,
    0x8080008000800000ull, 0x8080008000800080ull, 0x8080008000808000ull, 0x8080008000808080ull,
    0x8080008080000000ull, 0x8080008080000080ull, 0x8080008080008000ull, 0x8080008080008080ull,
    0x8080008080800000ull, 0x8080008080800080ull, 0x8080008080808000ull, 0x8080008080808080ull,
    0x8080800000000000ull, 0x8080800000000080ull, 0x8080800000008000ull, 0x8080800000008080ull,
    0x8080800000800000ull, 0x8080800000800080ull, 0x8080800000808000ull, 0x8080800000808080ull,
    0x8080800080000000ull, 0x8080800080000080ull, 0x8080800080008000ull, 0x8080800080008080ull,
    0x8080800080800000ull, 0x8080800080800080ull, 0x8080800080808000ull, 0x8080800080808080ull,
    0x8080808000000000ull, 0x8080808000000080ull, 0x8080808000008000ull, 0x8080808000008080ull,
    0x8080808000800000ull, 0x8080808000800080ull, 0x8080808000808000ull, 0x8080808000808080ull,
    0x8080808080000000ull, 0x8080808080000080ull, 0x8080808080008000ull, 0x8080808080008080ull,
    0x8080808080800000ull, 0x8080808080800080ull, 0x8080808080808000ull, 0x8080808080808080ull,
};

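/*
 * Each VGBBD_MASKS[v] entry spreads the 8 bits of v across the most
 * significant bit of each byte of a doubleword (bit 7 of v -> byte 0, ...,
 * bit 0 of v -> byte 7); e.g. VGBBD_MASKS[0x03] == 0x0000000000008080ull.
 * helper_vgbbd below ORs shifted copies of these masks together, which
 * amounts to transposing each 8x8 bit matrix of the source operand.
 */
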
void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    uint64_t t[2] = { 0, 0 };

    VECTOR_FOR_INORDER_I(i, u8) {
#if defined(HOST_WORDS_BIGENDIAN)
        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
#else
        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
#endif
    }

    r->u64[0] = t[0];
    r->u64[1] = t[1];
}

#define PMSUM(name, srcfld, trgfld, trgtyp)                             \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i, j;                                                           \
    trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])];                \
                                                                        \
    VECTOR_FOR_INORDER_I(i, srcfld) {                                   \
        prod[i] = 0;                                                    \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {                \
            if (a->srcfld[i] & (1ull<<j)) {                             \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);                 \
            }                                                           \
        }                                                               \
    }                                                                   \
                                                                        \
    VECTOR_FOR_INORDER_I(i, trgfld) {                                   \
        r->trgfld[i] = prod[2*i] ^ prod[2*i+1];                         \
    }                                                                   \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull<<j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull<<j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.u64[HI_IDX] = 0;
                    bshift.u64[LO_IDX] = b->u64[i];
                } else {
                    bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
                    bshift.u64[LO_IDX] = b->u64[i] << j;
                }
                prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
                prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
            }
        }
    }

    r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
    r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
#endif
}

#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
                                 ((e >> 6) & 0x3e0) |
                                 ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding)                                  \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
                             ppc_avr_t *b)                      \
    {                                                           \
        int i;                                                  \
        float_status s = env->vec_status;                       \
                                                                \
        set_float_rounding_mode(rounding, &s);                  \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
            r->f[i] = float32_round_to_int (b->f[i], &s);       \
        }                                                       \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

#define VROTATE(suffix, element, mask)                                  \
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = (a->element[i] << shift) |                  \
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
        }                                                               \
    }
VROTATE(h, u16, 0xF)
VROTATE(w, u32, 0x1F)
VROTATE(d, u64, 0x3F)
#undef VROTATE

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        float32 t = float32_sqrt(b->f[i], &env->vec_status);

        r->f[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_log2(b->f[i], &env->vec_status);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check to make sure that they are
 * to conform to what real hardware appears to do.  */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->u8[LO_IDX*15] & 0x7;                             \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
                                                                        \
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
            } else {                                                    \
                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
                                                                        \
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
            }                                                           \
        }                                                               \
    }

#define VSL(suffix, element, mask)                                      \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

#if defined(HOST_WORDS_BIGENDIAN)
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.u8[i] = b->u8[index - 0x10];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = (16 - sh) + i;
        if (index > 0xf) {
            result.u8[i] = a->u8[index - 0x10];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
#endif
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16-sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

/* Experimental testing shows that hardware masks the immediate.  */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#else
#define SPLAT_ELEMENT(element)                                  \
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
#endif
#define VSPLT(suffix, element)                                            \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                     \
        uint32_t s = b->element[SPLAT_ELEMENT(element)];                  \
        int i;                                                            \
                                                                          \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
            r->element[i] = s;                                            \
        }                                                                 \
    }
VSPLT(b, u8)
VSPLT(h, u16)
VSPLT(w, u32)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED

#define VSPLTI(suffix, element, splat_type)                     \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
    {                                                           \
        splat_type x = (int8_t)(splat << 3) >> 3;               \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = x;                                  \
        }                                                       \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI

#define VSR(suffix, element, mask)                                      \
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = a->element[i] >> shift;                     \
        }                                                               \
    }

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}

void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = ARRAY_SIZE(r->s32)-1;
#else
    upper = 0;
#endif
    t = (int64_t)b->s32[upper];
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->s32[i];
        result.s32[i] = 0;
    }
    result.s32[upper] = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = 1;
#else
    upper = 0;
#endif
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->s32[upper + i * 2];

        result.u64[i] = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->s32[2 * i + j];
        }
        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi)                                      \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)        \
    {                                                           \
        int i;                                                  \
        ppc_avr_t result;                                       \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {              \
            uint16_t e = b->u16[hi ? i : i+4];                  \
            uint8_t a = (e >> 15) ? 0xff : 0;                   \
            uint8_t r = (e >> 10) & 0x1f;                       \
            uint8_t g = (e >> 5) & 0x1f;                        \
            uint8_t b = e & 0x1f;                               \
                                                                \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
        }                                                       \
        *r = result;                                            \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#define VGENERIC_DO(name, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)             \
    {                                                           \
        int i;                                                  \
                                                                \
        VECTOR_FOR_INORDER_I(i, element) {                      \
            r->element[i] = name(b->element[i]);                \
        }                                                       \
    }

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)
VGENERIC_DO(clzw, u32)
VGENERIC_DO(clzd, u64)

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif

#ifndef CONFIG_INT128

static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
        return -1;
    } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
        return 1;
    } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
        return -1;
    } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
        return 1;
    } else {
        return 0;
    }
}

static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
}

static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif

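/*
 * The carry detection used above: the low doublewords produce a carry exactly
 * when a.u64[LO_IDX] + b.u64[LO_IDX] wraps, i.e. when b.u64[LO_IDX] is larger
 * than 0xffff...ffff - a.u64[LO_IDX], which is the test
 * (~a.u64[LO_IDX] < b.u64[LO_IDX]).  avr_qw_addc additionally reports a carry
 * out of the full 128-bit sum by testing ~a < b on the whole quadword.
 */
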
void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->u64[LO_IDX] & 1) {
        ppc_avr_t tmp;

        tmp.u64[HI_IDX] = 0;
        tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}

void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}

void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.u64[HI_IDX] = 0;
    tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry;
#endif
}

void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
    }

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}

#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_PREF    0xD
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_ALT_2  0xE

#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - (n/2))
#else
#define BCD_DIG_BYTE(n) (n/2)
#endif

static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
        return 1;

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
        return -1;

    default:
        return 0;
    }
}

static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
    } else {
        result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
        bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
    } else {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
    }
}

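/*
 * Packed BCD layout, as addressed by BCD_DIG_BYTE(): digit 0 is the low
 * nibble of the least significant byte and holds the sign code, and digits
 * 1..31 follow towards the most significant byte, two digits per byte.
 * BCD_DIG_BYTE(n) hides the host byte order so that bcd_get_digit() and
 * bcd_put_digit() can address digit n directly.
 */
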
static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}

static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}

uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? 4 : 8;
        } else if (bcd_cmp_mag(a, b) > 0) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? 4 : 8;
        } else {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
            zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
            cr = (sgnb > 0) ? 4 : 8;
        }
    }

    if (unlikely(invalid)) {
        result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
        cr = 1;
    } else if (overflow) {
        cr |= 1;
    } else if (zero) {
        cr = 2;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

static uint8_t SBOX[256] = {
0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC,
0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A,
0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85,
0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17,
0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88,
0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9,
0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6,
0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94,
0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68,
0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16,
};

static void SubBytes(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = SBOX[a->u8[i]];
    }
}

static uint8_t InvSBOX[256] = {
0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38,
0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D,
0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2,
0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA,
0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A,
0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA,
0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85,
0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20,
0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31,
0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0,
0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26,
0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D,
};

static void InvSubBytes(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = InvSBOX[a->u8[i]];
    }
}

static uint8_t ROTL8(uint8_t x, int n)
{
    return (x << n) | (x >> (8-n));
}

static inline int BIT8(uint8_t x, int n)
{
    return (x & (0x80 >> n)) != 0;
}

static uint8_t GFx02(uint8_t x)
{
    return ROTL8(x, 1) ^ (BIT8(x, 0) ? 0x1A : 0);
}

static uint8_t GFx03(uint8_t x)
{
    return x ^ ROTL8(x, 1) ^ (BIT8(x, 0) ? 0x1A : 0);
}

static uint8_t GFx09(uint8_t x)
{
    uint8_t term2 = ROTL8(x, 3);
    uint8_t term3 = (BIT8(x, 0) ? 0x68 : 0) | (BIT8(x, 1) ? 0x14 : 0) |
                    (BIT8(x, 2) ? 0x02 : 0);
    uint8_t term4 = (BIT8(x, 1) ? 0x20 : 0) | (BIT8(x, 2) ? 0x18 : 0);
    return x ^ term2 ^ term3 ^ term4;
}

static uint8_t GFx0B(uint8_t x)
{
    uint8_t term2 = ROTL8(x, 1);
    uint8_t term3 = (x << 3) | (BIT8(x, 0) ? 0x06 : 0) |
                    (BIT8(x, 2) ? 0x01 : 0);
    uint8_t term4 = (BIT8(x, 0) ? 0x70 : 0) | (BIT8(x, 1) ? 0x06 : 0) |
                    (BIT8(x, 2) ? 0x08 : 0);
    uint8_t term5 = (BIT8(x, 1) ? 0x30 : 0) | (BIT8(x, 2) ? 0x02 : 0);
    uint8_t term6 = BIT8(x, 2) ? 0x10 : 0;
    return x ^ term2 ^ term3 ^ term4 ^ term5 ^ term6;
}

static uint8_t GFx0D(uint8_t x)
{
    uint8_t term2 = ROTL8(x, 2);
    uint8_t term3 = (x << 3) | (BIT8(x, 1) ? 0x04 : 0) |
                    (BIT8(x, 2) ? 0x03 : 0);
    uint8_t term4 = (BIT8(x, 0) ? 0x58 : 0) | (BIT8(x, 1) ? 0x20 : 0);
    uint8_t term5 = (BIT8(x, 1) ? 0x08 : 0) | (BIT8(x, 2) ? 0x10 : 0);
    uint8_t term6 = BIT8(x, 2) ? 0x08 : 0;
    return x ^ term2 ^ term3 ^ term4 ^ term5 ^ term6;
}

static uint8_t GFx0E(uint8_t x)
{
    uint8_t term1 = ROTL8(x, 1);
    uint8_t term2 = (x << 2) | (BIT8(x, 2) ? 0x02 : 0) |
                    (BIT8(x, 1) ? 0x01 : 0);
    uint8_t term3 = (x << 3) | (BIT8(x, 1) ? 0x04 : 0) |
                    (BIT8(x, 2) ? 0x01 : 0);
    uint8_t term4 = (BIT8(x, 0) ? 0x40 : 0) | (BIT8(x, 1) ? 0x28 : 0) |
                    (BIT8(x, 2) ? 0x10 : 0);
    uint8_t term5 = (BIT8(x, 2) ? 0x08 : 0);
    return term1 ^ term2 ^ term3 ^ term4 ^ term5;
}

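/*
 * GFx02..GFx0E multiply a byte by the constants 2, 3, 9, 0x0b, 0x0d and 0x0e
 * in the AES field GF(2^8) (reduction polynomial 0x11b), using rotates and
 * masks rather than lookup tables.  As a quick check, GFx02(0x80) =
 * ROTL8(0x80, 1) ^ 0x1A = 0x01 ^ 0x1A = 0x1b, which matches xtime(0x80).
 */
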
2492 #if defined(HOST_WORDS_BIGENDIAN)
2493 #define MCB(x, i, b) ((x)->u8[(i)*4 + (b)])
2495 #define MCB(x, i, b) ((x)->u8[15 - ((i)*4 + (b))])
static void MixColumns(ppc_avr_t *r, ppc_avr_t *x)
{
    int i;
    for (i = 0; i < 4; i++) {
        MCB(r, i, 0) = GFx02(MCB(x, i, 0)) ^ GFx03(MCB(x, i, 1)) ^
                       MCB(x, i, 2) ^ MCB(x, i, 3);
        MCB(r, i, 1) = MCB(x, i, 0) ^ GFx02(MCB(x, i, 1)) ^
                       GFx03(MCB(x, i, 2)) ^ MCB(x, i, 3);
        MCB(r, i, 2) = MCB(x, i, 0) ^ MCB(x, i, 1) ^
                       GFx02(MCB(x, i, 2)) ^ GFx03(MCB(x, i, 3));
        MCB(r, i, 3) = GFx03(MCB(x, i, 0)) ^ MCB(x, i, 1) ^
                       MCB(x, i, 2) ^ GFx02(MCB(x, i, 3));
    }
}

static void InvMixColumns(ppc_avr_t *r, ppc_avr_t *x)
{
    int i;
    for (i = 0; i < 4; i++) {
        MCB(r, i, 0) = GFx0E(MCB(x, i, 0)) ^ GFx0B(MCB(x, i, 1)) ^
                       GFx0D(MCB(x, i, 2)) ^ GFx09(MCB(x, i, 3));
        MCB(r, i, 1) = GFx09(MCB(x, i, 0)) ^ GFx0E(MCB(x, i, 1)) ^
                       GFx0B(MCB(x, i, 2)) ^ GFx0D(MCB(x, i, 3));
        MCB(r, i, 2) = GFx0D(MCB(x, i, 0)) ^ GFx09(MCB(x, i, 1)) ^
                       GFx0E(MCB(x, i, 2)) ^ GFx0B(MCB(x, i, 3));
        MCB(r, i, 3) = GFx0B(MCB(x, i, 0)) ^ GFx0D(MCB(x, i, 1)) ^
                       GFx09(MCB(x, i, 2)) ^ GFx0E(MCB(x, i, 3));
    }
}

static void ShiftRows(ppc_avr_t *r, ppc_avr_t *x)
{
    MCB(r, 0, 0) = MCB(x, 0, 0);
    MCB(r, 1, 0) = MCB(x, 1, 0);
    MCB(r, 2, 0) = MCB(x, 2, 0);
    MCB(r, 3, 0) = MCB(x, 3, 0);

    MCB(r, 0, 1) = MCB(x, 1, 1);
    MCB(r, 1, 1) = MCB(x, 2, 1);
    MCB(r, 2, 1) = MCB(x, 3, 1);
    MCB(r, 3, 1) = MCB(x, 0, 1);

    MCB(r, 0, 2) = MCB(x, 2, 2);
    MCB(r, 1, 2) = MCB(x, 3, 2);
    MCB(r, 2, 2) = MCB(x, 0, 2);
    MCB(r, 3, 2) = MCB(x, 1, 2);

    MCB(r, 0, 3) = MCB(x, 3, 3);
    MCB(r, 1, 3) = MCB(x, 0, 3);
    MCB(r, 2, 3) = MCB(x, 1, 3);
    MCB(r, 3, 3) = MCB(x, 2, 3);
}

static void InvShiftRows(ppc_avr_t *r, ppc_avr_t *x)
{
    MCB(r, 0, 0) = MCB(x, 0, 0);
    MCB(r, 1, 0) = MCB(x, 1, 0);
    MCB(r, 2, 0) = MCB(x, 2, 0);
    MCB(r, 3, 0) = MCB(x, 3, 0);

    MCB(r, 0, 1) = MCB(x, 3, 1);
    MCB(r, 1, 1) = MCB(x, 0, 1);
    MCB(r, 2, 1) = MCB(x, 1, 1);
    MCB(r, 3, 1) = MCB(x, 2, 1);

    MCB(r, 0, 2) = MCB(x, 2, 2);
    MCB(r, 1, 2) = MCB(x, 3, 2);
    MCB(r, 2, 2) = MCB(x, 0, 2);
    MCB(r, 3, 2) = MCB(x, 1, 2);

    MCB(r, 0, 3) = MCB(x, 1, 3);
    MCB(r, 1, 3) = MCB(x, 2, 3);
    MCB(r, 2, 3) = MCB(x, 3, 3);
    MCB(r, 3, 3) = MCB(x, 0, 3);
}

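/*
 * The vcipher/vncipher helpers below each perform one AES round on the
 * 128-bit operand: vcipher does SubBytes, ShiftRows, MixColumns and the
 * round-key XOR; vcipherlast omits MixColumns; vncipher/vncipherlast are
 * the corresponding inverse rounds; vsbox applies SubBytes alone.
 */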
void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    SubBytes(r, a);
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t vtemp1, vtemp2, vtemp3;
    SubBytes(&vtemp1, a);
    ShiftRows(&vtemp2, &vtemp1);
    MixColumns(&vtemp3, &vtemp2);
    r->u64[0] = vtemp3.u64[0] ^ b->u64[0];
    r->u64[1] = vtemp3.u64[1] ^ b->u64[1];
}

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t vtemp1, vtemp2;
    SubBytes(&vtemp1, a);
    ShiftRows(&vtemp2, &vtemp1);
    r->u64[0] = vtemp2.u64[0] ^ b->u64[0];
    r->u64[1] = vtemp2.u64[1] ^ b->u64[1];
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07.  The RTL is */
    /* incorrect and will be fixed in V2.07B.                      */
    ppc_avr_t vtemp1, vtemp2, vtemp3;
    InvShiftRows(&vtemp1, a);
    InvSubBytes(&vtemp2, &vtemp1);
    vtemp3.u64[0] = vtemp2.u64[0] ^ b->u64[0];
    vtemp3.u64[1] = vtemp2.u64[1] ^ b->u64[1];
    InvMixColumns(r, &vtemp3);
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t vtemp1, vtemp2;
    InvShiftRows(&vtemp1, a);
    InvSubBytes(&vtemp2, &vtemp1);
    r->u64[0] = vtemp2.u64[0] ^ b->u64[0];
    r->u64[1] = vtemp2.u64[1] ^ b->u64[1];
}

#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (3 - (i))
#endif

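/*
 * vshasigmaw evaluates the SHA-256 sigma functions on each word of the
 * source vector: st selects the lower-case (message schedule) or
 * upper-case (compression) sigmas, and the per-element "six" bit selects
 * sigma0 or sigma1 (rotate/shift amounts 7/18/3, 17/19/10, 2/13/22,
 * 6/11/25).
 */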
void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 18) ^
                                    (a->u32[EL_IDX(i)] >> 3);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 19) ^
                                    (a->u32[EL_IDX(i)] >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 13) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 22);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 11) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 25);
            }
        }
    }
}

#undef ROTRu32
#undef EL_IDX

#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (1 - (i))
#endif

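/*
 * vshasigmad is the SHA-512 counterpart, operating on the two doubleword
 * elements with rotate/shift amounts 1/8/7, 19/61/6, 28/34/39 and 14/18/41.
 */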
void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u64) {
        if (st == 0) {
            if ((six & (0x8 >> (2*i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 8) ^
                                    (a->u64[EL_IDX(i)] >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 61) ^
                                    (a->u64[EL_IDX(i)] >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2*i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 34) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 39);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 18) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 41);
            }
        }
    }
}

#undef ROTRu64
#undef EL_IDX

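/*
 * vpermxor: for each result byte, the two nibbles of the control byte in
 * c select one byte from a and one from b, which are then XORed together.
 */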
void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int indexA = c->u8[i] >> 4;
        int indexB = c->u8[i] & 0xF;
#if defined(HOST_WORDS_BIGENDIAN)
        r->u8[i] = a->u8[indexA] ^ b->u8[indexB];
#else
        r->u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
#endif
    }
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

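/*
 * brinc (bit-reversed increment) advances the low MASKBITS bits of arg1
 * in bit-reversed order, restricted to the bits selected by arg2, as
 * typically used to step through FFT-style bit-reversed buffers.
 */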
#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

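/*
 * dlmzb ("determine leftmost zero byte", 40x/440) scans the eight-byte
 * string formed by high:low for a zero byte, reporting the byte count in
 * the low bits of XER and optionally summarising the result in CR0.
 */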
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;