target-ppc: use the softfloat min/max functions
[qemu/ar7.git] / target-ppc / int_helper.c
blob: 61412433d0e91dfc7f4e345897e7bb2238cc35b4

/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "cpu.h"
#include "host-utils.h"
#include "helper.h"

#include "helper_regs.h"

/*****************************************************************************/
/* Fixed point operations helpers */
#if defined(TARGET_PPC64)

/* multiply high word */
uint64_t helper_mulhd(uint64_t arg1, uint64_t arg2)
{
    uint64_t tl, th;

    muls64(&tl, &th, arg1, arg2);
    return th;
}

/* multiply high word unsigned */
uint64_t helper_mulhdu(uint64_t arg1, uint64_t arg2)
{
    uint64_t tl, th;

    mulu64(&tl, &th, arg1, arg2);
    return th;
}
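
/* The following helper treats th in {0, -1} as "no overflow":
 * (uint64_t)(th + 1) <= 1 is a branch-free test for exactly those two
 * values, since th + 1 is then 1 or 0 when viewed as unsigned. */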
uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
{
    int64_t th;
    uint64_t tl;

    muls64(&tl, (uint64_t *)&th, arg1, arg2);
    /* If th != 0 && th != -1, then we had an overflow */
    if (likely((uint64_t)(th + 1) <= 1)) {
        env->xer &= ~(1 << XER_OV);
    } else {
        env->xer |= (1 << XER_OV) | (1 << XER_SO);
    }
    return (int64_t)tl;
}
#endif

target_ulong helper_cntlzw(target_ulong t)
{
    return clz32(t);
}

#if defined(TARGET_PPC64)
target_ulong helper_cntlzd(target_ulong t)
{
    return clz64(t);
}
#endif

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->xer &= ~(1 << XER_CA);
            } else {
                env->xer |= (1 << XER_CA);
            }
        } else {
            ret = (int32_t)value;
            env->xer &= ~(1 << XER_CA);
        }
    } else {
        ret = (int32_t)value >> 31;
        if (ret) {
            env->xer |= (1 << XER_CA);
        } else {
            env->xer &= ~(1 << XER_CA);
        }
    }
    return (target_long)ret;
}
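
/* Note: for both sraw and srad, XER[CA] ends up set only when the
 * source value is negative and at least one 1 bit is shifted out;
 * e.g. sraw(0xffffffff, 1) gives 0xffffffff with CA = 1, while
 * sraw(0x00000002, 1) gives 1 with CA = 0. */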
#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            /* Build the mask as 64-bit: shift can be up to 63 here,
             * which would overflow a plain int constant. */
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->xer &= ~(1 << XER_CA);
            } else {
                env->xer |= (1 << XER_CA);
            }
        } else {
            ret = (int64_t)value;
            env->xer &= ~(1 << XER_CA);
        }
    } else {
        ret = (int64_t)value >> 63;
        if (ret) {
            env->xer |= (1 << XER_CA);
        } else {
            env->xer &= ~(1 << XER_CA);
        }
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}

target_ulong helper_popcntd(target_ulong val)
{
    return ctpop64(val);
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    val = (val & 0x00ff00ff) + ((val >>  8) & 0x00ff00ff);
    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
    return val;
}
#endif
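
/* The popcntb/popcntw helpers above use the classic SWAR reduction:
 * each step adds adjacent 1-, 2-, then 4-bit partial counts in
 * parallel.  popcntb stops once every byte holds its own population
 * count, which is what the popcntb instruction returns; for example,
 * popcntb(0x0103) = 0x0102 (one bit set in byte 0x01, two in 0x03).
 * popcntw carries on for two more steps so each 32-bit word holds its
 * full count. */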

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->xer |= (1 << XER_OV) | (1 << XER_SO);
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->xer |= (1 << XER_OV) | (1 << XER_SO);
        } else {
            env->xer &= ~(1 << XER_OV);
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->xer |= (1 << XER_OV) | (1 << XER_SO);
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->xer &= ~(1 << XER_OV);
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}
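
/* These POWER-bridge divides come in pairs: div/divo divide the 64-bit
 * quantity (arg1 || MQ) by arg2 and leave the remainder in MQ, while
 * divs/divso are plain 32-bit signed divides that also latch the
 * remainder into MQ.  The "o" forms additionally update XER[OV]/XER[SO]
 * on overflow or divide-by-zero. */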

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the craziest instruction ever seen, imho! */
/* Real implementation uses a ROM table. Do the same */
/* Extremely decomposed:
 *   return 256 * log10(pow(10.0, -arg / 256.0) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif
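
/* Sanity check for the formula above: plugging in arg = 0 gives
 * 256 * log10(1.0 + 1.0) + 0.5 = 256 * 0.30103 + 0.5, i.e. roughly 77,
 * which should be the first entry of the generated ROM table. */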

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#else
#define HI_IDX 1
#define LO_IDX 0
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif
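
/* VECTOR_FOR_INORDER_I visits vector elements in PowerPC (big-endian)
 * element order regardless of host byte order: on a little-endian host
 * the elements of ppc_avr_t are stored reversed, so the loop simply
 * runs backwards there. */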

/* If X is a NaN, store the corresponding QNaN into RESULT.  Otherwise,
 * execute the following block. */
#define DO_HANDLE_NAN(result, x)                        \
    if (float32_is_any_nan(x)) {                        \
        CPU_FloatU __f;                                 \
        __f.f = x;                                      \
        __f.l = __f.l | (1 << 22);  /* Set QNaN bit. */ \
        result = __f.f;                                 \
    } else

#define HANDLE_NAN1(result, x)                  \
    DO_HANDLE_NAN(result, x)
#define HANDLE_NAN2(result, x, y)               \
    DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y)
#define HANDLE_NAN3(result, x, y, z)            \
    DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y) DO_HANDLE_NAN(result, z)
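
/* The trailing "else" in DO_HANDLE_NAN makes these macros compose by
 * chaining: HANDLE_NAN3(r, x, y, z) { stmt; } expands to an
 * if-NaN(x) / else if-NaN(y) / else if-NaN(z) / else { stmt; } chain,
 * so the block following the macro runs only when no operand is a NaN;
 * otherwise r receives the offending operand quieted (bit 22 is the
 * float32 quiet-NaN bit). */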

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
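
/* Example behaviour of the generated converters: cvtshsb(300, &sat)
 * saturates to 127 and sets *sat, cvtshsb(-5, &sat) passes -5 through,
 * and the signed-to-unsigned variants clamp negatives to 0, e.g.
 * cvtshub(-5, &sat) = 0. */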

void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}
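
/* vaddcuw computes the carry out of each 32-bit unsigned add: a + b
 * carries exactly when b > 0xffffffff - a, i.e. when ~a < b.  For
 * example, a = 0xffffffff, b = 1 gives ~a = 0 < 1, so the carry bit
 * is 1. */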

#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP
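
/* vminfp/vmaxfp are defined directly in terms of softfloat's
 * float32_min/float32_max (this is the change the commit subject above
 * refers to), presumably so the NaN-aware comparison logic lives in
 * one place instead of being open-coded here. */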

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)    \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)      \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)    \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG
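
/* The averages are computed in a type twice as wide, with + 1 added
 * before the shift, so they round halves up without any risk of
 * overflow: e.g. vavgub of 1 and 2 is (1 + 2 + 1) >> 1 = 2. */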

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint32_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
#undef VCMP_DO
#undef VCMP
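
/* For the record (".") forms, CR6 becomes 0b1000 when the predicate
 * held in every element, 0b0010 when it held in none, and 0b0000
 * otherwise: "all" accumulates an AND of the element results, while
 * "none" accumulates an OR. */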

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            /* ALL_IN does not need to be updated here. */
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}
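
/* vcmpbfp is the bounds check: bit 31 of each result element is set
 * when a > b (upper bound exceeded), bit 30 when a < -b (lower bound
 * exceeded), and both bits (0xc0000000) when the comparison is
 * unordered.  For the dot form, CR6 bit 1 reports that no element
 * failed the in-bounds test; NaN lanes deliberately leave all_in
 * untouched, per the comment above. */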

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            if (float32_is_any_nan(b->f[i])) {                          \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f[i], &s);            \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

void helper_vmaddfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                    ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
            /* Need to do the computation in higher precision and round
             * once at the end. */
            float64 af, bf, cf, t;

            af = float32_to_float64(a->f[i], &env->vec_status);
            bf = float32_to_float64(b->f[i], &env->vec_status);
            cf = float32_to_float64(c->f[i], &env->vec_status);
            t = float64_mul(af, cf, &env->vec_status);
            t = float64_add(t, bf, &env->vec_status);
            r->f[i] = float64_to_float32(t, &env->vec_status);
        }
    }
}
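
/* Doing the multiply-add in float64 keeps the intermediate product
 * exact: two 24-bit float32 significands need at most 48 bits, which
 * fits in float64's 53.  Rounding therefore happens only in the add
 * and in the final conversion back to float32. */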

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
#undef VMINMAX_DO
#undef VMINMAX

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i*2+HI_IDX] = a->element[i];             \
                result.element[i*2+LO_IDX] = b->element[i];             \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMUL_DO(name, mul_element, prod_element, evenp)                 \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] = a->mul_element[i * 2 + HI_IDX] *   \
                                     b->mul_element[i * 2 + HI_IDX];    \
            } else {                                                    \
                r->prod_element[i] = a->mul_element[i * 2 + LO_IDX] *   \
                                     b->mul_element[i * 2 + LO_IDX];    \
            }                                                           \
        }                                                               \
    }
#define VMUL(suffix, mul_element, prod_element)         \
    VMUL_DO(mule##suffix, mul_element, prod_element, 1) \
    VMUL_DO(mulo##suffix, mul_element, prod_element, 0)
VMUL(sb, s8, s16)
VMUL(sh, s16, s32)
VMUL(ub, u8, u16)
VMUL(uh, u16, u32)
#undef VMUL_DO
#undef VMUL

void helper_vnmsubfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
            /* Need to do the computation in higher precision and round
             * once at the end. */
            float64 af, bf, cf, t;

            af = float32_to_float64(a->f[i], &env->vec_status);
            bf = float32_to_float64(b->f[i], &env->vec_status);
            cf = float32_to_float64(c->f[i], &env->vec_status);
            t = float64_mul(af, cf, &env->vec_status);
            t = float64_sub(t, bf, &env->vec_status);
            t = float64_chs(t);
            r->f[i] = float64_to_float32(t, &env->vec_status);
        }
    }
}

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}
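
/* Each permute control byte selects one byte of the 32-byte
 * concatenation a:b (in PowerPC element order): the low four bits
 * index within a vector and bit 4 picks b instead of a.  For example,
 * a selector byte of 0x03 fetches byte 3 of a, and 0x13 fetches
 * byte 3 of b. */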

#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
                                 ((e >> 6) & 0x3e0) |
                                 ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding)                                  \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
                             ppc_avr_t *b)                      \
    {                                                           \
        int i;                                                  \
        float_status s = env->vec_status;                       \
                                                                \
        set_float_rounding_mode(rounding, &s);                  \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
            r->f[i] = float32_round_to_int(b->f[i], &s);        \
        }                                                       \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

#define VROTATE(suffix, element)                                        \
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = (a->element[i] << shift) |                  \
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
        }                                                               \
    }
VROTATE(b, u8)
VROTATE(h, u16)
VROTATE(w, u32)
#undef VROTATE
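
/* In VROTATE (and VSL/VSR below), the mask expression derives the
 * shift-count width from the element size: sizeof of 1/2/4 gives
 * (1 << (3 + 0/1/2)) - 1 = 7/15/31.  Note that for a zero rotate count
 * the right-shift amount equals the full element width, which is
 * formally undefined behaviour in C for the u32 case; in practice the
 * OR with the unshifted value masks this on common hosts. */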

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        float32 t = float32_sqrt(b->f[i], &env->vec_status);

        r->f[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_log2(b->f[i], &env->vec_status);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define LEFT 0
#define RIGHT 1
#else
#define LEFT 1
#define RIGHT 0
#endif
/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check to make sure that they
 * are, to conform to what real hardware appears to do.  */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->u8[LO_IDX*15] & 0x7;                             \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
                                                                        \
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
            } else {                                                    \
                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
                                                                        \
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, LEFT)
VSHIFT(r, RIGHT)
#undef VSHIFT
#undef LEFT
#undef RIGHT

#define VSL(suffix, element)                                            \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8)
VSL(h, u16)
VSL(w, u32)
#undef VSL

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

#if defined(HOST_WORDS_BIGENDIAN)
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.u8[i] = b->u8[index - 0x10];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = (16 - sh) + i;
        if (index > 0xf) {
            result.u8[i] = a->u8[index - 0x10];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
#endif
    *r = result;
}
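
/* vsldoi takes the 32-byte concatenation a:b and returns 16
 * consecutive bytes of it starting at byte offset sh; e.g. sh = 1
 * yields bytes a[1..15] followed by b[0] (in PowerPC byte order). */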

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16-sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

/* Experimental testing shows that hardware masks the immediate.  */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#else
#define SPLAT_ELEMENT(element)                                  \
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
#endif
#define VSPLT(suffix, element)                                          \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                   \
        uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = s;                                          \
        }                                                               \
    }
VSPLT(b, u8)
VSPLT(h, u16)
VSPLT(w, u32)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED

#define VSPLTI(suffix, element, splat_type)                     \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
    {                                                           \
        splat_type x = (int8_t)(splat << 3) >> 3;               \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = x;                                  \
        }                                                       \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI
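
/* The splat immediate is a 5-bit signed field; (int8_t)(splat << 3) >> 3
 * sign-extends it, e.g. splat = 0x1f becomes -1 while splat = 0x0f
 * stays 15. */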

#define VSR(suffix, element)                                            \
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] >> shift;                     \
        }                                                               \
    }
VSR(ab, s8)
VSR(ah, s16)
VSR(aw, s32)
VSR(b, u8)
VSR(h, u16)
VSR(w, u32)
#undef VSR

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}
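
/* vsubcuw stores the complement of the borrow out of a - b: the
 * element is 1 exactly when a >= b, i.e. when the unsigned subtract
 * does not borrow. */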

void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = ARRAY_SIZE(r->s32)-1;
#else
    upper = 0;
#endif
    t = (int64_t)b->s32[upper];
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->s32[i];
        result.s32[i] = 0;
    }
    result.s32[upper] = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = 1;
#else
    upper = 0;
#endif
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->s32[upper + i * 2];

        result.u64[i] = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->s32[2 * i + j];
        }
        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi)                                      \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)        \
    {                                                           \
        int i;                                                  \
        ppc_avr_t result;                                       \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {              \
            uint16_t e = b->u16[hi ? i : i+4];                  \
            uint8_t a = (e >> 15) ? 0xff : 0;                   \
            uint8_t r = (e >> 10) & 0x1f;                       \
            uint8_t g = (e >> 5) & 0x1f;                        \
            uint8_t b = e & 0x1f;                               \
                                                                \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
        }                                                       \
        *r = result;                                            \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#undef DO_HANDLE_NAN
#undef HANDLE_NAN1
#undef HANDLE_NAN2
#undef HANDLE_NAN3
#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
#undef LO_IDX

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}
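
/* hbrev maps a nibble to its bit reversal (e.g. 0x1 = 0b0001 becomes
 * 0x8 = 0b1000), so byte_reverse needs just two table lookups and
 * word_reverse reverses all 32 bits via four byte_reverse calls. */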

#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}
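
/* brinc is the SPE bit-reversed increment used for FFT addressing:
 * within the masked low bits it conceptually reverses, increments, and
 * reverses back.  With a 3-bit mask this steps through
 * 0, 4, 2, 6, 1, 5, 3, 7, i.e. counting in bit-reversed order. */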

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}
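
/* dlmzb ("determine leftmost zero byte") scans the eight bytes of
 * high:low most-significant byte first; i ends up as the 1-based
 * position of the first zero byte and is written to the low bits of
 * XER.  With update_Rc set, CR0 records whether the zero byte was
 * found in high (0x4), in low (0x8), or not at all (0x2), plus the
 * SO bit. */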