/*
 * QEMU TCG support -- s390x vector floating point instruction support
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "s390x-internal.h"
#include "vec.h"
#include "tcg_s390x.h"
#include "tcg/tcg-gvec-desc.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"

/*
 * Vector interruption codes (VIC). check_ieee_exc() places one of these in
 * the low four bits of the VXC it returns.
 */
#define VIC_INVALID         0x1
#define VIC_DIVBYZERO       0x2
#define VIC_OVERFLOW        0x3
#define VIC_UNDERFLOW       0x4
#define VIC_INEXACT         0x5

28 /* returns the VEX. If the VEX is 0, there is no trap */
29 static uint8_t check_ieee_exc(CPUS390XState
*env
, uint8_t enr
, bool XxC
,
32 uint8_t vece_exc
= 0, trap_exc
;
35 /* Retrieve and clear the softfloat exceptions */
36 qemu_exc
= env
->fpu_status
.float_exception_flags
;
40 env
->fpu_status
.float_exception_flags
= 0;
42 vece_exc
= s390_softfloat_exc_to_ieee(qemu_exc
);
44 /* Add them to the vector-wide s390x exception bits */
47 /* Check for traps and construct the VXC */
48 trap_exc
= vece_exc
& env
->fpc
>> 24;
50 if (trap_exc
& S390_IEEE_MASK_INVALID
) {
51 return enr
<< 4 | VIC_INVALID
;
52 } else if (trap_exc
& S390_IEEE_MASK_DIVBYZERO
) {
53 return enr
<< 4 | VIC_DIVBYZERO
;
54 } else if (trap_exc
& S390_IEEE_MASK_OVERFLOW
) {
55 return enr
<< 4 | VIC_OVERFLOW
;
56 } else if (trap_exc
& S390_IEEE_MASK_UNDERFLOW
) {
57 return enr
<< 4 | VIC_UNDERFLOW
;
59 g_assert(trap_exc
& S390_IEEE_MASK_INEXACT
);
60 /* inexact has lowest priority on traps */
61 return enr
<< 4 | VIC_INEXACT
;
67 static void handle_ieee_exc(CPUS390XState
*env
, uint8_t vxc
, uint8_t vec_exc
,
71 /* on traps, the fpc flags are not updated, instruction is suppressed */
72 tcg_s390_vector_exception(env
, vxc
, retaddr
);
75 /* indicate exceptions for all elements combined */
76 env
->fpc
|= vec_exc
<< 16;
80 static float32
s390_vec_read_float32(const S390Vector
*v
, uint8_t enr
)
82 return make_float32(s390_vec_read_element32(v
, enr
));
85 static float64
s390_vec_read_float64(const S390Vector
*v
, uint8_t enr
)
87 return make_float64(s390_vec_read_element64(v
, enr
));
90 static float128
s390_vec_read_float128(const S390Vector
*v
)
92 return make_float128(s390_vec_read_element64(v
, 0),
93 s390_vec_read_element64(v
, 1));
96 static void s390_vec_write_float32(S390Vector
*v
, uint8_t enr
, float32 data
)
98 return s390_vec_write_element32(v
, enr
, data
);
101 static void s390_vec_write_float64(S390Vector
*v
, uint8_t enr
, float64 data
)
103 return s390_vec_write_element64(v
, enr
, data
);
106 static void s390_vec_write_float128(S390Vector
*v
, float128 data
)
108 s390_vec_write_element64(v
, 0, data
.high
);
109 s390_vec_write_element64(v
, 1, data
.low
);
112 typedef float32 (*vop32_2_fn
)(float32 a
, float_status
*s
);
113 static void vop32_2(S390Vector
*v1
, const S390Vector
*v2
, CPUS390XState
*env
,
114 bool s
, bool XxC
, uint8_t erm
, vop32_2_fn fn
,
117 uint8_t vxc
, vec_exc
= 0;
121 old_mode
= s390_swap_bfp_rounding_mode(env
, erm
);
122 for (i
= 0; i
< 4; i
++) {
123 const float32 a
= s390_vec_read_float32(v2
, i
);
125 s390_vec_write_float32(&tmp
, i
, fn(a
, &env
->fpu_status
));
126 vxc
= check_ieee_exc(env
, i
, XxC
, &vec_exc
);
131 s390_restore_bfp_rounding_mode(env
, old_mode
);
132 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
136 typedef float64 (*vop64_2_fn
)(float64 a
, float_status
*s
);
137 static void vop64_2(S390Vector
*v1
, const S390Vector
*v2
, CPUS390XState
*env
,
138 bool s
, bool XxC
, uint8_t erm
, vop64_2_fn fn
,
141 uint8_t vxc
, vec_exc
= 0;
145 old_mode
= s390_swap_bfp_rounding_mode(env
, erm
);
146 for (i
= 0; i
< 2; i
++) {
147 const float64 a
= s390_vec_read_float64(v2
, i
);
149 s390_vec_write_float64(&tmp
, i
, fn(a
, &env
->fpu_status
));
150 vxc
= check_ieee_exc(env
, i
, XxC
, &vec_exc
);
155 s390_restore_bfp_rounding_mode(env
, old_mode
);
156 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
160 typedef float128 (*vop128_2_fn
)(float128 a
, float_status
*s
);
161 static void vop128_2(S390Vector
*v1
, const S390Vector
*v2
, CPUS390XState
*env
,
162 bool s
, bool XxC
, uint8_t erm
, vop128_2_fn fn
,
165 const float128 a
= s390_vec_read_float128(v2
);
166 uint8_t vxc
, vec_exc
= 0;
170 old_mode
= s390_swap_bfp_rounding_mode(env
, erm
);
171 s390_vec_write_float128(&tmp
, fn(a
, &env
->fpu_status
));
172 vxc
= check_ieee_exc(env
, 0, XxC
, &vec_exc
);
173 s390_restore_bfp_rounding_mode(env
, old_mode
);
174 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
178 static float32
vcdg32(float32 a
, float_status
*s
)
180 return int32_to_float32(a
, s
);
183 static float32
vcdlg32(float32 a
, float_status
*s
)
185 return uint32_to_float32(a
, s
);
188 static float32
vcgd32(float32 a
, float_status
*s
)
190 const float32 tmp
= float32_to_int32(a
, s
);
192 return float32_is_any_nan(a
) ? INT32_MIN
: tmp
;
195 static float32
vclgd32(float32 a
, float_status
*s
)
197 const float32 tmp
= float32_to_uint32(a
, s
);
199 return float32_is_any_nan(a
) ? 0 : tmp
;
202 static float64
vcdg64(float64 a
, float_status
*s
)
204 return int64_to_float64(a
, s
);
207 static float64
vcdlg64(float64 a
, float_status
*s
)
209 return uint64_to_float64(a
, s
);
212 static float64
vcgd64(float64 a
, float_status
*s
)
214 const float64 tmp
= float64_to_int64(a
, s
);
216 return float64_is_any_nan(a
) ? INT64_MIN
: tmp
;
219 static float64
vclgd64(float64 a
, float_status
*s
)
221 const float64 tmp
= float64_to_uint64(a
, s
);
223 return float64_is_any_nan(a
) ? 0 : tmp
;
226 #define DEF_GVEC_VOP2_FN(NAME, FN, BITS) \
227 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \
230 const uint8_t erm = extract32(simd_data(desc), 4, 4); \
231 const bool se = extract32(simd_data(desc), 3, 1); \
232 const bool XxC = extract32(simd_data(desc), 2, 1); \
234 vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); \
237 #define DEF_GVEC_VOP2_32(NAME) \
238 DEF_GVEC_VOP2_FN(NAME, NAME##32, 32)
240 #define DEF_GVEC_VOP2_64(NAME) \
241 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64)
243 #define DEF_GVEC_VOP2(NAME, OP) \
244 DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32) \
245 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64) \
246 DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128)
248 DEF_GVEC_VOP2_32(vcdg
)
249 DEF_GVEC_VOP2_32(vcdlg
)
250 DEF_GVEC_VOP2_32(vcgd
)
251 DEF_GVEC_VOP2_32(vclgd
)
252 DEF_GVEC_VOP2_64(vcdg
)
253 DEF_GVEC_VOP2_64(vcdlg
)
254 DEF_GVEC_VOP2_64(vcgd
)
255 DEF_GVEC_VOP2_64(vclgd
)
256 DEF_GVEC_VOP2(vfi
, round_to_int
)
257 DEF_GVEC_VOP2(vfsq
, sqrt
)
259 typedef float32 (*vop32_3_fn
)(float32 a
, float32 b
, float_status
*s
);
260 static void vop32_3(S390Vector
*v1
, const S390Vector
*v2
, const S390Vector
*v3
,
261 CPUS390XState
*env
, bool s
, vop32_3_fn fn
,
264 uint8_t vxc
, vec_exc
= 0;
268 for (i
= 0; i
< 4; i
++) {
269 const float32 a
= s390_vec_read_float32(v2
, i
);
270 const float32 b
= s390_vec_read_float32(v3
, i
);
272 s390_vec_write_float32(&tmp
, i
, fn(a
, b
, &env
->fpu_status
));
273 vxc
= check_ieee_exc(env
, i
, false, &vec_exc
);
278 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
282 typedef float64 (*vop64_3_fn
)(float64 a
, float64 b
, float_status
*s
);
283 static void vop64_3(S390Vector
*v1
, const S390Vector
*v2
, const S390Vector
*v3
,
284 CPUS390XState
*env
, bool s
, vop64_3_fn fn
,
287 uint8_t vxc
, vec_exc
= 0;
291 for (i
= 0; i
< 2; i
++) {
292 const float64 a
= s390_vec_read_float64(v2
, i
);
293 const float64 b
= s390_vec_read_float64(v3
, i
);
295 s390_vec_write_float64(&tmp
, i
, fn(a
, b
, &env
->fpu_status
));
296 vxc
= check_ieee_exc(env
, i
, false, &vec_exc
);
301 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
305 typedef float128 (*vop128_3_fn
)(float128 a
, float128 b
, float_status
*s
);
306 static void vop128_3(S390Vector
*v1
, const S390Vector
*v2
, const S390Vector
*v3
,
307 CPUS390XState
*env
, bool s
, vop128_3_fn fn
,
310 const float128 a
= s390_vec_read_float128(v2
);
311 const float128 b
= s390_vec_read_float128(v3
);
312 uint8_t vxc
, vec_exc
= 0;
315 s390_vec_write_float128(&tmp
, fn(a
, b
, &env
->fpu_status
));
316 vxc
= check_ieee_exc(env
, 0, false, &vec_exc
);
317 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
321 #define DEF_GVEC_VOP3_B(NAME, OP, BITS) \
322 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
323 CPUS390XState *env, uint32_t desc) \
325 const bool se = extract32(simd_data(desc), 3, 1); \
327 vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC()); \
330 #define DEF_GVEC_VOP3(NAME, OP) \
331 DEF_GVEC_VOP3_B(NAME, OP, 32) \
332 DEF_GVEC_VOP3_B(NAME, OP, 64) \
333 DEF_GVEC_VOP3_B(NAME, OP, 128)
335 DEF_GVEC_VOP3(vfa
, add
)
336 DEF_GVEC_VOP3(vfs
, sub
)
337 DEF_GVEC_VOP3(vfd
, div
)
338 DEF_GVEC_VOP3(vfm
, mul
)
340 static int wfc32(const S390Vector
*v1
, const S390Vector
*v2
,
341 CPUS390XState
*env
, bool signal
, uintptr_t retaddr
)
343 /* only the zero-indexed elements are compared */
344 const float32 a
= s390_vec_read_float32(v1
, 0);
345 const float32 b
= s390_vec_read_float32(v2
, 0);
346 uint8_t vxc
, vec_exc
= 0;
350 cmp
= float32_compare(a
, b
, &env
->fpu_status
);
352 cmp
= float32_compare_quiet(a
, b
, &env
->fpu_status
);
354 vxc
= check_ieee_exc(env
, 0, false, &vec_exc
);
355 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
357 return float_comp_to_cc(env
, cmp
);
360 static int wfc64(const S390Vector
*v1
, const S390Vector
*v2
,
361 CPUS390XState
*env
, bool signal
, uintptr_t retaddr
)
363 /* only the zero-indexed elements are compared */
364 const float64 a
= s390_vec_read_float64(v1
, 0);
365 const float64 b
= s390_vec_read_float64(v2
, 0);
366 uint8_t vxc
, vec_exc
= 0;
370 cmp
= float64_compare(a
, b
, &env
->fpu_status
);
372 cmp
= float64_compare_quiet(a
, b
, &env
->fpu_status
);
374 vxc
= check_ieee_exc(env
, 0, false, &vec_exc
);
375 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
377 return float_comp_to_cc(env
, cmp
);
380 static int wfc128(const S390Vector
*v1
, const S390Vector
*v2
,
381 CPUS390XState
*env
, bool signal
, uintptr_t retaddr
)
383 /* only the zero-indexed elements are compared */
384 const float128 a
= s390_vec_read_float128(v1
);
385 const float128 b
= s390_vec_read_float128(v2
);
386 uint8_t vxc
, vec_exc
= 0;
390 cmp
= float128_compare(a
, b
, &env
->fpu_status
);
392 cmp
= float128_compare_quiet(a
, b
, &env
->fpu_status
);
394 vxc
= check_ieee_exc(env
, 0, false, &vec_exc
);
395 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
397 return float_comp_to_cc(env
, cmp
);
400 #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \
401 void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \
402 CPUS390XState *env, uint32_t desc) \
404 env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \
407 #define DEF_GVEC_WFC(NAME, SIGNAL) \
408 DEF_GVEC_WFC_B(NAME, SIGNAL, 32) \
409 DEF_GVEC_WFC_B(NAME, SIGNAL, 64) \
410 DEF_GVEC_WFC_B(NAME, SIGNAL, 128)
412 DEF_GVEC_WFC(wfc
, false)
413 DEF_GVEC_WFC(wfk
, true)
415 typedef bool (*vfc32_fn
)(float32 a
, float32 b
, float_status
*status
);
416 static int vfc32(S390Vector
*v1
, const S390Vector
*v2
, const S390Vector
*v3
,
417 CPUS390XState
*env
, bool s
, vfc32_fn fn
, uintptr_t retaddr
)
419 uint8_t vxc
, vec_exc
= 0;
424 for (i
= 0; i
< 4; i
++) {
425 const float32 a
= s390_vec_read_float32(v2
, i
);
426 const float32 b
= s390_vec_read_float32(v3
, i
);
428 /* swap the order of the parameters, so we can use existing functions */
429 if (fn(b
, a
, &env
->fpu_status
)) {
431 s390_vec_write_element32(&tmp
, i
, -1u);
433 vxc
= check_ieee_exc(env
, i
, false, &vec_exc
);
439 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
442 return s
|| match
== 4 ? 0 : 1;
447 typedef bool (*vfc64_fn
)(float64 a
, float64 b
, float_status
*status
);
448 static int vfc64(S390Vector
*v1
, const S390Vector
*v2
, const S390Vector
*v3
,
449 CPUS390XState
*env
, bool s
, vfc64_fn fn
, uintptr_t retaddr
)
451 uint8_t vxc
, vec_exc
= 0;
456 for (i
= 0; i
< 2; i
++) {
457 const float64 a
= s390_vec_read_float64(v2
, i
);
458 const float64 b
= s390_vec_read_float64(v3
, i
);
460 /* swap the order of the parameters, so we can use existing functions */
461 if (fn(b
, a
, &env
->fpu_status
)) {
463 s390_vec_write_element64(&tmp
, i
, -1ull);
465 vxc
= check_ieee_exc(env
, i
, false, &vec_exc
);
471 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
474 return s
|| match
== 2 ? 0 : 1;
479 typedef bool (*vfc128_fn
)(float128 a
, float128 b
, float_status
*status
);
480 static int vfc128(S390Vector
*v1
, const S390Vector
*v2
, const S390Vector
*v3
,
481 CPUS390XState
*env
, bool s
, vfc128_fn fn
, uintptr_t retaddr
)
483 const float128 a
= s390_vec_read_float128(v2
);
484 const float128 b
= s390_vec_read_float128(v3
);
485 uint8_t vxc
, vec_exc
= 0;
489 /* swap the order of the parameters, so we can use existing functions */
490 if (fn(b
, a
, &env
->fpu_status
)) {
492 s390_vec_write_element64(&tmp
, 0, -1ull);
493 s390_vec_write_element64(&tmp
, 1, -1ull);
495 vxc
= check_ieee_exc(env
, 0, false, &vec_exc
);
496 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
498 return match
? 0 : 3;
501 #define DEF_GVEC_VFC_B(NAME, OP, BITS) \
502 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
503 CPUS390XState *env, uint32_t desc) \
505 const bool se = extract32(simd_data(desc), 3, 1); \
506 const bool sq = extract32(simd_data(desc), 2, 1); \
507 vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \
509 vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \
512 void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3, \
513 CPUS390XState *env, uint32_t desc) \
515 const bool se = extract32(simd_data(desc), 3, 1); \
516 const bool sq = extract32(simd_data(desc), 2, 1); \
517 vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \
519 env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \
522 #define DEF_GVEC_VFC(NAME, OP) \
523 DEF_GVEC_VFC_B(NAME, OP, 32) \
524 DEF_GVEC_VFC_B(NAME, OP, 64) \
525 DEF_GVEC_VFC_B(NAME, OP, 128) \
527 DEF_GVEC_VFC(vfce, eq)
528 DEF_GVEC_VFC(vfch
, lt
)
529 DEF_GVEC_VFC(vfche
, le
)
531 void HELPER(gvec_vfll32
)(void *v1
, const void *v2
, CPUS390XState
*env
,
534 const bool s
= extract32(simd_data(desc
), 3, 1);
535 uint8_t vxc
, vec_exc
= 0;
539 for (i
= 0; i
< 2; i
++) {
540 /* load from even element */
541 const float32 a
= s390_vec_read_element32(v2
, i
* 2);
542 const uint64_t ret
= float32_to_float64(a
, &env
->fpu_status
);
544 s390_vec_write_element64(&tmp
, i
, ret
);
545 /* indicate the source element */
546 vxc
= check_ieee_exc(env
, i
* 2, false, &vec_exc
);
551 handle_ieee_exc(env
, vxc
, vec_exc
, GETPC());
552 *(S390Vector
*)v1
= tmp
;
555 void HELPER(gvec_vfll64
)(void *v1
, const void *v2
, CPUS390XState
*env
,
558 /* load from even element */
559 const float128 ret
= float64_to_float128(s390_vec_read_float64(v2
, 0),
561 uint8_t vxc
, vec_exc
= 0;
563 vxc
= check_ieee_exc(env
, 0, false, &vec_exc
);
564 handle_ieee_exc(env
, vxc
, vec_exc
, GETPC());
565 s390_vec_write_float128(v1
, ret
);
568 void HELPER(gvec_vflr64
)(void *v1
, const void *v2
, CPUS390XState
*env
,
571 const uint8_t erm
= extract32(simd_data(desc
), 4, 4);
572 const bool s
= extract32(simd_data(desc
), 3, 1);
573 const bool XxC
= extract32(simd_data(desc
), 2, 1);
574 uint8_t vxc
, vec_exc
= 0;
578 old_mode
= s390_swap_bfp_rounding_mode(env
, erm
);
579 for (i
= 0; i
< 2; i
++) {
580 float64 a
= s390_vec_read_element64(v2
, i
);
581 uint32_t ret
= float64_to_float32(a
, &env
->fpu_status
);
583 /* place at even element */
584 s390_vec_write_element32(&tmp
, i
* 2, ret
);
585 /* indicate the source element */
586 vxc
= check_ieee_exc(env
, i
, XxC
, &vec_exc
);
591 s390_restore_bfp_rounding_mode(env
, old_mode
);
592 handle_ieee_exc(env
, vxc
, vec_exc
, GETPC());
593 *(S390Vector
*)v1
= tmp
;
596 void HELPER(gvec_vflr128
)(void *v1
, const void *v2
, CPUS390XState
*env
,
599 const uint8_t erm
= extract32(simd_data(desc
), 4, 4);
600 const bool XxC
= extract32(simd_data(desc
), 2, 1);
601 uint8_t vxc
, vec_exc
= 0;
605 old_mode
= s390_swap_bfp_rounding_mode(env
, erm
);
606 ret
= float128_to_float64(s390_vec_read_float128(v2
), &env
->fpu_status
);
607 vxc
= check_ieee_exc(env
, 0, XxC
, &vec_exc
);
608 s390_restore_bfp_rounding_mode(env
, old_mode
);
609 handle_ieee_exc(env
, vxc
, vec_exc
, GETPC());
611 /* place at even element, odd element is unpredictable */
612 s390_vec_write_float64(v1
, 0, ret
);
615 static void vfma32(S390Vector
*v1
, const S390Vector
*v2
, const S390Vector
*v3
,
616 const S390Vector
*v4
, CPUS390XState
*env
, bool s
, int flags
,
619 uint8_t vxc
, vec_exc
= 0;
623 for (i
= 0; i
< 4; i
++) {
624 const float32 a
= s390_vec_read_float32(v2
, i
);
625 const float32 b
= s390_vec_read_float32(v3
, i
);
626 const float32 c
= s390_vec_read_float32(v4
, i
);
627 float32 ret
= float32_muladd(a
, b
, c
, flags
, &env
->fpu_status
);
629 s390_vec_write_float32(&tmp
, i
, ret
);
630 vxc
= check_ieee_exc(env
, i
, false, &vec_exc
);
635 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
639 static void vfma64(S390Vector
*v1
, const S390Vector
*v2
, const S390Vector
*v3
,
640 const S390Vector
*v4
, CPUS390XState
*env
, bool s
, int flags
,
643 uint8_t vxc
, vec_exc
= 0;
647 for (i
= 0; i
< 2; i
++) {
648 const float64 a
= s390_vec_read_float64(v2
, i
);
649 const float64 b
= s390_vec_read_float64(v3
, i
);
650 const float64 c
= s390_vec_read_float64(v4
, i
);
651 const float64 ret
= float64_muladd(a
, b
, c
, flags
, &env
->fpu_status
);
653 s390_vec_write_float64(&tmp
, i
, ret
);
654 vxc
= check_ieee_exc(env
, i
, false, &vec_exc
);
659 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
663 static void vfma128(S390Vector
*v1
, const S390Vector
*v2
, const S390Vector
*v3
,
664 const S390Vector
*v4
, CPUS390XState
*env
, bool s
, int flags
,
667 const float128 a
= s390_vec_read_float128(v2
);
668 const float128 b
= s390_vec_read_float128(v3
);
669 const float128 c
= s390_vec_read_float128(v4
);
670 uint8_t vxc
, vec_exc
= 0;
673 ret
= float128_muladd(a
, b
, c
, flags
, &env
->fpu_status
);
674 vxc
= check_ieee_exc(env
, 0, false, &vec_exc
);
675 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
676 s390_vec_write_float128(v1
, ret
);
679 #define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \
680 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
681 const void *v4, CPUS390XState *env, \
684 const bool se = extract32(simd_data(desc), 3, 1); \
686 vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \
689 #define DEF_GVEC_VFMA(NAME, FLAGS) \
690 DEF_GVEC_VFMA_B(NAME, FLAGS, 32) \
691 DEF_GVEC_VFMA_B(NAME, FLAGS, 64) \
692 DEF_GVEC_VFMA_B(NAME, FLAGS, 128)
694 DEF_GVEC_VFMA(vfma
, 0)
695 DEF_GVEC_VFMA(vfms
, float_muladd_negate_c
)
696 DEF_GVEC_VFMA(vfnma
, float_muladd_negate_result
)
697 DEF_GVEC_VFMA(vfnms
, float_muladd_negate_c
| float_muladd_negate_result
)
699 void HELPER(gvec_vftci32
)(void *v1
, const void *v2
, CPUS390XState
*env
,
702 uint16_t i3
= extract32(simd_data(desc
), 4, 12);
703 bool s
= extract32(simd_data(desc
), 3, 1);
706 for (i
= 0; i
< 4; i
++) {
707 float32 a
= s390_vec_read_float32(v2
, i
);
709 if (float32_dcmask(env
, a
) & i3
) {
711 s390_vec_write_element32(v1
, i
, -1u);
713 s390_vec_write_element32(v1
, i
, 0);
720 if (match
== 4 || (s
&& match
)) {
729 void HELPER(gvec_vftci64
)(void *v1
, const void *v2
, CPUS390XState
*env
,
732 const uint16_t i3
= extract32(simd_data(desc
), 4, 12);
733 const bool s
= extract32(simd_data(desc
), 3, 1);
736 for (i
= 0; i
< 2; i
++) {
737 const float64 a
= s390_vec_read_float64(v2
, i
);
739 if (float64_dcmask(env
, a
) & i3
) {
741 s390_vec_write_element64(v1
, i
, -1ull);
743 s390_vec_write_element64(v1
, i
, 0);
750 if (match
== 2 || (s
&& match
)) {
759 void HELPER(gvec_vftci128
)(void *v1
, const void *v2
, CPUS390XState
*env
,
762 const float128 a
= s390_vec_read_float128(v2
);
763 uint16_t i3
= extract32(simd_data(desc
), 4, 12);
765 if (float128_dcmask(env
, a
) & i3
) {
767 s390_vec_write_element64(v1
, 0, -1ull);
768 s390_vec_write_element64(v1
, 1, -1ull);
771 s390_vec_write_element64(v1
, 0, 0);
772 s390_vec_write_element64(v1
, 1, 0);
/*
 * Flavour of a vector minimum/maximum operation. IEEE is handled directly
 * with softfloat's minnum/maxnum family; every other flavour goes through
 * vfmin_res()/vfmax_res() to decide NaN and signed-zero cases.
 */
typedef enum S390MinMaxType {
    S390_MINMAX_TYPE_IEEE = 0,
    S390_MINMAX_TYPE_JAVA,
    S390_MINMAX_TYPE_C_MACRO,
    S390_MINMAX_TYPE_CPP,
    S390_MINMAX_TYPE_F,
} S390MinMaxType;

/*
 * Decision returned by vfmin_res()/vfmax_res(): compute the softfloat
 * min/max, pick operand A or B unchanged, or pick A or B after passing it
 * through the softfloat silence_nan function.
 */
typedef enum S390MinMaxRes {
    S390_MINMAX_RES_MINMAX = 0,
    S390_MINMAX_RES_A,
    S390_MINMAX_RES_B,
    S390_MINMAX_RES_SILENCE_A,
    S390_MINMAX_RES_SILENCE_B,
} S390MinMaxRes;

792 static S390MinMaxRes
vfmin_res(uint16_t dcmask_a
, uint16_t dcmask_b
,
793 S390MinMaxType type
, float_status
*s
)
795 const bool neg_a
= dcmask_a
& DCMASK_NEGATIVE
;
796 const bool nan_a
= dcmask_a
& DCMASK_NAN
;
797 const bool nan_b
= dcmask_b
& DCMASK_NAN
;
799 g_assert(type
> S390_MINMAX_TYPE_IEEE
&& type
<= S390_MINMAX_TYPE_F
);
801 if (unlikely((dcmask_a
| dcmask_b
) & DCMASK_NAN
)) {
802 const bool sig_a
= dcmask_a
& DCMASK_SIGNALING_NAN
;
803 const bool sig_b
= dcmask_b
& DCMASK_SIGNALING_NAN
;
805 if ((dcmask_a
| dcmask_b
) & DCMASK_SIGNALING_NAN
) {
806 s
->float_exception_flags
|= float_flag_invalid
;
809 case S390_MINMAX_TYPE_JAVA
:
811 return S390_MINMAX_RES_SILENCE_A
;
813 return S390_MINMAX_RES_SILENCE_B
;
815 return nan_a
? S390_MINMAX_RES_A
: S390_MINMAX_RES_B
;
816 case S390_MINMAX_TYPE_F
:
817 return nan_b
? S390_MINMAX_RES_A
: S390_MINMAX_RES_B
;
818 case S390_MINMAX_TYPE_C_MACRO
:
819 s
->float_exception_flags
|= float_flag_invalid
;
820 return S390_MINMAX_RES_B
;
821 case S390_MINMAX_TYPE_CPP
:
822 s
->float_exception_flags
|= float_flag_invalid
;
823 return S390_MINMAX_RES_A
;
825 g_assert_not_reached();
827 } else if (unlikely((dcmask_a
& DCMASK_ZERO
) && (dcmask_b
& DCMASK_ZERO
))) {
829 case S390_MINMAX_TYPE_JAVA
:
830 return neg_a
? S390_MINMAX_RES_A
: S390_MINMAX_RES_B
;
831 case S390_MINMAX_TYPE_C_MACRO
:
832 return S390_MINMAX_RES_B
;
833 case S390_MINMAX_TYPE_F
:
834 return !neg_a
? S390_MINMAX_RES_B
: S390_MINMAX_RES_A
;
835 case S390_MINMAX_TYPE_CPP
:
836 return S390_MINMAX_RES_A
;
838 g_assert_not_reached();
841 return S390_MINMAX_RES_MINMAX
;
844 static S390MinMaxRes
vfmax_res(uint16_t dcmask_a
, uint16_t dcmask_b
,
845 S390MinMaxType type
, float_status
*s
)
847 g_assert(type
> S390_MINMAX_TYPE_IEEE
&& type
<= S390_MINMAX_TYPE_F
);
849 if (unlikely((dcmask_a
| dcmask_b
) & DCMASK_NAN
)) {
850 const bool sig_a
= dcmask_a
& DCMASK_SIGNALING_NAN
;
851 const bool sig_b
= dcmask_b
& DCMASK_SIGNALING_NAN
;
852 const bool nan_a
= dcmask_a
& DCMASK_NAN
;
853 const bool nan_b
= dcmask_b
& DCMASK_NAN
;
855 if ((dcmask_a
| dcmask_b
) & DCMASK_SIGNALING_NAN
) {
856 s
->float_exception_flags
|= float_flag_invalid
;
859 case S390_MINMAX_TYPE_JAVA
:
861 return S390_MINMAX_RES_SILENCE_A
;
863 return S390_MINMAX_RES_SILENCE_B
;
865 return nan_a
? S390_MINMAX_RES_A
: S390_MINMAX_RES_B
;
866 case S390_MINMAX_TYPE_F
:
867 return nan_b
? S390_MINMAX_RES_A
: S390_MINMAX_RES_B
;
868 case S390_MINMAX_TYPE_C_MACRO
:
869 s
->float_exception_flags
|= float_flag_invalid
;
870 return S390_MINMAX_RES_B
;
871 case S390_MINMAX_TYPE_CPP
:
872 s
->float_exception_flags
|= float_flag_invalid
;
873 return S390_MINMAX_RES_A
;
875 g_assert_not_reached();
877 } else if (unlikely((dcmask_a
& DCMASK_ZERO
) && (dcmask_b
& DCMASK_ZERO
))) {
878 const bool neg_a
= dcmask_a
& DCMASK_NEGATIVE
;
881 case S390_MINMAX_TYPE_JAVA
:
882 case S390_MINMAX_TYPE_F
:
883 return neg_a
? S390_MINMAX_RES_B
: S390_MINMAX_RES_A
;
884 case S390_MINMAX_TYPE_C_MACRO
:
885 return S390_MINMAX_RES_B
;
886 case S390_MINMAX_TYPE_CPP
:
887 return S390_MINMAX_RES_A
;
889 g_assert_not_reached();
892 return S390_MINMAX_RES_MINMAX
;
895 static S390MinMaxRes
vfminmax_res(uint16_t dcmask_a
, uint16_t dcmask_b
,
896 S390MinMaxType type
, bool is_min
,
899 return is_min
? vfmin_res(dcmask_a
, dcmask_b
, type
, s
) :
900 vfmax_res(dcmask_a
, dcmask_b
, type
, s
);
903 static void vfminmax32(S390Vector
*v1
, const S390Vector
*v2
,
904 const S390Vector
*v3
, CPUS390XState
*env
,
905 S390MinMaxType type
, bool is_min
, bool is_abs
, bool se
,
908 float_status
*s
= &env
->fpu_status
;
909 uint8_t vxc
, vec_exc
= 0;
913 for (i
= 0; i
< 4; i
++) {
914 float32 a
= s390_vec_read_float32(v2
, i
);
915 float32 b
= s390_vec_read_float32(v3
, i
);
918 if (type
!= S390_MINMAX_TYPE_IEEE
) {
926 res
= vfminmax_res(float32_dcmask(env
, a
), float32_dcmask(env
, b
),
929 case S390_MINMAX_RES_MINMAX
:
930 result
= is_min
? float32_min(a
, b
, s
) : float32_max(a
, b
, s
);
932 case S390_MINMAX_RES_A
:
935 case S390_MINMAX_RES_B
:
938 case S390_MINMAX_RES_SILENCE_A
:
939 result
= float32_silence_nan(a
, s
);
941 case S390_MINMAX_RES_SILENCE_B
:
942 result
= float32_silence_nan(b
, s
);
945 g_assert_not_reached();
947 } else if (!is_abs
) {
948 result
= is_min
? float32_minnum(a
, b
, &env
->fpu_status
) :
949 float32_maxnum(a
, b
, &env
->fpu_status
);
951 result
= is_min
? float32_minnummag(a
, b
, &env
->fpu_status
) :
952 float32_maxnummag(a
, b
, &env
->fpu_status
);
955 s390_vec_write_float32(&tmp
, i
, result
);
956 vxc
= check_ieee_exc(env
, i
, false, &vec_exc
);
961 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
965 static void vfminmax64(S390Vector
*v1
, const S390Vector
*v2
,
966 const S390Vector
*v3
, CPUS390XState
*env
,
967 S390MinMaxType type
, bool is_min
, bool is_abs
, bool se
,
970 float_status
*s
= &env
->fpu_status
;
971 uint8_t vxc
, vec_exc
= 0;
975 for (i
= 0; i
< 2; i
++) {
976 float64 a
= s390_vec_read_float64(v2
, i
);
977 float64 b
= s390_vec_read_float64(v3
, i
);
980 if (type
!= S390_MINMAX_TYPE_IEEE
) {
988 res
= vfminmax_res(float64_dcmask(env
, a
), float64_dcmask(env
, b
),
991 case S390_MINMAX_RES_MINMAX
:
992 result
= is_min
? float64_min(a
, b
, s
) : float64_max(a
, b
, s
);
994 case S390_MINMAX_RES_A
:
997 case S390_MINMAX_RES_B
:
1000 case S390_MINMAX_RES_SILENCE_A
:
1001 result
= float64_silence_nan(a
, s
);
1003 case S390_MINMAX_RES_SILENCE_B
:
1004 result
= float64_silence_nan(b
, s
);
1007 g_assert_not_reached();
1009 } else if (!is_abs
) {
1010 result
= is_min
? float64_minnum(a
, b
, &env
->fpu_status
) :
1011 float64_maxnum(a
, b
, &env
->fpu_status
);
1013 result
= is_min
? float64_minnummag(a
, b
, &env
->fpu_status
) :
1014 float64_maxnummag(a
, b
, &env
->fpu_status
);
1017 s390_vec_write_float64(&tmp
, i
, result
);
1018 vxc
= check_ieee_exc(env
, i
, false, &vec_exc
);
1023 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
1027 static void vfminmax128(S390Vector
*v1
, const S390Vector
*v2
,
1028 const S390Vector
*v3
, CPUS390XState
*env
,
1029 S390MinMaxType type
, bool is_min
, bool is_abs
, bool se
,
1032 float128 a
= s390_vec_read_float128(v2
);
1033 float128 b
= s390_vec_read_float128(v3
);
1034 float_status
*s
= &env
->fpu_status
;
1035 uint8_t vxc
, vec_exc
= 0;
1038 if (type
!= S390_MINMAX_TYPE_IEEE
) {
1042 a
= float128_abs(a
);
1043 b
= float128_abs(b
);
1046 res
= vfminmax_res(float128_dcmask(env
, a
), float128_dcmask(env
, b
),
1049 case S390_MINMAX_RES_MINMAX
:
1050 result
= is_min
? float128_min(a
, b
, s
) : float128_max(a
, b
, s
);
1052 case S390_MINMAX_RES_A
:
1055 case S390_MINMAX_RES_B
:
1058 case S390_MINMAX_RES_SILENCE_A
:
1059 result
= float128_silence_nan(a
, s
);
1061 case S390_MINMAX_RES_SILENCE_B
:
1062 result
= float128_silence_nan(b
, s
);
1065 g_assert_not_reached();
1067 } else if (!is_abs
) {
1068 result
= is_min
? float128_minnum(a
, b
, &env
->fpu_status
) :
1069 float128_maxnum(a
, b
, &env
->fpu_status
);
1071 result
= is_min
? float128_minnummag(a
, b
, &env
->fpu_status
) :
1072 float128_maxnummag(a
, b
, &env
->fpu_status
);
1075 vxc
= check_ieee_exc(env
, 0, false, &vec_exc
);
1076 handle_ieee_exc(env
, vxc
, vec_exc
, retaddr
);
1077 s390_vec_write_float128(v1
, result
);
1080 #define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS) \
1081 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
1082 CPUS390XState *env, uint32_t desc) \
1084 const bool se = extract32(simd_data(desc), 3, 1); \
1085 uint8_t type = extract32(simd_data(desc), 4, 4); \
1086 bool is_abs = false; \
1093 vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC()); \
1096 #define DEF_GVEC_VFMINMAX(NAME, IS_MIN) \
1097 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32) \
1098 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64) \
1099 DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128)
1101 DEF_GVEC_VFMINMAX(vfmax
, false)
1102 DEF_GVEC_VFMINMAX(vfmin
, true)