s390x/tcg: Implement 32/128 bit for VECTOR FP COMPARE *
[qemu/kevin.git] / target / s390x / vec_fpu_helper.c
blob67dcd8b50af02d8724497c0992d26e7f3cc4a281
/*
 * QEMU TCG support -- s390x vector floating point instruction support
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
12 #include "qemu/osdep.h"
13 #include "qemu-common.h"
14 #include "cpu.h"
15 #include "internal.h"
16 #include "vec.h"
17 #include "tcg_s390x.h"
18 #include "tcg/tcg-gvec-desc.h"
19 #include "exec/exec-all.h"
20 #include "exec/helper-proto.h"
21 #include "fpu/softfloat.h"
/*
 * Exception codes that check_ieee_exc() ORs into the low four bits of the
 * VXC it returns; the element number goes into the bits above them.
 */
#define VIC_INVALID     0x1
#define VIC_DIVBYZERO   0x2
#define VIC_OVERFLOW    0x3
#define VIC_UNDERFLOW   0x4
#define VIC_INEXACT     0x5
29 /* returns the VEX. If the VEX is 0, there is no trap */
30 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC,
31 uint8_t *vec_exc)
33 uint8_t vece_exc = 0, trap_exc;
34 unsigned qemu_exc;
36 /* Retrieve and clear the softfloat exceptions */
37 qemu_exc = env->fpu_status.float_exception_flags;
38 if (qemu_exc == 0) {
39 return 0;
41 env->fpu_status.float_exception_flags = 0;
43 vece_exc = s390_softfloat_exc_to_ieee(qemu_exc);
45 /* Add them to the vector-wide s390x exception bits */
46 *vec_exc |= vece_exc;
48 /* Check for traps and construct the VXC */
49 trap_exc = vece_exc & env->fpc >> 24;
50 if (trap_exc) {
51 if (trap_exc & S390_IEEE_MASK_INVALID) {
52 return enr << 4 | VIC_INVALID;
53 } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) {
54 return enr << 4 | VIC_DIVBYZERO;
55 } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) {
56 return enr << 4 | VIC_OVERFLOW;
57 } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) {
58 return enr << 4 | VIC_UNDERFLOW;
59 } else if (!XxC) {
60 g_assert(trap_exc & S390_IEEE_MASK_INEXACT);
61 /* inexact has lowest priority on traps */
62 return enr << 4 | VIC_INEXACT;
65 return 0;
68 static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
69 uintptr_t retaddr)
71 if (vxc) {
72 /* on traps, the fpc flags are not updated, instruction is suppressed */
73 tcg_s390_vector_exception(env, vxc, retaddr);
75 if (vec_exc) {
76 /* indicate exceptions for all elements combined */
77 env->fpc |= vec_exc << 16;
81 static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr)
83 return make_float32(s390_vec_read_element32(v, enr));
86 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr)
88 return make_float64(s390_vec_read_element64(v, enr));
91 static float128 s390_vec_read_float128(const S390Vector *v)
93 return make_float128(s390_vec_read_element64(v, 0),
94 s390_vec_read_element64(v, 1));
97 static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data)
99 return s390_vec_write_element32(v, enr, data);
102 static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data)
104 return s390_vec_write_element64(v, enr, data);
107 static void s390_vec_write_float128(S390Vector *v, float128 data)
109 s390_vec_write_element64(v, 0, data.high);
110 s390_vec_write_element64(v, 1, data.low);
113 typedef float32 (*vop32_2_fn)(float32 a, float_status *s);
114 static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
115 bool s, bool XxC, uint8_t erm, vop32_2_fn fn,
116 uintptr_t retaddr)
118 uint8_t vxc, vec_exc = 0;
119 S390Vector tmp = {};
120 int i, old_mode;
122 old_mode = s390_swap_bfp_rounding_mode(env, erm);
123 for (i = 0; i < 4; i++) {
124 const float32 a = s390_vec_read_float32(v2, i);
126 s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status));
127 vxc = check_ieee_exc(env, i, XxC, &vec_exc);
128 if (s || vxc) {
129 break;
132 s390_restore_bfp_rounding_mode(env, old_mode);
133 handle_ieee_exc(env, vxc, vec_exc, retaddr);
134 *v1 = tmp;
137 typedef float64 (*vop64_2_fn)(float64 a, float_status *s);
138 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
139 bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
140 uintptr_t retaddr)
142 uint8_t vxc, vec_exc = 0;
143 S390Vector tmp = {};
144 int i, old_mode;
146 old_mode = s390_swap_bfp_rounding_mode(env, erm);
147 for (i = 0; i < 2; i++) {
148 const float64 a = s390_vec_read_float64(v2, i);
150 s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status));
151 vxc = check_ieee_exc(env, i, XxC, &vec_exc);
152 if (s || vxc) {
153 break;
156 s390_restore_bfp_rounding_mode(env, old_mode);
157 handle_ieee_exc(env, vxc, vec_exc, retaddr);
158 *v1 = tmp;
161 typedef float128 (*vop128_2_fn)(float128 a, float_status *s);
162 static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
163 bool s, bool XxC, uint8_t erm, vop128_2_fn fn,
164 uintptr_t retaddr)
166 const float128 a = s390_vec_read_float128(v2);
167 uint8_t vxc, vec_exc = 0;
168 S390Vector tmp = {};
169 int old_mode;
171 old_mode = s390_swap_bfp_rounding_mode(env, erm);
172 s390_vec_write_float128(&tmp, fn(a, &env->fpu_status));
173 vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
174 s390_restore_bfp_rounding_mode(env, old_mode);
175 handle_ieee_exc(env, vxc, vec_exc, retaddr);
176 *v1 = tmp;
179 static float64 vcdg64(float64 a, float_status *s)
181 return int64_to_float64(a, s);
184 static float64 vcdlg64(float64 a, float_status *s)
186 return uint64_to_float64(a, s);
189 static float64 vcgd64(float64 a, float_status *s)
191 const float64 tmp = float64_to_int64(a, s);
193 return float64_is_any_nan(a) ? INT64_MIN : tmp;
196 static float64 vclgd64(float64 a, float_status *s)
198 const float64 tmp = float64_to_uint64(a, s);
200 return float64_is_any_nan(a) ? 0 : tmp;
/*
 * Define the helper gvec_<NAME><BITS> for a two-operand vector FP
 * instruction. The helper decodes simd_data(desc) as
 *   bits 4..7: rounding mode (erm)
 *   bit  3:    single-element control
 *   bit  2:    XxC
 * and forwards everything to vop<BITS>_2() with FN as the per-element
 * operation.
 */
#define DEF_GVEC_VOP2_FN(NAME, FN, BITS) \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \
                               uint32_t desc) \
{ \
    const uint8_t rounding = extract32(simd_data(desc), 4, 4); \
    const bool single = extract32(simd_data(desc), 3, 1); \
    const bool xxc = extract32(simd_data(desc), 2, 1); \
 \
    vop##BITS##_2(v1, v2, env, single, xxc, rounding, FN, GETPC()); \
}
214 #define DEF_GVEC_VOP2_64(NAME) \
215 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64)
217 #define DEF_GVEC_VOP2(NAME, OP) \
218 DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32) \
219 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64) \
220 DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128)
222 DEF_GVEC_VOP2_64(vcdg)
223 DEF_GVEC_VOP2_64(vcdlg)
224 DEF_GVEC_VOP2_64(vcgd)
225 DEF_GVEC_VOP2_64(vclgd)
226 DEF_GVEC_VOP2(vfi, round_to_int)
227 DEF_GVEC_VOP2(vfsq, sqrt)
229 typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s);
230 static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
231 CPUS390XState *env, bool s, vop32_3_fn fn,
232 uintptr_t retaddr)
234 uint8_t vxc, vec_exc = 0;
235 S390Vector tmp = {};
236 int i;
238 for (i = 0; i < 4; i++) {
239 const float32 a = s390_vec_read_float32(v2, i);
240 const float32 b = s390_vec_read_float32(v3, i);
242 s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status));
243 vxc = check_ieee_exc(env, i, false, &vec_exc);
244 if (s || vxc) {
245 break;
248 handle_ieee_exc(env, vxc, vec_exc, retaddr);
249 *v1 = tmp;
252 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s);
253 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
254 CPUS390XState *env, bool s, vop64_3_fn fn,
255 uintptr_t retaddr)
257 uint8_t vxc, vec_exc = 0;
258 S390Vector tmp = {};
259 int i;
261 for (i = 0; i < 2; i++) {
262 const float64 a = s390_vec_read_float64(v2, i);
263 const float64 b = s390_vec_read_float64(v3, i);
265 s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status));
266 vxc = check_ieee_exc(env, i, false, &vec_exc);
267 if (s || vxc) {
268 break;
271 handle_ieee_exc(env, vxc, vec_exc, retaddr);
272 *v1 = tmp;
275 typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s);
276 static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
277 CPUS390XState *env, bool s, vop128_3_fn fn,
278 uintptr_t retaddr)
280 const float128 a = s390_vec_read_float128(v2);
281 const float128 b = s390_vec_read_float128(v3);
282 uint8_t vxc, vec_exc = 0;
283 S390Vector tmp = {};
285 s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status));
286 vxc = check_ieee_exc(env, 0, false, &vec_exc);
287 handle_ieee_exc(env, vxc, vec_exc, retaddr);
288 *v1 = tmp;
/*
 * Define the helper gvec_<NAME><BITS> for a three-operand vector FP
 * instruction. Bit 3 of simd_data(desc) is the single-element control;
 * the per-element operation is the softfloat function float<BITS>_<OP>().
 */
#define DEF_GVEC_VOP3_B(NAME, OP, BITS) \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
                               CPUS390XState *env, uint32_t desc) \
{ \
    const bool single = extract32(simd_data(desc), 3, 1); \
 \
    vop##BITS##_3(v1, v2, v3, env, single, float##BITS##_##OP, GETPC()); \
}
300 #define DEF_GVEC_VOP3(NAME, OP) \
301 DEF_GVEC_VOP3_B(NAME, OP, 32) \
302 DEF_GVEC_VOP3_B(NAME, OP, 64) \
303 DEF_GVEC_VOP3_B(NAME, OP, 128)
305 DEF_GVEC_VOP3(vfa, add)
306 DEF_GVEC_VOP3(vfs, sub)
307 DEF_GVEC_VOP3(vfd, div)
308 DEF_GVEC_VOP3(vfm, mul)
310 static int wfc64(const S390Vector *v1, const S390Vector *v2,
311 CPUS390XState *env, bool signal, uintptr_t retaddr)
313 /* only the zero-indexed elements are compared */
314 const float64 a = s390_vec_read_float64(v1, 0);
315 const float64 b = s390_vec_read_float64(v2, 0);
316 uint8_t vxc, vec_exc = 0;
317 int cmp;
319 if (signal) {
320 cmp = float64_compare(a, b, &env->fpu_status);
321 } else {
322 cmp = float64_compare_quiet(a, b, &env->fpu_status);
324 vxc = check_ieee_exc(env, 0, false, &vec_exc);
325 handle_ieee_exc(env, vxc, vec_exc, retaddr);
327 return float_comp_to_cc(env, cmp);
330 #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \
331 void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \
332 CPUS390XState *env, uint32_t desc) \
334 env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \
337 #define DEF_GVEC_WFC(NAME, SIGNAL) \
338 DEF_GVEC_WFC_B(NAME, SIGNAL, 64)
340 DEF_GVEC_WFC(wfc, false)
341 DEF_GVEC_WFC(wfk, true)
343 typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status);
344 static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
345 CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr)
347 uint8_t vxc, vec_exc = 0;
348 S390Vector tmp = {};
349 int match = 0;
350 int i;
352 for (i = 0; i < 4; i++) {
353 const float32 a = s390_vec_read_float32(v2, i);
354 const float32 b = s390_vec_read_float32(v3, i);
356 /* swap the order of the parameters, so we can use existing functions */
357 if (fn(b, a, &env->fpu_status)) {
358 match++;
359 s390_vec_write_element32(&tmp, i, -1u);
361 vxc = check_ieee_exc(env, i, false, &vec_exc);
362 if (s || vxc) {
363 break;
367 handle_ieee_exc(env, vxc, vec_exc, retaddr);
368 *v1 = tmp;
369 if (match) {
370 return s || match == 4 ? 0 : 1;
372 return 3;
375 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status);
376 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
377 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr)
379 uint8_t vxc, vec_exc = 0;
380 S390Vector tmp = {};
381 int match = 0;
382 int i;
384 for (i = 0; i < 2; i++) {
385 const float64 a = s390_vec_read_float64(v2, i);
386 const float64 b = s390_vec_read_float64(v3, i);
388 /* swap the order of the parameters, so we can use existing functions */
389 if (fn(b, a, &env->fpu_status)) {
390 match++;
391 s390_vec_write_element64(&tmp, i, -1ull);
393 vxc = check_ieee_exc(env, i, false, &vec_exc);
394 if (s || vxc) {
395 break;
399 handle_ieee_exc(env, vxc, vec_exc, retaddr);
400 *v1 = tmp;
401 if (match) {
402 return s || match == 2 ? 0 : 1;
404 return 3;
407 typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status);
408 static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
409 CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr)
411 const float128 a = s390_vec_read_float128(v2);
412 const float128 b = s390_vec_read_float128(v3);
413 uint8_t vxc, vec_exc = 0;
414 S390Vector tmp = {};
415 bool match = false;
417 /* swap the order of the parameters, so we can use existing functions */
418 if (fn(b, a, &env->fpu_status)) {
419 match = true;
420 s390_vec_write_element64(&tmp, 0, -1ull);
421 s390_vec_write_element64(&tmp, 1, -1ull);
423 vxc = check_ieee_exc(env, 0, false, &vec_exc);
424 handle_ieee_exc(env, vxc, vec_exc, retaddr);
425 *v1 = tmp;
426 return match ? 0 : 3;
/*
 * Define two helpers for a vector FP compare instruction:
 *   gvec_<NAME><BITS>     performs the comparison and discards the result
 *                         code of vfc<BITS>();
 *   gvec_<NAME><BITS>_cc  additionally stores that code in env->cc_op.
 *
 * simd_data(desc) is decoded as
 *   bit 3: single-element control
 *   bit 2: selects float<BITS>_<OP>() when set, float<BITS>_<OP>_quiet()
 *          otherwise.
 */
#define DEF_GVEC_VFC_B(NAME, OP, BITS) \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
                               CPUS390XState *env, uint32_t desc) \
{ \
    const bool single = extract32(simd_data(desc), 3, 1); \
    const bool signaling = extract32(simd_data(desc), 2, 1); \
    vfc##BITS##_fn cmp = signaling ? float##BITS##_##OP \
                                   : float##BITS##_##OP##_quiet; \
 \
    vfc##BITS(v1, v2, v3, env, single, cmp, GETPC()); \
} \
 \
void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3, \
                                    CPUS390XState *env, uint32_t desc) \
{ \
    const bool single = extract32(simd_data(desc), 3, 1); \
    const bool signaling = extract32(simd_data(desc), 2, 1); \
    vfc##BITS##_fn cmp = signaling ? float##BITS##_##OP \
                                   : float##BITS##_##OP##_quiet; \
 \
    env->cc_op = vfc##BITS(v1, v2, v3, env, single, cmp, GETPC()); \
}
450 #define DEF_GVEC_VFC(NAME, OP) \
451 DEF_GVEC_VFC_B(NAME, OP, 32) \
452 DEF_GVEC_VFC_B(NAME, OP, 64) \
453 DEF_GVEC_VFC_B(NAME, OP, 128) \
455 DEF_GVEC_VFC(vfce, eq)
456 DEF_GVEC_VFC(vfch, lt)
457 DEF_GVEC_VFC(vfche, le)
459 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
460 uint32_t desc)
462 const bool s = extract32(simd_data(desc), 3, 1);
463 uint8_t vxc, vec_exc = 0;
464 S390Vector tmp = {};
465 int i;
467 for (i = 0; i < 2; i++) {
468 /* load from even element */
469 const float32 a = s390_vec_read_element32(v2, i * 2);
470 const uint64_t ret = float32_to_float64(a, &env->fpu_status);
472 s390_vec_write_element64(&tmp, i, ret);
473 /* indicate the source element */
474 vxc = check_ieee_exc(env, i * 2, false, &vec_exc);
475 if (s || vxc) {
476 break;
479 handle_ieee_exc(env, vxc, vec_exc, GETPC());
480 *(S390Vector *)v1 = tmp;
483 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
484 uint32_t desc)
486 const uint8_t erm = extract32(simd_data(desc), 4, 4);
487 const bool s = extract32(simd_data(desc), 3, 1);
488 const bool XxC = extract32(simd_data(desc), 2, 1);
489 uint8_t vxc, vec_exc = 0;
490 S390Vector tmp = {};
491 int i, old_mode;
493 old_mode = s390_swap_bfp_rounding_mode(env, erm);
494 for (i = 0; i < 2; i++) {
495 float64 a = s390_vec_read_element64(v2, i);
496 uint32_t ret = float64_to_float32(a, &env->fpu_status);
498 /* place at even element */
499 s390_vec_write_element32(&tmp, i * 2, ret);
500 /* indicate the source element */
501 vxc = check_ieee_exc(env, i, XxC, &vec_exc);
502 if (s || vxc) {
503 break;
506 s390_restore_bfp_rounding_mode(env, old_mode);
507 handle_ieee_exc(env, vxc, vec_exc, GETPC());
508 *(S390Vector *)v1 = tmp;
511 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
512 const S390Vector *v4, CPUS390XState *env, bool s, int flags,
513 uintptr_t retaddr)
515 uint8_t vxc, vec_exc = 0;
516 S390Vector tmp = {};
517 int i;
519 for (i = 0; i < 2; i++) {
520 const float64 a = s390_vec_read_float64(v2, i);
521 const float64 b = s390_vec_read_float64(v3, i);
522 const float64 c = s390_vec_read_float64(v4, i);
523 const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status);
525 s390_vec_write_float64(&tmp, i, ret);
526 vxc = check_ieee_exc(env, i, false, &vec_exc);
527 if (s || vxc) {
528 break;
531 handle_ieee_exc(env, vxc, vec_exc, retaddr);
532 *v1 = tmp;
535 #define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \
536 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
537 const void *v4, CPUS390XState *env, \
538 uint32_t desc) \
540 const bool se = extract32(simd_data(desc), 3, 1); \
542 vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \
545 #define DEF_GVEC_VFMA(NAME, FLAGS) \
546 DEF_GVEC_VFMA_B(NAME, FLAGS, 64)
548 DEF_GVEC_VFMA(vfma, 0)
549 DEF_GVEC_VFMA(vfms, float_muladd_negate_c)
551 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
552 uint32_t desc)
554 const uint16_t i3 = extract32(simd_data(desc), 4, 12);
555 const bool s = extract32(simd_data(desc), 3, 1);
556 int i, match = 0;
558 for (i = 0; i < 2; i++) {
559 const float64 a = s390_vec_read_float64(v2, i);
561 if (float64_dcmask(env, a) & i3) {
562 match++;
563 s390_vec_write_element64(v1, i, -1ull);
564 } else {
565 s390_vec_write_element64(v1, i, 0);
567 if (s) {
568 break;
572 if (match == 2 || (s && match)) {
573 env->cc_op = 0;
574 } else if (match) {
575 env->cc_op = 1;
576 } else {
577 env->cc_op = 3;