s390x/tcg: Implement 32/128 bit for VECTOR FP (ADD|DIVIDE|MULTIPLY|SUBTRACT)
[qemu/kevin.git] / target / s390x / vec_fpu_helper.c
blob3484c161ba8898b4821377e237d8df79b44d2392
1 /*
2 * QEMU TCG support -- s390x vector floating point instruction support
4 * Copyright (C) 2019 Red Hat Inc
6 * Authors:
7 * David Hildenbrand <david@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
12 #include "qemu/osdep.h"
13 #include "qemu-common.h"
14 #include "cpu.h"
15 #include "internal.h"
16 #include "vec.h"
17 #include "tcg_s390x.h"
18 #include "tcg/tcg-gvec-desc.h"
19 #include "exec/exec-all.h"
20 #include "exec/helper-proto.h"
21 #include "fpu/softfloat.h"
/*
 * Exception codes that check_ieee_exc() places in the low four bits of the
 * VXC it returns (the element number goes into the high four bits).
 */
enum {
    VIC_INVALID   = 0x1,
    VIC_DIVBYZERO = 0x2,
    VIC_OVERFLOW  = 0x3,
    VIC_UNDERFLOW = 0x4,
    VIC_INEXACT   = 0x5,
};
29 /* returns the VEX. If the VEX is 0, there is no trap */
30 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC,
31 uint8_t *vec_exc)
33 uint8_t vece_exc = 0, trap_exc;
34 unsigned qemu_exc;
36 /* Retrieve and clear the softfloat exceptions */
37 qemu_exc = env->fpu_status.float_exception_flags;
38 if (qemu_exc == 0) {
39 return 0;
41 env->fpu_status.float_exception_flags = 0;
43 vece_exc = s390_softfloat_exc_to_ieee(qemu_exc);
45 /* Add them to the vector-wide s390x exception bits */
46 *vec_exc |= vece_exc;
48 /* Check for traps and construct the VXC */
49 trap_exc = vece_exc & env->fpc >> 24;
50 if (trap_exc) {
51 if (trap_exc & S390_IEEE_MASK_INVALID) {
52 return enr << 4 | VIC_INVALID;
53 } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) {
54 return enr << 4 | VIC_DIVBYZERO;
55 } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) {
56 return enr << 4 | VIC_OVERFLOW;
57 } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) {
58 return enr << 4 | VIC_UNDERFLOW;
59 } else if (!XxC) {
60 g_assert(trap_exc & S390_IEEE_MASK_INEXACT);
61 /* inexact has lowest priority on traps */
62 return enr << 4 | VIC_INEXACT;
65 return 0;
68 static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
69 uintptr_t retaddr)
71 if (vxc) {
72 /* on traps, the fpc flags are not updated, instruction is suppressed */
73 tcg_s390_vector_exception(env, vxc, retaddr);
75 if (vec_exc) {
76 /* indicate exceptions for all elements combined */
77 env->fpc |= vec_exc << 16;
81 static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr)
83 return make_float32(s390_vec_read_element32(v, enr));
86 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr)
88 return make_float64(s390_vec_read_element64(v, enr));
91 static float128 s390_vec_read_float128(const S390Vector *v)
93 return make_float128(s390_vec_read_element64(v, 0),
94 s390_vec_read_element64(v, 1));
97 static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data)
99 return s390_vec_write_element32(v, enr, data);
102 static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data)
104 return s390_vec_write_element64(v, enr, data);
107 static void s390_vec_write_float128(S390Vector *v, float128 data)
109 s390_vec_write_element64(v, 0, data.high);
110 s390_vec_write_element64(v, 1, data.low);
113 typedef float64 (*vop64_2_fn)(float64 a, float_status *s);
114 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
115 bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
116 uintptr_t retaddr)
118 uint8_t vxc, vec_exc = 0;
119 S390Vector tmp = {};
120 int i, old_mode;
122 old_mode = s390_swap_bfp_rounding_mode(env, erm);
123 for (i = 0; i < 2; i++) {
124 const float64 a = s390_vec_read_float64(v2, i);
126 s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status));
127 vxc = check_ieee_exc(env, i, XxC, &vec_exc);
128 if (s || vxc) {
129 break;
132 s390_restore_bfp_rounding_mode(env, old_mode);
133 handle_ieee_exc(env, vxc, vec_exc, retaddr);
134 *v1 = tmp;
137 static float64 vcdg64(float64 a, float_status *s)
139 return int64_to_float64(a, s);
142 static float64 vcdlg64(float64 a, float_status *s)
144 return uint64_to_float64(a, s);
147 static float64 vcgd64(float64 a, float_status *s)
149 const float64 tmp = float64_to_int64(a, s);
151 return float64_is_any_nan(a) ? INT64_MIN : tmp;
154 static float64 vclgd64(float64 a, float_status *s)
156 const float64 tmp = float64_to_uint64(a, s);
158 return float64_is_any_nan(a) ? 0 : tmp;
161 #define DEF_GVEC_VOP2_FN(NAME, FN, BITS) \
162 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \
163 uint32_t desc) \
165 const uint8_t erm = extract32(simd_data(desc), 4, 4); \
166 const bool se = extract32(simd_data(desc), 3, 1); \
167 const bool XxC = extract32(simd_data(desc), 2, 1); \
169 vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); \
172 #define DEF_GVEC_VOP2_64(NAME) \
173 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64)
175 #define DEF_GVEC_VOP2(NAME, OP) \
176 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64)
178 DEF_GVEC_VOP2_64(vcdg)
179 DEF_GVEC_VOP2_64(vcdlg)
180 DEF_GVEC_VOP2_64(vcgd)
181 DEF_GVEC_VOP2_64(vclgd)
182 DEF_GVEC_VOP2(vfi, round_to_int)
183 DEF_GVEC_VOP2(vfsq, sqrt)
185 typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s);
186 static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
187 CPUS390XState *env, bool s, vop32_3_fn fn,
188 uintptr_t retaddr)
190 uint8_t vxc, vec_exc = 0;
191 S390Vector tmp = {};
192 int i;
194 for (i = 0; i < 4; i++) {
195 const float32 a = s390_vec_read_float32(v2, i);
196 const float32 b = s390_vec_read_float32(v3, i);
198 s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status));
199 vxc = check_ieee_exc(env, i, false, &vec_exc);
200 if (s || vxc) {
201 break;
204 handle_ieee_exc(env, vxc, vec_exc, retaddr);
205 *v1 = tmp;
208 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s);
209 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
210 CPUS390XState *env, bool s, vop64_3_fn fn,
211 uintptr_t retaddr)
213 uint8_t vxc, vec_exc = 0;
214 S390Vector tmp = {};
215 int i;
217 for (i = 0; i < 2; i++) {
218 const float64 a = s390_vec_read_float64(v2, i);
219 const float64 b = s390_vec_read_float64(v3, i);
221 s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status));
222 vxc = check_ieee_exc(env, i, false, &vec_exc);
223 if (s || vxc) {
224 break;
227 handle_ieee_exc(env, vxc, vec_exc, retaddr);
228 *v1 = tmp;
231 typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s);
232 static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
233 CPUS390XState *env, bool s, vop128_3_fn fn,
234 uintptr_t retaddr)
236 const float128 a = s390_vec_read_float128(v2);
237 const float128 b = s390_vec_read_float128(v3);
238 uint8_t vxc, vec_exc = 0;
239 S390Vector tmp = {};
241 s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status));
242 vxc = check_ieee_exc(env, 0, false, &vec_exc);
243 handle_ieee_exc(env, vxc, vec_exc, retaddr);
244 *v1 = tmp;
247 #define DEF_GVEC_VOP3_B(NAME, OP, BITS) \
248 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
249 CPUS390XState *env, uint32_t desc) \
251 const bool se = extract32(simd_data(desc), 3, 1); \
253 vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC()); \
256 #define DEF_GVEC_VOP3(NAME, OP) \
257 DEF_GVEC_VOP3_B(NAME, OP, 32) \
258 DEF_GVEC_VOP3_B(NAME, OP, 64) \
259 DEF_GVEC_VOP3_B(NAME, OP, 128)
261 DEF_GVEC_VOP3(vfa, add)
262 DEF_GVEC_VOP3(vfs, sub)
263 DEF_GVEC_VOP3(vfd, div)
264 DEF_GVEC_VOP3(vfm, mul)
266 static int wfc64(const S390Vector *v1, const S390Vector *v2,
267 CPUS390XState *env, bool signal, uintptr_t retaddr)
269 /* only the zero-indexed elements are compared */
270 const float64 a = s390_vec_read_float64(v1, 0);
271 const float64 b = s390_vec_read_float64(v2, 0);
272 uint8_t vxc, vec_exc = 0;
273 int cmp;
275 if (signal) {
276 cmp = float64_compare(a, b, &env->fpu_status);
277 } else {
278 cmp = float64_compare_quiet(a, b, &env->fpu_status);
280 vxc = check_ieee_exc(env, 0, false, &vec_exc);
281 handle_ieee_exc(env, vxc, vec_exc, retaddr);
283 return float_comp_to_cc(env, cmp);
286 #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \
287 void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \
288 CPUS390XState *env, uint32_t desc) \
290 env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \
293 #define DEF_GVEC_WFC(NAME, SIGNAL) \
294 DEF_GVEC_WFC_B(NAME, SIGNAL, 64)
296 DEF_GVEC_WFC(wfc, false)
297 DEF_GVEC_WFC(wfk, true)
299 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status);
300 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
301 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr)
303 uint8_t vxc, vec_exc = 0;
304 S390Vector tmp = {};
305 int match = 0;
306 int i;
308 for (i = 0; i < 2; i++) {
309 const float64 a = s390_vec_read_float64(v2, i);
310 const float64 b = s390_vec_read_float64(v3, i);
312 /* swap the order of the parameters, so we can use existing functions */
313 if (fn(b, a, &env->fpu_status)) {
314 match++;
315 s390_vec_write_element64(&tmp, i, -1ull);
317 vxc = check_ieee_exc(env, i, false, &vec_exc);
318 if (s || vxc) {
319 break;
323 handle_ieee_exc(env, vxc, vec_exc, retaddr);
324 *v1 = tmp;
325 if (match) {
326 return s || match == 2 ? 0 : 1;
328 return 3;
331 #define DEF_GVEC_VFC_B(NAME, OP, BITS) \
332 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
333 CPUS390XState *env, uint32_t desc) \
335 const bool se = extract32(simd_data(desc), 3, 1); \
336 vfc##BITS##_fn fn = float##BITS##_##OP##_quiet; \
338 vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \
341 void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3, \
342 CPUS390XState *env, uint32_t desc) \
344 const bool se = extract32(simd_data(desc), 3, 1); \
345 vfc##BITS##_fn fn = float##BITS##_##OP##_quiet; \
347 env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \
350 #define DEF_GVEC_VFC(NAME, OP) \
351 DEF_GVEC_VFC_B(NAME, OP, 64)
353 DEF_GVEC_VFC(vfce, eq)
354 DEF_GVEC_VFC(vfch, lt)
355 DEF_GVEC_VFC(vfche, le)
357 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
358 uint32_t desc)
360 const bool s = extract32(simd_data(desc), 3, 1);
361 uint8_t vxc, vec_exc = 0;
362 S390Vector tmp = {};
363 int i;
365 for (i = 0; i < 2; i++) {
366 /* load from even element */
367 const float32 a = s390_vec_read_element32(v2, i * 2);
368 const uint64_t ret = float32_to_float64(a, &env->fpu_status);
370 s390_vec_write_element64(&tmp, i, ret);
371 /* indicate the source element */
372 vxc = check_ieee_exc(env, i * 2, false, &vec_exc);
373 if (s || vxc) {
374 break;
377 handle_ieee_exc(env, vxc, vec_exc, GETPC());
378 *(S390Vector *)v1 = tmp;
381 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
382 uint32_t desc)
384 const uint8_t erm = extract32(simd_data(desc), 4, 4);
385 const bool s = extract32(simd_data(desc), 3, 1);
386 const bool XxC = extract32(simd_data(desc), 2, 1);
387 uint8_t vxc, vec_exc = 0;
388 S390Vector tmp = {};
389 int i, old_mode;
391 old_mode = s390_swap_bfp_rounding_mode(env, erm);
392 for (i = 0; i < 2; i++) {
393 float64 a = s390_vec_read_element64(v2, i);
394 uint32_t ret = float64_to_float32(a, &env->fpu_status);
396 /* place at even element */
397 s390_vec_write_element32(&tmp, i * 2, ret);
398 /* indicate the source element */
399 vxc = check_ieee_exc(env, i, XxC, &vec_exc);
400 if (s || vxc) {
401 break;
404 s390_restore_bfp_rounding_mode(env, old_mode);
405 handle_ieee_exc(env, vxc, vec_exc, GETPC());
406 *(S390Vector *)v1 = tmp;
409 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
410 const S390Vector *v4, CPUS390XState *env, bool s, int flags,
411 uintptr_t retaddr)
413 uint8_t vxc, vec_exc = 0;
414 S390Vector tmp = {};
415 int i;
417 for (i = 0; i < 2; i++) {
418 const float64 a = s390_vec_read_float64(v2, i);
419 const float64 b = s390_vec_read_float64(v3, i);
420 const float64 c = s390_vec_read_float64(v4, i);
421 const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status);
423 s390_vec_write_float64(&tmp, i, ret);
424 vxc = check_ieee_exc(env, i, false, &vec_exc);
425 if (s || vxc) {
426 break;
429 handle_ieee_exc(env, vxc, vec_exc, retaddr);
430 *v1 = tmp;
433 #define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \
434 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \
435 const void *v4, CPUS390XState *env, \
436 uint32_t desc) \
438 const bool se = extract32(simd_data(desc), 3, 1); \
440 vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \
443 #define DEF_GVEC_VFMA(NAME, FLAGS) \
444 DEF_GVEC_VFMA_B(NAME, FLAGS, 64)
446 DEF_GVEC_VFMA(vfma, 0)
447 DEF_GVEC_VFMA(vfms, float_muladd_negate_c)
449 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
450 uint32_t desc)
452 const uint16_t i3 = extract32(simd_data(desc), 4, 12);
453 const bool s = extract32(simd_data(desc), 3, 1);
454 int i, match = 0;
456 for (i = 0; i < 2; i++) {
457 const float64 a = s390_vec_read_float64(v2, i);
459 if (float64_dcmask(env, a) & i3) {
460 match++;
461 s390_vec_write_element64(v1, i, -1ull);
462 } else {
463 s390_vec_write_element64(v1, i, 0);
465 if (s) {
466 break;
470 if (match == 2 || (s && match)) {
471 env->cc_op = 0;
472 } else if (match) {
473 env->cc_op = 1;
474 } else {
475 env->cc_op = 3;