target/arm/helper-a64.c

   1 /*
   2  *  AArch64 specific helpers
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "cpu.h"
  22 #include "exec/gdbstub.h"
  23 #include "exec/helper-proto.h"
  24 #include "qemu/host-utils.h"
  25 #include "qemu/log.h"
  26 #include "sysemu/sysemu.h"
  27 #include "qemu/bitops.h"
  28 #include "internals.h"
  29 #include "qemu/crc32c.h"
  30 #include "exec/exec-all.h"
  31 #include "exec/cpu_ldst.h"
  32 #include "qemu/int128.h"
  33 #include "tcg.h"
  34 #include "fpu/softfloat.h"
  35 #include <zlib.h> /* For crc32 */
  36
  37 /* C2.4.7 Multiply and divide */
  38 /* special cases for 0 and LLONG_MIN are mandated by the standard */
  39 uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
  40 {
  41     if (den == 0) {
  42         return 0;
  43     }
  44     return num / den;
  45 }
  46
  47 int64_t HELPER(sdiv64)(int64_t num, int64_t den)
  48 {
  49     if (den == 0) {
  50         return 0;
  51     }
  52     if (num == LLONG_MIN && den == -1) {
  53         return LLONG_MIN;
  54     }
  55     return num / den;
  56 }
  57
  58 uint64_t HELPER(rbit64)(uint64_t x)
  59 {
  60     return revbit64(x);
  61 }
  62
  63 /* Convert a softfloat float_relation_ (as returned by
  64  * the float*_compare functions) to the correct ARM
  65  * NZCV flag state.
  66  */
  67 static inline uint32_t float_rel_to_flags(int res)
  68 {
  69     uint64_t flags;
  70     switch (res) {
  71     case float_relation_equal:
  72         flags = PSTATE_Z | PSTATE_C;
  73         break;
  74     case float_relation_less:
  75         flags = PSTATE_N;
  76         break;
  77     case float_relation_greater:
  78         flags = PSTATE_C;
  79         break;
  80     case float_relation_unordered:
  81     default:
  82         flags = PSTATE_C | PSTATE_V;
  83         break;
  84     }
  85     return flags;
  86 }
  87
  88 uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
  89 {
  90     return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
  91 }
  92
  93 uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status)
  94 {
  95     return float_rel_to_flags(float32_compare(x, y, fp_status));
  96 }
  97
  98 uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status)
  99 {
 100     return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
 101 }
 102
 103 uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
 104 {
 105     return float_rel_to_flags(float64_compare(x, y, fp_status));
 106 }
 107
 108 float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
 109 {
 110     float_status *fpst = fpstp;
 111
 112     a = float32_squash_input_denormal(a, fpst);
 113     b = float32_squash_input_denormal(b, fpst);
 114
 115     if ((float32_is_zero(a) && float32_is_infinity(b)) ||
 116         (float32_is_infinity(a) && float32_is_zero(b))) {
 117         /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
 118         return make_float32((1U << 30) |
 119                             ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
 120     }
 121     return float32_mul(a, b, fpst);
 122 }
 123
 124 float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
 125 {
 126     float_status *fpst = fpstp;
 127
 128     a = float64_squash_input_denormal(a, fpst);
 129     b = float64_squash_input_denormal(b, fpst);
 130
 131     if ((float64_is_zero(a) && float64_is_infinity(b)) ||
 132         (float64_is_infinity(a) && float64_is_zero(b))) {
 133         /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
 134         return make_float64((1ULL << 62) |
 135                             ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
 136     }
 137     return float64_mul(a, b, fpst);
 138 }
 139
 140 uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
 141                           uint32_t rn, uint32_t numregs)
 142 {
 143     /* Helper function for SIMD TBL and TBX. We have to do the table
 144      * lookup part for the 64 bits worth of indices we're passed in.
 145      * result is the initial results vector (either zeroes for TBL
 146      * or some guest values for TBX), rn the register number where
 147      * the table starts, and numregs the number of registers in the table.
 148      * We return the results of the lookups.
 149      */
 150     int shift;
 151
 152     for (shift = 0; shift < 64; shift += 8) {
 153         int index = extract64(indices, shift, 8);
 154         if (index < 16 * numregs) {
 155             /* Convert index (a byte offset into the virtual table
 156              * which is a series of 128-bit vectors concatenated)
 157              * into the correct register element plus a bit offset
 158              * into that element, bearing in mind that the table
 159              * can wrap around from V31 to V0.
 160              */
 161             int elt = (rn * 2 + (index >> 3)) % 64;
 162             int bitidx = (index & 7) * 8;
 163             uint64_t *q = aa64_vfp_qreg(env, elt >> 1);
 164             uint64_t val = extract64(q[elt & 1], bitidx, 8);
 165
 166             result = deposit64(result, shift, 8, val);
 167         }
 168     }
 169     return result;
 170 }
 171
 172 /* 64bit/double versions of the neon float compare functions */
 173 uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
 174 {
 175     float_status *fpst = fpstp;
 176     return -float64_eq_quiet(a, b, fpst);
 177 }
 178
 179 uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
 180 {
 181     float_status *fpst = fpstp;
 182     return -float64_le(b, a, fpst);
 183 }
 184
 185 uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
 186 {
 187     float_status *fpst = fpstp;
 188     return -float64_lt(b, a, fpst);
 189 }
 190
 191 /* Reciprocal step and sqrt step. Note that unlike the A32/T32
 192  * versions, these do a fully fused multiply-add or
 193  * multiply-add-and-halve.
 194  */
 195 #define float32_two make_float32(0x40000000)
 196 #define float32_three make_float32(0x40400000)
 197 #define float32_one_point_five make_float32(0x3fc00000)
 198
 199 #define float64_two make_float64(0x4000000000000000ULL)
 200 #define float64_three make_float64(0x4008000000000000ULL)
 201 #define float64_one_point_five make_float64(0x3FF8000000000000ULL)
 202
 203 float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
 204 {
 205     float_status *fpst = fpstp;
 206
 207     a = float32_squash_input_denormal(a, fpst);
 208     b = float32_squash_input_denormal(b, fpst);
 209
 210     a = float32_chs(a);
 211     if ((float32_is_infinity(a) && float32_is_zero(b)) ||
 212         (float32_is_infinity(b) && float32_is_zero(a))) {
 213         return float32_two;
 214     }
 215     return float32_muladd(a, b, float32_two, 0, fpst);
 216 }
 217
 218 float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
 219 {
 220     float_status *fpst = fpstp;
 221
 222     a = float64_squash_input_denormal(a, fpst);
 223     b = float64_squash_input_denormal(b, fpst);
 224
 225     a = float64_chs(a);
 226     if ((float64_is_infinity(a) && float64_is_zero(b)) ||
 227         (float64_is_infinity(b) && float64_is_zero(a))) {
 228         return float64_two;
 229     }
 230     return float64_muladd(a, b, float64_two, 0, fpst);
 231 }
 232
 233 float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
 234 {
 235     float_status *fpst = fpstp;
 236
 237     a = float32_squash_input_denormal(a, fpst);
 238     b = float32_squash_input_denormal(b, fpst);
 239
 240     a = float32_chs(a);
 241     if ((float32_is_infinity(a) && float32_is_zero(b)) ||
 242         (float32_is_infinity(b) && float32_is_zero(a))) {
 243         return float32_one_point_five;
 244     }
 245     return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
 246 }
 247
 248 float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
 249 {
 250     float_status *fpst = fpstp;
 251
 252     a = float64_squash_input_denormal(a, fpst);
 253     b = float64_squash_input_denormal(b, fpst);
 254
 255     a = float64_chs(a);
 256     if ((float64_is_infinity(a) && float64_is_zero(b)) ||
 257         (float64_is_infinity(b) && float64_is_zero(a))) {
 258         return float64_one_point_five;
 259     }
 260     return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
 261 }
 262
 263 /* Pairwise long add: add pairs of adjacent elements into
 264  * double-width elements in the result (eg _s8 is an 8x8->16 op)
 265  */
 266 uint64_t HELPER(neon_addlp_s8)(uint64_t a)
 267 {
 268     uint64_t nsignmask = 0x0080008000800080ULL;
 269     uint64_t wsignmask = 0x8000800080008000ULL;
 270     uint64_t elementmask = 0x00ff00ff00ff00ffULL;
 271     uint64_t tmp1, tmp2;
 272     uint64_t res, signres;
 273
 274     /* Extract odd elements, sign extend each to a 16 bit field */
 275     tmp1 = a & elementmask;
 276     tmp1 ^= nsignmask;
 277     tmp1 |= wsignmask;
 278     tmp1 = (tmp1 - nsignmask) ^ wsignmask;
 279     /* Ditto for the even elements */
 280     tmp2 = (a >> 8) & elementmask;
 281     tmp2 ^= nsignmask;
 282     tmp2 |= wsignmask;
 283     tmp2 = (tmp2 - nsignmask) ^ wsignmask;
 284
 285     /* calculate the result by summing bits 0..14, 16..22, etc,
 286      * and then adjusting the sign bits 15, 23, etc manually.
 287      * This ensures the addition can't overflow the 16 bit field.
 288      */
 289     signres = (tmp1 ^ tmp2) & wsignmask;
 290     res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
 291     res ^= signres;
 292
 293     return res;
 294 }
 295
 296 uint64_t HELPER(neon_addlp_u8)(uint64_t a)
 297 {
 298     uint64_t tmp;
 299
 300     tmp = a & 0x00ff00ff00ff00ffULL;
 301     tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
 302     return tmp;
 303 }
 304
 305 uint64_t HELPER(neon_addlp_s16)(uint64_t a)
 306 {
 307     int32_t reslo, reshi;
 308
 309     reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
 310     reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
 311
 312     return (uint32_t)reslo | (((uint64_t)reshi) << 32);
 313 }
 314
 315 uint64_t HELPER(neon_addlp_u16)(uint64_t a)
 316 {
 317     uint64_t tmp;
 318
 319     tmp = a & 0x0000ffff0000ffffULL;
 320     tmp += (a >> 16) & 0x0000ffff0000ffffULL;
 321     return tmp;
 322 }
 323
 324 /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
 325 float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
 326 {
 327     float_status *fpst = fpstp;
 328     uint32_t val32, sbit;
 329     int32_t exp;
 330
 331     if (float32_is_any_nan(a)) {
 332         float32 nan = a;
 333         if (float32_is_signaling_nan(a, fpst)) {
 334             float_raise(float_flag_invalid, fpst);
 335             nan = float32_maybe_silence_nan(a, fpst);
 336         }
 337         if (fpst->default_nan_mode) {
 338             nan = float32_default_nan(fpst);
 339         }
 340         return nan;
 341     }
 342
 343     val32 = float32_val(a);
 344     sbit = 0x80000000ULL & val32;
 345     exp = extract32(val32, 23, 8);
 346
 347     if (exp == 0) {
 348         return make_float32(sbit | (0xfe << 23));
 349     } else {
 350         return make_float32(sbit | (~exp & 0xff) << 23);
 351     }
 352 }
 353
 354 float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
 355 {
 356     float_status *fpst = fpstp;
 357     uint64_t val64, sbit;
 358     int64_t exp;
 359
 360     if (float64_is_any_nan(a)) {
 361         float64 nan = a;
 362         if (float64_is_signaling_nan(a, fpst)) {
 363             float_raise(float_flag_invalid, fpst);
 364             nan = float64_maybe_silence_nan(a, fpst);
 365         }
 366         if (fpst->default_nan_mode) {
 367             nan = float64_default_nan(fpst);
 368         }
 369         return nan;
 370     }
 371
 372     val64 = float64_val(a);
 373     sbit = 0x8000000000000000ULL & val64;
 374     exp = extract64(float64_val(a), 52, 11);
 375
 376     if (exp == 0) {
 377         return make_float64(sbit | (0x7feULL << 52));
 378     } else {
 379         return make_float64(sbit | (~exp & 0x7ffULL) << 52);
 380     }
 381 }
 382
 383 float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
 384 {
 385     /* Von Neumann rounding is implemented by using round-to-zero
 386      * and then setting the LSB of the result if Inexact was raised.
 387      */
 388     float32 r;
 389     float_status *fpst = &env->vfp.fp_status;
 390     float_status tstat = *fpst;
 391     int exflags;
 392
 393     set_float_rounding_mode(float_round_to_zero, &tstat);
 394     set_float_exception_flags(0, &tstat);
 395     r = float64_to_float32(a, &tstat);
 396     r = float32_maybe_silence_nan(r, &tstat);
 397     exflags = get_float_exception_flags(&tstat);
 398     if (exflags & float_flag_inexact) {
 399         r = make_float32(float32_val(r) | 1);
 400     }
 401     exflags |= get_float_exception_flags(fpst);
 402     set_float_exception_flags(exflags, fpst);
 403     return r;
 404 }
 405
 406 /* 64-bit versions of the CRC helpers. Note that although the operation
 407  * (and the prototypes of crc32c() and crc32() mean that only the bottom
 408  * 32 bits of the accumulator and result are used, we pass and return
 409  * uint64_t for convenience of the generated code. Unlike the 32-bit
 410  * instruction set versions, val may genuinely have 64 bits of data in it.
 411  * The upper bytes of val (above the number specified by 'bytes') must have
 412  * been zeroed out by the caller.
 413  */
 414 uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
 415 {
 416     uint8_t buf[8];
 417
 418     stq_le_p(buf, val);
 419
 420     /* zlib crc32 converts the accumulator and output to one's complement.  */
 421     return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
 422 }
 423
 424 uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
 425 {
 426     uint8_t buf[8];
 427
 428     stq_le_p(buf, val);
 429
 430     /* Linux crc32c converts the output to one's complement.  */
 431     return crc32c(acc, buf, bytes) ^ 0xffffffff;
 432 }
 433
 434 /* Returns 0 on success; 1 otherwise.  */
 435 static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr,
 436                                        uint64_t new_lo, uint64_t new_hi,
 437                                        bool parallel, uintptr_t ra)
 438 {
 439     Int128 oldv, cmpv, newv;
 440     bool success;
 441
 442     cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
 443     newv = int128_make128(new_lo, new_hi);
 444
 445     if (parallel) {
 446 #ifndef CONFIG_ATOMIC128
 447         cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
 448 #else
 449         int mem_idx = cpu_mmu_index(env, false);
 450         TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
 451         oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
 452         success = int128_eq(oldv, cmpv);
 453 #endif
 454     } else {
 455         uint64_t o0, o1;
 456
 457 #ifdef CONFIG_USER_ONLY
 458         /* ??? Enforce alignment.  */
 459         uint64_t *haddr = g2h(addr);
 460
 461         helper_retaddr = ra;
 462         o0 = ldq_le_p(haddr + 0);
 463         o1 = ldq_le_p(haddr + 1);
 464         oldv = int128_make128(o0, o1);
 465
 466         success = int128_eq(oldv, cmpv);
 467         if (success) {
 468             stq_le_p(haddr + 0, int128_getlo(newv));
 469             stq_le_p(haddr + 1, int128_gethi(newv));
 470         }
 471         helper_retaddr = 0;
 472 #else
 473         int mem_idx = cpu_mmu_index(env, false);
 474         TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
 475         TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
 476
 477         o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
 478         o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
 479         oldv = int128_make128(o0, o1);
 480
 481         success = int128_eq(oldv, cmpv);
 482         if (success) {
 483             helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
 484             helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
 485         }
 486 #endif
 487     }
 488
 489     return !success;
 490 }
 491
 492 uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
 493                                               uint64_t new_lo, uint64_t new_hi)
 494 {
 495     return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false, GETPC());
 496 }
 497
 498 uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
 499                                               uint64_t new_lo, uint64_t new_hi)
 500 {
 501     return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true, GETPC());
 502 }
 503
 504 static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr,
 505                                        uint64_t new_lo, uint64_t new_hi,
 506                                        bool parallel, uintptr_t ra)
 507 {
 508     Int128 oldv, cmpv, newv;
 509     bool success;
 510
 511     /* high and low need to be switched here because this is not actually a
 512      * 128bit store but two doublewords stored consecutively
 513      */
 514     cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
 515     newv = int128_make128(new_hi, new_lo);
 516
 517     if (parallel) {
 518 #ifndef CONFIG_ATOMIC128
 519         cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
 520 #else
 521         int mem_idx = cpu_mmu_index(env, false);
 522         TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
 523         oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
 524         success = int128_eq(oldv, cmpv);
 525 #endif
 526     } else {
 527         uint64_t o0, o1;
 528
 529 #ifdef CONFIG_USER_ONLY
 530         /* ??? Enforce alignment.  */
 531         uint64_t *haddr = g2h(addr);
 532
 533         helper_retaddr = ra;
 534         o1 = ldq_be_p(haddr + 0);
 535         o0 = ldq_be_p(haddr + 1);
 536         oldv = int128_make128(o0, o1);
 537
 538         success = int128_eq(oldv, cmpv);
 539         if (success) {
 540             stq_be_p(haddr + 0, int128_gethi(newv));
 541             stq_be_p(haddr + 1, int128_getlo(newv));
 542         }
 543         helper_retaddr = 0;
 544 #else
 545         int mem_idx = cpu_mmu_index(env, false);
 546         TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
 547         TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
 548
 549         o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
 550         o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
 551         oldv = int128_make128(o0, o1);
 552
 553         success = int128_eq(oldv, cmpv);
 554         if (success) {
 555             helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
 556             helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
 557         }
 558 #endif
 559     }
 560
 561     return !success;
 562 }
 563
 564 uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
 565                                      uint64_t new_lo, uint64_t new_hi)
 566 {
 567     return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false, GETPC());
 568 }
 569
 570 uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
 571                                      uint64_t new_lo, uint64_t new_hi)
 572 {
 573     return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC());
 574 }