target/hexagon/op_helper.c

   1 /*
   2  *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
   3  *
   4  *  This program is free software; you can redistribute it and/or modify
   5  *  it under the terms of the GNU General Public License as published by
   6  *  the Free Software Foundation; either version 2 of the License, or
   7  *  (at your option) any later version.
   8  *
   9  *  This program is distributed in the hope that it will be useful,
  10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  *  GNU General Public License for more details.
  13  *
  14  *  You should have received a copy of the GNU General Public License
  15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  16  */
  17
  18 #include "qemu/osdep.h"
  19 #include "qemu/log.h"
  20 #include "exec/exec-all.h"
  21 #include "exec/cpu_ldst.h"
  22 #include "exec/helper-proto.h"
  23 #include "fpu/softfloat.h"
  24 #include "cpu.h"
  25 #include "internal.h"
  26 #include "macros.h"
  27 #include "arch.h"
  28 #include "hex_arch_types.h"
  29 #include "fma_emu.h"
  30 #include "mmvec/mmvec.h"
  31 #include "mmvec/macros.h"
  32 #include "op_helper.h"
  33 #include "translate.h"
  34
  35 #define SF_BIAS        127
  36 #define SF_MANTBITS    23
  37
  38 /* Exceptions processing helpers */
  39 static G_NORETURN
  40 void do_raise_exception_err(CPUHexagonState *env,
  41                             uint32_t exception,
  42                             uintptr_t pc)
  43 {
  44     CPUState *cs = env_cpu(env);
  45     qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception);
  46     cs->exception_index = exception;
  47     cpu_loop_exit_restore(cs, pc);
  48 }
  49
  50 G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp)
  51 {
  52     do_raise_exception_err(env, excp, 0);
  53 }
  54
  55 void log_store32(CPUHexagonState *env, target_ulong addr,
  56                  target_ulong val, int width, int slot)
  57 {
  58     HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
  59                   ", %" PRId32 " [0x08%" PRIx32 "])\n",
  60                   width, addr, val, val);
  61     env->mem_log_stores[slot].va = addr;
  62     env->mem_log_stores[slot].width = width;
  63     env->mem_log_stores[slot].data32 = val;
  64 }
  65
  66 void log_store64(CPUHexagonState *env, target_ulong addr,
  67                  int64_t val, int width, int slot)
  68 {
  69     HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
  70                   ", %" PRId64 " [0x016%" PRIx64 "])\n",
  71                    width, addr, val, val);
  72     env->mem_log_stores[slot].va = addr;
  73     env->mem_log_stores[slot].width = width;
  74     env->mem_log_stores[slot].data64 = val;
  75 }
  76
  77 /* Handy place to set a breakpoint */
  78 void HELPER(debug_start_packet)(CPUHexagonState *env)
  79 {
  80     HEX_DEBUG_LOG("Start packet: pc = 0x" TARGET_FMT_lx "\n",
  81                   env->gpr[HEX_REG_PC]);
  82
  83     for (int i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
  84         env->reg_written[i] = 0;
  85     }
  86 }
  87
  88 /* Checks for bookkeeping errors between disassembly context and runtime */
  89 void HELPER(debug_check_store_width)(CPUHexagonState *env, int slot, int check)
  90 {
  91     if (env->mem_log_stores[slot].width != check) {
  92         HEX_DEBUG_LOG("ERROR: %d != %d\n",
  93                       env->mem_log_stores[slot].width, check);
  94         g_assert_not_reached();
  95     }
  96 }
  97
  98 void HELPER(commit_store)(CPUHexagonState *env, int slot_num)
  99 {
 100     uintptr_t ra = GETPC();
 101     uint8_t width = env->mem_log_stores[slot_num].width;
 102     target_ulong va = env->mem_log_stores[slot_num].va;
 103
 104     switch (width) {
 105     case 1:
 106         cpu_stb_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
 107         break;
 108     case 2:
 109         cpu_stw_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
 110         break;
 111     case 4:
 112         cpu_stl_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
 113         break;
 114     case 8:
 115         cpu_stq_data_ra(env, va, env->mem_log_stores[slot_num].data64, ra);
 116         break;
 117     default:
 118         g_assert_not_reached();
 119     }
 120 }
 121
 122 void HELPER(gather_store)(CPUHexagonState *env, uint32_t addr, int slot)
 123 {
 124     mem_gather_store(env, addr, slot);
 125 }
 126
 127 void HELPER(commit_hvx_stores)(CPUHexagonState *env)
 128 {
 129     uintptr_t ra = GETPC();
 130     int i;
 131
 132     /* Normal (possibly masked) vector store */
 133     for (i = 0; i < VSTORES_MAX; i++) {
 134         if (env->vstore_pending[i]) {
 135             env->vstore_pending[i] = 0;
 136             target_ulong va = env->vstore[i].va;
 137             int size = env->vstore[i].size;
 138             for (int j = 0; j < size; j++) {
 139                 if (test_bit(j, env->vstore[i].mask)) {
 140                     cpu_stb_data_ra(env, va + j, env->vstore[i].data.ub[j], ra);
 141                 }
 142             }
 143         }
 144     }
 145
 146     /* Scatter store */
 147     if (env->vtcm_pending) {
 148         env->vtcm_pending = false;
 149         if (env->vtcm_log.op) {
 150             /* Need to perform the scatter read/modify/write at commit time */
 151             if (env->vtcm_log.op_size == 2) {
 152                 SCATTER_OP_WRITE_TO_MEM(uint16_t);
 153             } else if (env->vtcm_log.op_size == 4) {
 154                 /* Word Scatter += */
 155                 SCATTER_OP_WRITE_TO_MEM(uint32_t);
 156             } else {
 157                 g_assert_not_reached();
 158             }
 159         } else {
 160             for (i = 0; i < sizeof(MMVector); i++) {
 161                 if (test_bit(i, env->vtcm_log.mask)) {
 162                     cpu_stb_data_ra(env, env->vtcm_log.va[i],
 163                                     env->vtcm_log.data.ub[i], ra);
 164                     clear_bit(i, env->vtcm_log.mask);
 165                     env->vtcm_log.data.ub[i] = 0;
 166                 }
 167
 168             }
 169         }
 170     }
 171 }
 172
 173 static void print_store(CPUHexagonState *env, int slot)
 174 {
 175     if (!(env->slot_cancelled & (1 << slot))) {
 176         uint8_t width = env->mem_log_stores[slot].width;
 177         if (width == 1) {
 178             uint32_t data = env->mem_log_stores[slot].data32 & 0xff;
 179             HEX_DEBUG_LOG("\tmemb[0x" TARGET_FMT_lx "] = %" PRId32
 180                           " (0x%02" PRIx32 ")\n",
 181                           env->mem_log_stores[slot].va, data, data);
 182         } else if (width == 2) {
 183             uint32_t data = env->mem_log_stores[slot].data32 & 0xffff;
 184             HEX_DEBUG_LOG("\tmemh[0x" TARGET_FMT_lx "] = %" PRId32
 185                           " (0x%04" PRIx32 ")\n",
 186                           env->mem_log_stores[slot].va, data, data);
 187         } else if (width == 4) {
 188             uint32_t data = env->mem_log_stores[slot].data32;
 189             HEX_DEBUG_LOG("\tmemw[0x" TARGET_FMT_lx "] = %" PRId32
 190                           " (0x%08" PRIx32 ")\n",
 191                           env->mem_log_stores[slot].va, data, data);
 192         } else if (width == 8) {
 193             HEX_DEBUG_LOG("\tmemd[0x" TARGET_FMT_lx "] = %" PRId64
 194                           " (0x%016" PRIx64 ")\n",
 195                           env->mem_log_stores[slot].va,
 196                           env->mem_log_stores[slot].data64,
 197                           env->mem_log_stores[slot].data64);
 198         } else {
 199             HEX_DEBUG_LOG("\tBad store width %d\n", width);
 200             g_assert_not_reached();
 201         }
 202     }
 203 }
 204
 205 /* This function is a handy place to set a breakpoint */
 206 void HELPER(debug_commit_end)(CPUHexagonState *env, uint32_t this_PC,
 207                               int pred_written, int has_st0, int has_st1)
 208 {
 209     bool reg_printed = false;
 210     bool pred_printed = false;
 211     int i;
 212
 213     HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n", this_PC);
 214     HEX_DEBUG_LOG("slot_cancelled = %d\n", env->slot_cancelled);
 215
 216     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
 217         if (env->reg_written[i]) {
 218             if (!reg_printed) {
 219                 HEX_DEBUG_LOG("Regs written\n");
 220                 reg_printed = true;
 221             }
 222             HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n",
 223                           i, env->gpr[i], env->gpr[i]);
 224         }
 225     }
 226
 227     for (i = 0; i < NUM_PREGS; i++) {
 228         if (pred_written & (1 << i)) {
 229             if (!pred_printed) {
 230                 HEX_DEBUG_LOG("Predicates written\n");
 231                 pred_printed = true;
 232             }
 233             HEX_DEBUG_LOG("\tp%d = 0x" TARGET_FMT_lx "\n",
 234                           i, env->pred[i]);
 235         }
 236     }
 237
 238     if (has_st0 || has_st1) {
 239         HEX_DEBUG_LOG("Stores\n");
 240         if (has_st0) {
 241             print_store(env, 0);
 242         }
 243         if (has_st1) {
 244             print_store(env, 1);
 245         }
 246     }
 247
 248     HEX_DEBUG_LOG("Next PC = " TARGET_FMT_lx "\n", env->gpr[HEX_REG_PC]);
 249     HEX_DEBUG_LOG("Exec counters: pkt = " TARGET_FMT_lx
 250                   ", insn = " TARGET_FMT_lx
 251                   ", hvx = " TARGET_FMT_lx "\n",
 252                   env->gpr[HEX_REG_QEMU_PKT_CNT],
 253                   env->gpr[HEX_REG_QEMU_INSN_CNT],
 254                   env->gpr[HEX_REG_QEMU_HVX_CNT]);
 255
 256 }
 257
 258 int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS)
 259 {
 260     uint32_t K_const = extract32(M, 24, 4);
 261     uint32_t length = extract32(M, 0, 17);
 262     uint32_t new_ptr = RxV + offset;
 263     uint32_t start_addr;
 264     uint32_t end_addr;
 265
 266     if (K_const == 0 && length >= 4) {
 267         start_addr = CS;
 268         end_addr = start_addr + length;
 269     } else {
 270         /*
 271          * Versions v3 and earlier used the K value to specify a power-of-2 size
 272          * 2^(K+2) that is greater than the buffer length
 273          */
 274         int32_t mask = (1 << (K_const + 2)) - 1;
 275         start_addr = RxV & (~mask);
 276         end_addr = start_addr | length;
 277     }
 278
 279     if (new_ptr >= end_addr) {
 280         new_ptr -= length;
 281     } else if (new_ptr < start_addr) {
 282         new_ptr += length;
 283     }
 284
 285     return new_ptr;
 286 }
 287
 288 uint32_t HELPER(fbrev)(uint32_t addr)
 289 {
 290     /*
 291      *  Bit reverse the low 16 bits of the address
 292      */
 293     return deposit32(addr, 0, 16, revbit16(addr));
 294 }
 295
 296 static float32 build_float32(uint8_t sign, uint32_t exp, uint32_t mant)
 297 {
 298     return make_float32(
 299         ((sign & 1) << 31) |
 300         ((exp & 0xff) << SF_MANTBITS) |
 301         (mant & ((1 << SF_MANTBITS) - 1)));
 302 }
 303
 304 /*
 305  * sfrecipa, sfinvsqrta have two 32-bit results
 306  *     r0,p0=sfrecipa(r1,r2)
 307  *     r0,p0=sfinvsqrta(r1)
 308  *
 309  * Since helpers can only return a single value, we pack the two results
 310  * into a 64-bit value.
 311  */
 312 uint64_t HELPER(sfrecipa)(CPUHexagonState *env, float32 RsV, float32 RtV)
 313 {
 314     int32_t PeV = 0;
 315     float32 RdV;
 316     int idx;
 317     int adjust;
 318     int mant;
 319     int exp;
 320
 321     arch_fpop_start(env);
 322     if (arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status)) {
 323         PeV = adjust;
 324         idx = (RtV >> 16) & 0x7f;
 325         mant = (recip_lookup_table[idx] << 15) | 1;
 326         exp = SF_BIAS - (float32_getexp(RtV) - SF_BIAS) - 1;
 327         RdV = build_float32(extract32(RtV, 31, 1), exp, mant);
 328     }
 329     arch_fpop_end(env);
 330     return ((uint64_t)RdV << 32) | PeV;
 331 }
 332
 333 uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV)
 334 {
 335     int PeV = 0;
 336     float32 RdV;
 337     int idx;
 338     int adjust;
 339     int mant;
 340     int exp;
 341
 342     arch_fpop_start(env);
 343     if (arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status)) {
 344         PeV = adjust;
 345         idx = (RsV >> 17) & 0x7f;
 346         mant = (invsqrt_lookup_table[idx] << 15);
 347         exp = SF_BIAS - ((float32_getexp(RsV) - SF_BIAS) >> 1) - 1;
 348         RdV = build_float32(extract32(RsV, 31, 1), exp, mant);
 349     }
 350     arch_fpop_end(env);
 351     return ((uint64_t)RdV << 32) | PeV;
 352 }
 353
 354 int64_t HELPER(vacsh_val)(CPUHexagonState *env,
 355                            int64_t RxxV, int64_t RssV, int64_t RttV,
 356                            uint32_t pkt_need_commit)
 357 {
 358     for (int i = 0; i < 4; i++) {
 359         int xv = sextract64(RxxV, i * 16, 16);
 360         int sv = sextract64(RssV, i * 16, 16);
 361         int tv = sextract64(RttV, i * 16, 16);
 362         int max;
 363         xv = xv + tv;
 364         sv = sv - tv;
 365         max = xv > sv ? xv : sv;
 366         /* Note that fSATH can set the OVF bit in usr */
 367         RxxV = deposit64(RxxV, i * 16, 16, fSATH(max));
 368     }
 369     return RxxV;
 370 }
 371
 372 int32_t HELPER(vacsh_pred)(CPUHexagonState *env,
 373                            int64_t RxxV, int64_t RssV, int64_t RttV)
 374 {
 375     int32_t PeV = 0;
 376     for (int i = 0; i < 4; i++) {
 377         int xv = sextract64(RxxV, i * 16, 16);
 378         int sv = sextract64(RssV, i * 16, 16);
 379         int tv = sextract64(RttV, i * 16, 16);
 380         xv = xv + tv;
 381         sv = sv - tv;
 382         PeV = deposit32(PeV, i * 2, 1, (xv > sv));
 383         PeV = deposit32(PeV, i * 2 + 1, 1, (xv > sv));
 384     }
 385     return PeV;
 386 }
 387
 388 int64_t HELPER(cabacdecbin_val)(int64_t RssV, int64_t RttV)
 389 {
 390     int64_t RddV = 0;
 391     size4u_t state;
 392     size4u_t valMPS;
 393     size4u_t bitpos;
 394     size4u_t range;
 395     size4u_t offset;
 396     size4u_t rLPS;
 397     size4u_t rMPS;
 398
 399     state =  fEXTRACTU_RANGE(fGETWORD(1, RttV), 5, 0);
 400     valMPS = fEXTRACTU_RANGE(fGETWORD(1, RttV), 8, 8);
 401     bitpos = fEXTRACTU_RANGE(fGETWORD(0, RttV), 4, 0);
 402     range =  fGETWORD(0, RssV);
 403     offset = fGETWORD(1, RssV);
 404
 405     /* calculate rLPS */
 406     range <<= bitpos;
 407     offset <<= bitpos;
 408     rLPS = rLPS_table_64x4[state][(range >> 29) & 3];
 409     rLPS  = rLPS << 23;   /* left aligned */
 410
 411     /* calculate rMPS */
 412     rMPS = (range & 0xff800000) - rLPS;
 413
 414     /* most probable region */
 415     if (offset < rMPS) {
 416         RddV = AC_next_state_MPS_64[state];
 417         fINSERT_RANGE(RddV, 8, 8, valMPS);
 418         fINSERT_RANGE(RddV, 31, 23, (rMPS >> 23));
 419         fSETWORD(1, RddV, offset);
 420     }
 421     /* least probable region */
 422     else {
 423         RddV = AC_next_state_LPS_64[state];
 424         fINSERT_RANGE(RddV, 8, 8, ((!state) ? (1 - valMPS) : (valMPS)));
 425         fINSERT_RANGE(RddV, 31, 23, (rLPS >> 23));
 426         fSETWORD(1, RddV, (offset - rMPS));
 427     }
 428     return RddV;
 429 }
 430
 431 int32_t HELPER(cabacdecbin_pred)(int64_t RssV, int64_t RttV)
 432 {
 433     int32_t p0 = 0;
 434     size4u_t state;
 435     size4u_t valMPS;
 436     size4u_t bitpos;
 437     size4u_t range;
 438     size4u_t offset;
 439     size4u_t rLPS;
 440     size4u_t rMPS;
 441
 442     state =  fEXTRACTU_RANGE(fGETWORD(1, RttV), 5, 0);
 443     valMPS = fEXTRACTU_RANGE(fGETWORD(1, RttV), 8, 8);
 444     bitpos = fEXTRACTU_RANGE(fGETWORD(0, RttV), 4, 0);
 445     range =  fGETWORD(0, RssV);
 446     offset = fGETWORD(1, RssV);
 447
 448     /* calculate rLPS */
 449     range <<= bitpos;
 450     offset <<= bitpos;
 451     rLPS = rLPS_table_64x4[state][(range >> 29) & 3];
 452     rLPS  = rLPS << 23;   /* left aligned */
 453
 454     /* calculate rMPS */
 455     rMPS = (range & 0xff800000) - rLPS;
 456
 457     /* most probable region */
 458     if (offset < rMPS) {
 459         p0 = valMPS;
 460
 461     }
 462     /* least probable region */
 463     else {
 464         p0 = valMPS ^ 1;
 465     }
 466     return p0;
 467 }
 468
 469 static void probe_store(CPUHexagonState *env, int slot, int mmu_idx,
 470                         bool is_predicated)
 471 {
 472     if (!is_predicated || !(env->slot_cancelled & (1 << slot))) {
 473         size1u_t width = env->mem_log_stores[slot].width;
 474         target_ulong va = env->mem_log_stores[slot].va;
 475         uintptr_t ra = GETPC();
 476         probe_write(env, va, width, mmu_idx, ra);
 477     }
 478 }
 479
 480 /*
 481  * Called from a mem_noshuf packet to make sure the load doesn't
 482  * raise an exception
 483  */
 484 void HELPER(probe_noshuf_load)(CPUHexagonState *env, target_ulong va,
 485                                int size, int mmu_idx)
 486 {
 487     uintptr_t retaddr = GETPC();
 488     probe_read(env, va, size, mmu_idx, retaddr);
 489 }
 490
 491 /* Called during packet commit when there are two scalar stores */
 492 void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int args)
 493 {
 494     int mmu_idx = FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX);
 495     bool is_predicated =
 496         FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED);
 497     probe_store(env, 0, mmu_idx, is_predicated);
 498 }
 499
 500 void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx)
 501 {
 502     uintptr_t retaddr = GETPC();
 503     int i;
 504
 505     /* Normal (possibly masked) vector store */
 506     for (i = 0; i < VSTORES_MAX; i++) {
 507         if (env->vstore_pending[i]) {
 508             target_ulong va = env->vstore[i].va;
 509             int size = env->vstore[i].size;
 510             for (int j = 0; j < size; j++) {
 511                 if (test_bit(j, env->vstore[i].mask)) {
 512                     probe_write(env, va + j, 1, mmu_idx, retaddr);
 513                 }
 514             }
 515         }
 516     }
 517
 518     /* Scatter store */
 519     if (env->vtcm_pending) {
 520         if (env->vtcm_log.op) {
 521             /* Need to perform the scatter read/modify/write at commit time */
 522             if (env->vtcm_log.op_size == 2) {
 523                 SCATTER_OP_PROBE_MEM(size2u_t, mmu_idx, retaddr);
 524             } else if (env->vtcm_log.op_size == 4) {
 525                 /* Word Scatter += */
 526                 SCATTER_OP_PROBE_MEM(size4u_t, mmu_idx, retaddr);
 527             } else {
 528                 g_assert_not_reached();
 529             }
 530         } else {
 531             for (int i = 0; i < sizeof(MMVector); i++) {
 532                 if (test_bit(i, env->vtcm_log.mask)) {
 533                     probe_write(env, env->vtcm_log.va[i], 1, mmu_idx, retaddr);
 534                 }
 535
 536             }
 537         }
 538     }
 539 }
 540
 541 void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask)
 542 {
 543     bool has_st0 = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0);
 544     bool has_st1 = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1);
 545     bool has_hvx_stores =
 546         FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_HVX_STORES);
 547     bool s0_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S0_IS_PRED);
 548     bool s1_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S1_IS_PRED);
 549     int mmu_idx = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX);
 550
 551     if (has_st0) {
 552         probe_store(env, 0, mmu_idx, s0_is_pred);
 553     }
 554     if (has_st1) {
 555         probe_store(env, 1, mmu_idx, s1_is_pred);
 556     }
 557     if (has_hvx_stores) {
 558         HELPER(probe_hvx_stores)(env, mmu_idx);
 559     }
 560 }
 561
 562 /*
 563  * mem_noshuf
 564  * Section 5.5 of the Hexagon V67 Programmer's Reference Manual
 565  *
 566  * If the load is in slot 0 and there is a store in slot1 (that
 567  * wasn't cancelled), we have to do the store first.
 568  */
 569 static void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
 570                          uint32_t slot, target_ulong vaddr, int size)
 571 {
 572     if (slot == 0 && pkt_has_store_s1 &&
 573         ((env->slot_cancelled & (1 << 1)) == 0)) {
 574         HELPER(probe_noshuf_load)(env, vaddr, size, MMU_USER_IDX);
 575         HELPER(commit_store)(env, 1);
 576     }
 577 }
 578
 579 uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1,
 580                   uint32_t slot, target_ulong vaddr)
 581 {
 582     uintptr_t ra = GETPC();
 583     check_noshuf(env, pkt_has_store_s1, slot, vaddr, 1);
 584     return cpu_ldub_data_ra(env, vaddr, ra);
 585 }
 586
 587 uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1,
 588                    uint32_t slot, target_ulong vaddr)
 589 {
 590     uintptr_t ra = GETPC();
 591     check_noshuf(env, pkt_has_store_s1, slot, vaddr, 2);
 592     return cpu_lduw_data_ra(env, vaddr, ra);
 593 }
 594
 595 uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1,
 596                    uint32_t slot, target_ulong vaddr)
 597 {
 598     uintptr_t ra = GETPC();
 599     check_noshuf(env, pkt_has_store_s1, slot, vaddr, 4);
 600     return cpu_ldl_data_ra(env, vaddr, ra);
 601 }
 602
 603 uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1,
 604                    uint32_t slot, target_ulong vaddr)
 605 {
 606     uintptr_t ra = GETPC();
 607     check_noshuf(env, pkt_has_store_s1, slot, vaddr, 8);
 608     return cpu_ldq_data_ra(env, vaddr, ra);
 609 }
 610
 611 /* Floating point */
 612 float64 HELPER(conv_sf2df)(CPUHexagonState *env, float32 RsV)
 613 {
 614     float64 out_f64;
 615     arch_fpop_start(env);
 616     out_f64 = float32_to_float64(RsV, &env->fp_status);
 617     arch_fpop_end(env);
 618     return out_f64;
 619 }
 620
 621 float32 HELPER(conv_df2sf)(CPUHexagonState *env, float64 RssV)
 622 {
 623     float32 out_f32;
 624     arch_fpop_start(env);
 625     out_f32 = float64_to_float32(RssV, &env->fp_status);
 626     arch_fpop_end(env);
 627     return out_f32;
 628 }
 629
 630 float32 HELPER(conv_uw2sf)(CPUHexagonState *env, int32_t RsV)
 631 {
 632     float32 RdV;
 633     arch_fpop_start(env);
 634     RdV = uint32_to_float32(RsV, &env->fp_status);
 635     arch_fpop_end(env);
 636     return RdV;
 637 }
 638
 639 float64 HELPER(conv_uw2df)(CPUHexagonState *env, int32_t RsV)
 640 {
 641     float64 RddV;
 642     arch_fpop_start(env);
 643     RddV = uint32_to_float64(RsV, &env->fp_status);
 644     arch_fpop_end(env);
 645     return RddV;
 646 }
 647
 648 float32 HELPER(conv_w2sf)(CPUHexagonState *env, int32_t RsV)
 649 {
 650     float32 RdV;
 651     arch_fpop_start(env);
 652     RdV = int32_to_float32(RsV, &env->fp_status);
 653     arch_fpop_end(env);
 654     return RdV;
 655 }
 656
 657 float64 HELPER(conv_w2df)(CPUHexagonState *env, int32_t RsV)
 658 {
 659     float64 RddV;
 660     arch_fpop_start(env);
 661     RddV = int32_to_float64(RsV, &env->fp_status);
 662     arch_fpop_end(env);
 663     return RddV;
 664 }
 665
 666 float32 HELPER(conv_ud2sf)(CPUHexagonState *env, int64_t RssV)
 667 {
 668     float32 RdV;
 669     arch_fpop_start(env);
 670     RdV = uint64_to_float32(RssV, &env->fp_status);
 671     arch_fpop_end(env);
 672     return RdV;
 673 }
 674
 675 float64 HELPER(conv_ud2df)(CPUHexagonState *env, int64_t RssV)
 676 {
 677     float64 RddV;
 678     arch_fpop_start(env);
 679     RddV = uint64_to_float64(RssV, &env->fp_status);
 680     arch_fpop_end(env);
 681     return RddV;
 682 }
 683
 684 float32 HELPER(conv_d2sf)(CPUHexagonState *env, int64_t RssV)
 685 {
 686     float32 RdV;
 687     arch_fpop_start(env);
 688     RdV = int64_to_float32(RssV, &env->fp_status);
 689     arch_fpop_end(env);
 690     return RdV;
 691 }
 692
 693 float64 HELPER(conv_d2df)(CPUHexagonState *env, int64_t RssV)
 694 {
 695     float64 RddV;
 696     arch_fpop_start(env);
 697     RddV = int64_to_float64(RssV, &env->fp_status);
 698     arch_fpop_end(env);
 699     return RddV;
 700 }
 701
 702 uint32_t HELPER(conv_sf2uw)(CPUHexagonState *env, float32 RsV)
 703 {
 704     uint32_t RdV;
 705     arch_fpop_start(env);
 706     /* Hexagon checks the sign before rounding */
 707     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
 708         float_raise(float_flag_invalid, &env->fp_status);
 709         RdV = 0;
 710     } else {
 711         RdV = float32_to_uint32(RsV, &env->fp_status);
 712     }
 713     arch_fpop_end(env);
 714     return RdV;
 715 }
 716
 717 int32_t HELPER(conv_sf2w)(CPUHexagonState *env, float32 RsV)
 718 {
 719     int32_t RdV;
 720     arch_fpop_start(env);
 721     /* Hexagon returns -1 for NaN */
 722     if (float32_is_any_nan(RsV)) {
 723         float_raise(float_flag_invalid, &env->fp_status);
 724         RdV = -1;
 725     } else {
 726         RdV = float32_to_int32(RsV, &env->fp_status);
 727     }
 728     arch_fpop_end(env);
 729     return RdV;
 730 }
 731
 732 uint64_t HELPER(conv_sf2ud)(CPUHexagonState *env, float32 RsV)
 733 {
 734     uint64_t RddV;
 735     arch_fpop_start(env);
 736     /* Hexagon checks the sign before rounding */
 737     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
 738         float_raise(float_flag_invalid, &env->fp_status);
 739         RddV = 0;
 740     } else {
 741         RddV = float32_to_uint64(RsV, &env->fp_status);
 742     }
 743     arch_fpop_end(env);
 744     return RddV;
 745 }
 746
 747 int64_t HELPER(conv_sf2d)(CPUHexagonState *env, float32 RsV)
 748 {
 749     int64_t RddV;
 750     arch_fpop_start(env);
 751     /* Hexagon returns -1 for NaN */
 752     if (float32_is_any_nan(RsV)) {
 753         float_raise(float_flag_invalid, &env->fp_status);
 754         RddV = -1;
 755     } else {
 756         RddV = float32_to_int64(RsV, &env->fp_status);
 757     }
 758     arch_fpop_end(env);
 759     return RddV;
 760 }
 761
 762 uint32_t HELPER(conv_df2uw)(CPUHexagonState *env, float64 RssV)
 763 {
 764     uint32_t RdV;
 765     arch_fpop_start(env);
 766     /* Hexagon checks the sign before rounding */
 767     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
 768         float_raise(float_flag_invalid, &env->fp_status);
 769         RdV = 0;
 770     } else {
 771         RdV = float64_to_uint32(RssV, &env->fp_status);
 772     }
 773     arch_fpop_end(env);
 774     return RdV;
 775 }
 776
 777 int32_t HELPER(conv_df2w)(CPUHexagonState *env, float64 RssV)
 778 {
 779     int32_t RdV;
 780     arch_fpop_start(env);
 781     /* Hexagon returns -1 for NaN */
 782     if (float64_is_any_nan(RssV)) {
 783         float_raise(float_flag_invalid, &env->fp_status);
 784         RdV = -1;
 785     } else {
 786         RdV = float64_to_int32(RssV, &env->fp_status);
 787     }
 788     arch_fpop_end(env);
 789     return RdV;
 790 }
 791
 792 uint64_t HELPER(conv_df2ud)(CPUHexagonState *env, float64 RssV)
 793 {
 794     uint64_t RddV;
 795     arch_fpop_start(env);
 796     /* Hexagon checks the sign before rounding */
 797     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
 798         float_raise(float_flag_invalid, &env->fp_status);
 799         RddV = 0;
 800     } else {
 801         RddV = float64_to_uint64(RssV, &env->fp_status);
 802     }
 803     arch_fpop_end(env);
 804     return RddV;
 805 }
 806
 807 int64_t HELPER(conv_df2d)(CPUHexagonState *env, float64 RssV)
 808 {
 809     int64_t RddV;
 810     arch_fpop_start(env);
 811     /* Hexagon returns -1 for NaN */
 812     if (float64_is_any_nan(RssV)) {
 813         float_raise(float_flag_invalid, &env->fp_status);
 814         RddV = -1;
 815     } else {
 816         RddV = float64_to_int64(RssV, &env->fp_status);
 817     }
 818     arch_fpop_end(env);
 819     return RddV;
 820 }
 821
 822 uint32_t HELPER(conv_sf2uw_chop)(CPUHexagonState *env, float32 RsV)
 823 {
 824     uint32_t RdV;
 825     arch_fpop_start(env);
 826     /* Hexagon checks the sign before rounding */
 827     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
 828         float_raise(float_flag_invalid, &env->fp_status);
 829         RdV = 0;
 830     } else {
 831         RdV = float32_to_uint32_round_to_zero(RsV, &env->fp_status);
 832     }
 833     arch_fpop_end(env);
 834     return RdV;
 835 }
 836
 837 int32_t HELPER(conv_sf2w_chop)(CPUHexagonState *env, float32 RsV)
 838 {
 839     int32_t RdV;
 840     arch_fpop_start(env);
 841     /* Hexagon returns -1 for NaN */
 842     if (float32_is_any_nan(RsV)) {
 843         float_raise(float_flag_invalid, &env->fp_status);
 844         RdV = -1;
 845     } else {
 846         RdV = float32_to_int32_round_to_zero(RsV, &env->fp_status);
 847     }
 848     arch_fpop_end(env);
 849     return RdV;
 850 }
 851
 852 uint64_t HELPER(conv_sf2ud_chop)(CPUHexagonState *env, float32 RsV)
 853 {
 854     uint64_t RddV;
 855     arch_fpop_start(env);
 856     /* Hexagon checks the sign before rounding */
 857     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
 858         float_raise(float_flag_invalid, &env->fp_status);
 859         RddV = 0;
 860     } else {
 861         RddV = float32_to_uint64_round_to_zero(RsV, &env->fp_status);
 862     }
 863     arch_fpop_end(env);
 864     return RddV;
 865 }
 866
 867 int64_t HELPER(conv_sf2d_chop)(CPUHexagonState *env, float32 RsV)
 868 {
 869     int64_t RddV;
 870     arch_fpop_start(env);
 871     /* Hexagon returns -1 for NaN */
 872     if (float32_is_any_nan(RsV)) {
 873         float_raise(float_flag_invalid, &env->fp_status);
 874         RddV = -1;
 875     } else {
 876         RddV = float32_to_int64_round_to_zero(RsV, &env->fp_status);
 877     }
 878     arch_fpop_end(env);
 879     return RddV;
 880 }
 881
 882 uint32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV)
 883 {
 884     uint32_t RdV;
 885     arch_fpop_start(env);
 886     /* Hexagon checks the sign before rounding */
 887     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
 888         float_raise(float_flag_invalid, &env->fp_status);
 889         RdV = 0;
 890     } else {
 891         RdV = float64_to_uint32_round_to_zero(RssV, &env->fp_status);
 892     }
 893     arch_fpop_end(env);
 894     return RdV;
 895 }
 896
 897 int32_t HELPER(conv_df2w_chop)(CPUHexagonState *env, float64 RssV)
 898 {
 899     int32_t RdV;
 900     arch_fpop_start(env);
 901     /* Hexagon returns -1 for NaN */
 902     if (float64_is_any_nan(RssV)) {
 903         float_raise(float_flag_invalid, &env->fp_status);
 904         RdV = -1;
 905     } else {
 906         RdV = float64_to_int32_round_to_zero(RssV, &env->fp_status);
 907     }
 908     arch_fpop_end(env);
 909     return RdV;
 910 }
 911
 912 uint64_t HELPER(conv_df2ud_chop)(CPUHexagonState *env, float64 RssV)
 913 {
 914     uint64_t RddV;
 915     arch_fpop_start(env);
 916     /* Hexagon checks the sign before rounding */
 917     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
 918         float_raise(float_flag_invalid, &env->fp_status);
 919         RddV = 0;
 920     } else {
 921         RddV = float64_to_uint64_round_to_zero(RssV, &env->fp_status);
 922     }
 923     arch_fpop_end(env);
 924     return RddV;
 925 }
 926
 927 int64_t HELPER(conv_df2d_chop)(CPUHexagonState *env, float64 RssV)
 928 {
 929     int64_t RddV;
 930     arch_fpop_start(env);
 931     /* Hexagon returns -1 for NaN */
 932     if (float64_is_any_nan(RssV)) {
 933         float_raise(float_flag_invalid, &env->fp_status);
 934         RddV = -1;
 935     } else {
 936         RddV = float64_to_int64_round_to_zero(RssV, &env->fp_status);
 937     }
 938     arch_fpop_end(env);
 939     return RddV;
 940 }
 941
 942 float32 HELPER(sfadd)(CPUHexagonState *env, float32 RsV, float32 RtV)
 943 {
 944     float32 RdV;
 945     arch_fpop_start(env);
 946     RdV = float32_add(RsV, RtV, &env->fp_status);
 947     arch_fpop_end(env);
 948     return RdV;
 949 }
 950
 951 float32 HELPER(sfsub)(CPUHexagonState *env, float32 RsV, float32 RtV)
 952 {
 953     float32 RdV;
 954     arch_fpop_start(env);
 955     RdV = float32_sub(RsV, RtV, &env->fp_status);
 956     arch_fpop_end(env);
 957     return RdV;
 958 }
 959
 960 int32_t HELPER(sfcmpeq)(CPUHexagonState *env, float32 RsV, float32 RtV)
 961 {
 962     int32_t PdV;
 963     arch_fpop_start(env);
 964     PdV = f8BITSOF(float32_eq_quiet(RsV, RtV, &env->fp_status));
 965     arch_fpop_end(env);
 966     return PdV;
 967 }
 968
 969 int32_t HELPER(sfcmpgt)(CPUHexagonState *env, float32 RsV, float32 RtV)
 970 {
 971     int cmp;
 972     int32_t PdV;
 973     arch_fpop_start(env);
 974     cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
 975     PdV = f8BITSOF(cmp == float_relation_greater);
 976     arch_fpop_end(env);
 977     return PdV;
 978 }
 979
 980 int32_t HELPER(sfcmpge)(CPUHexagonState *env, float32 RsV, float32 RtV)
 981 {
 982     int cmp;
 983     int32_t PdV;
 984     arch_fpop_start(env);
 985     cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
 986     PdV = f8BITSOF(cmp == float_relation_greater ||
 987                    cmp == float_relation_equal);
 988     arch_fpop_end(env);
 989     return PdV;
 990 }
 991
 992 int32_t HELPER(sfcmpuo)(CPUHexagonState *env, float32 RsV, float32 RtV)
 993 {
 994     int32_t PdV;
 995     arch_fpop_start(env);
 996     PdV = f8BITSOF(float32_unordered_quiet(RsV, RtV, &env->fp_status));
 997     arch_fpop_end(env);
 998     return PdV;
 999 }
1000
1001 float32 HELPER(sfmax)(CPUHexagonState *env, float32 RsV, float32 RtV)
1002 {
1003     float32 RdV;
1004     arch_fpop_start(env);
1005     RdV = float32_maximum_number(RsV, RtV, &env->fp_status);
1006     arch_fpop_end(env);
1007     return RdV;
1008 }
1009
1010 float32 HELPER(sfmin)(CPUHexagonState *env, float32 RsV, float32 RtV)
1011 {
1012     float32 RdV;
1013     arch_fpop_start(env);
1014     RdV = float32_minimum_number(RsV, RtV, &env->fp_status);
1015     arch_fpop_end(env);
1016     return RdV;
1017 }
1018
1019 int32_t HELPER(sfclass)(CPUHexagonState *env, float32 RsV, int32_t uiV)
1020 {
1021     int32_t PdV = 0;
1022     arch_fpop_start(env);
1023     if (fGETBIT(0, uiV) && float32_is_zero(RsV)) {
1024         PdV = 0xff;
1025     }
1026     if (fGETBIT(1, uiV) && float32_is_normal(RsV)) {
1027         PdV = 0xff;
1028     }
1029     if (fGETBIT(2, uiV) && float32_is_denormal(RsV)) {
1030         PdV = 0xff;
1031     }
1032     if (fGETBIT(3, uiV) && float32_is_infinity(RsV)) {
1033         PdV = 0xff;
1034     }
1035     if (fGETBIT(4, uiV) && float32_is_any_nan(RsV)) {
1036         PdV = 0xff;
1037     }
1038     set_float_exception_flags(0, &env->fp_status);
1039     arch_fpop_end(env);
1040     return PdV;
1041 }
1042
1043 float32 HELPER(sffixupn)(CPUHexagonState *env, float32 RsV, float32 RtV)
1044 {
1045     float32 RdV = 0;
1046     int adjust;
1047     arch_fpop_start(env);
1048     arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
1049     RdV = RsV;
1050     arch_fpop_end(env);
1051     return RdV;
1052 }
1053
1054 float32 HELPER(sffixupd)(CPUHexagonState *env, float32 RsV, float32 RtV)
1055 {
1056     float32 RdV = 0;
1057     int adjust;
1058     arch_fpop_start(env);
1059     arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
1060     RdV = RtV;
1061     arch_fpop_end(env);
1062     return RdV;
1063 }
1064
1065 float32 HELPER(sffixupr)(CPUHexagonState *env, float32 RsV)
1066 {
1067     float32 RdV = 0;
1068     int adjust;
1069     arch_fpop_start(env);
1070     arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status);
1071     RdV = RsV;
1072     arch_fpop_end(env);
1073     return RdV;
1074 }
1075
1076 float64 HELPER(dfadd)(CPUHexagonState *env, float64 RssV, float64 RttV)
1077 {
1078     float64 RddV;
1079     arch_fpop_start(env);
1080     RddV = float64_add(RssV, RttV, &env->fp_status);
1081     arch_fpop_end(env);
1082     return RddV;
1083 }
1084
1085 float64 HELPER(dfsub)(CPUHexagonState *env, float64 RssV, float64 RttV)
1086 {
1087     float64 RddV;
1088     arch_fpop_start(env);
1089     RddV = float64_sub(RssV, RttV, &env->fp_status);
1090     arch_fpop_end(env);
1091     return RddV;
1092 }
1093
1094 float64 HELPER(dfmax)(CPUHexagonState *env, float64 RssV, float64 RttV)
1095 {
1096     float64 RddV;
1097     arch_fpop_start(env);
1098     RddV = float64_maximum_number(RssV, RttV, &env->fp_status);
1099     arch_fpop_end(env);
1100     return RddV;
1101 }
1102
1103 float64 HELPER(dfmin)(CPUHexagonState *env, float64 RssV, float64 RttV)
1104 {
1105     float64 RddV;
1106     arch_fpop_start(env);
1107     RddV = float64_minimum_number(RssV, RttV, &env->fp_status);
1108     arch_fpop_end(env);
1109     return RddV;
1110 }
1111
1112 int32_t HELPER(dfcmpeq)(CPUHexagonState *env, float64 RssV, float64 RttV)
1113 {
1114     int32_t PdV;
1115     arch_fpop_start(env);
1116     PdV = f8BITSOF(float64_eq_quiet(RssV, RttV, &env->fp_status));
1117     arch_fpop_end(env);
1118     return PdV;
1119 }
1120
1121 int32_t HELPER(dfcmpgt)(CPUHexagonState *env, float64 RssV, float64 RttV)
1122 {
1123     int cmp;
1124     int32_t PdV;
1125     arch_fpop_start(env);
1126     cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1127     PdV = f8BITSOF(cmp == float_relation_greater);
1128     arch_fpop_end(env);
1129     return PdV;
1130 }
1131
1132 int32_t HELPER(dfcmpge)(CPUHexagonState *env, float64 RssV, float64 RttV)
1133 {
1134     int cmp;
1135     int32_t PdV;
1136     arch_fpop_start(env);
1137     cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1138     PdV = f8BITSOF(cmp == float_relation_greater ||
1139                    cmp == float_relation_equal);
1140     arch_fpop_end(env);
1141     return PdV;
1142 }
1143
1144 int32_t HELPER(dfcmpuo)(CPUHexagonState *env, float64 RssV, float64 RttV)
1145 {
1146     int32_t PdV;
1147     arch_fpop_start(env);
1148     PdV = f8BITSOF(float64_unordered_quiet(RssV, RttV, &env->fp_status));
1149     arch_fpop_end(env);
1150     return PdV;
1151 }
1152
1153 int32_t HELPER(dfclass)(CPUHexagonState *env, float64 RssV, int32_t uiV)
1154 {
1155     int32_t PdV = 0;
1156     arch_fpop_start(env);
1157     if (fGETBIT(0, uiV) && float64_is_zero(RssV)) {
1158         PdV = 0xff;
1159     }
1160     if (fGETBIT(1, uiV) && float64_is_normal(RssV)) {
1161         PdV = 0xff;
1162     }
1163     if (fGETBIT(2, uiV) && float64_is_denormal(RssV)) {
1164         PdV = 0xff;
1165     }
1166     if (fGETBIT(3, uiV) && float64_is_infinity(RssV)) {
1167         PdV = 0xff;
1168     }
1169     if (fGETBIT(4, uiV) && float64_is_any_nan(RssV)) {
1170         PdV = 0xff;
1171     }
1172     set_float_exception_flags(0, &env->fp_status);
1173     arch_fpop_end(env);
1174     return PdV;
1175 }
1176
1177 float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
1178 {
1179     float32 RdV;
1180     arch_fpop_start(env);
1181     RdV = internal_mpyf(RsV, RtV, &env->fp_status);
1182     arch_fpop_end(env);
1183     return RdV;
1184 }
1185
1186 float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
1187                       float32 RsV, float32 RtV)
1188 {
1189     arch_fpop_start(env);
1190     RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1191     arch_fpop_end(env);
1192     return RxV;
1193 }
1194
1195 static bool is_zero_prod(float32 a, float32 b)
1196 {
1197     return ((float32_is_zero(a) && is_finite(b)) ||
1198             (float32_is_zero(b) && is_finite(a)));
1199 }
1200
1201 static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
1202 {
1203     float32 ret = dst;
1204     if (float32_is_any_nan(x)) {
1205         if (extract32(x, 22, 1) == 0) {
1206             float_raise(float_flag_invalid, fp_status);
1207         }
1208         ret = make_float32(0xffffffff);    /* nan */
1209     }
1210     return ret;
1211 }
1212
1213 float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
1214                          float32 RsV, float32 RtV, float32 PuV)
1215 {
1216     size4s_t tmp;
1217     arch_fpop_start(env);
1218     RxV = check_nan(RxV, RxV, &env->fp_status);
1219     RxV = check_nan(RxV, RsV, &env->fp_status);
1220     RxV = check_nan(RxV, RtV, &env->fp_status);
1221     tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
1222     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1223         RxV = tmp;
1224     }
1225     arch_fpop_end(env);
1226     return RxV;
1227 }
1228
1229 float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
1230                       float32 RsV, float32 RtV)
1231 {
1232     float32 neg_RsV;
1233     arch_fpop_start(env);
1234     neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1);
1235     RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
1236     arch_fpop_end(env);
1237     return RxV;
1238 }
1239
1240 static bool is_inf_prod(int32_t a, int32_t b)
1241 {
1242     return (float32_is_infinity(a) && float32_is_infinity(b)) ||
1243            (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
1244            (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
1245 }
1246
1247 float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
1248                           float32 RsV, float32 RtV)
1249 {
1250     bool infinp;
1251     bool infminusinf;
1252     float32 tmp;
1253
1254     arch_fpop_start(env);
1255     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1256     infminusinf = float32_is_infinity(RxV) &&
1257                   is_inf_prod(RsV, RtV) &&
1258                   (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
1259     infinp = float32_is_infinity(RxV) ||
1260              float32_is_infinity(RtV) ||
1261              float32_is_infinity(RsV);
1262     RxV = check_nan(RxV, RxV, &env->fp_status);
1263     RxV = check_nan(RxV, RsV, &env->fp_status);
1264     RxV = check_nan(RxV, RtV, &env->fp_status);
1265     tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1266     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1267         RxV = tmp;
1268     }
1269     set_float_exception_flags(0, &env->fp_status);
1270     if (float32_is_infinity(RxV) && !infinp) {
1271         RxV = RxV - 1;
1272     }
1273     if (infminusinf) {
1274         RxV = 0;
1275     }
1276     arch_fpop_end(env);
1277     return RxV;
1278 }
1279
1280 float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
1281                           float32 RsV, float32 RtV)
1282 {
1283     bool infinp;
1284     bool infminusinf;
1285     float32 tmp;
1286
1287     arch_fpop_start(env);
1288     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1289     infminusinf = float32_is_infinity(RxV) &&
1290                   is_inf_prod(RsV, RtV) &&
1291                   (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
1292     infinp = float32_is_infinity(RxV) ||
1293              float32_is_infinity(RtV) ||
1294              float32_is_infinity(RsV);
1295     RxV = check_nan(RxV, RxV, &env->fp_status);
1296     RxV = check_nan(RxV, RsV, &env->fp_status);
1297     RxV = check_nan(RxV, RtV, &env->fp_status);
1298     float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
1299     tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
1300     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1301         RxV = tmp;
1302     }
1303     set_float_exception_flags(0, &env->fp_status);
1304     if (float32_is_infinity(RxV) && !infinp) {
1305         RxV = RxV - 1;
1306     }
1307     if (infminusinf) {
1308         RxV = 0;
1309     }
1310     arch_fpop_end(env);
1311     return RxV;
1312 }
1313
1314 float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
1315 {
1316     int64_t RddV;
1317     arch_fpop_start(env);
1318     if (float64_is_denormal(RssV) &&
1319         (float64_getexp(RttV) >= 512) &&
1320         float64_is_normal(RttV)) {
1321         RddV = float64_mul(RssV, make_float64(0x4330000000000000),
1322                            &env->fp_status);
1323     } else if (float64_is_denormal(RttV) &&
1324                (float64_getexp(RssV) >= 512) &&
1325                float64_is_normal(RssV)) {
1326         RddV = float64_mul(RssV, make_float64(0x3cb0000000000000),
1327                            &env->fp_status);
1328     } else {
1329         RddV = RssV;
1330     }
1331     arch_fpop_end(env);
1332     return RddV;
1333 }
1334
1335 float64 HELPER(dfmpyhh)(CPUHexagonState *env, float64 RxxV,
1336                         float64 RssV, float64 RttV)
1337 {
1338     arch_fpop_start(env);
1339     RxxV = internal_mpyhh(RssV, RttV, RxxV, &env->fp_status);
1340     arch_fpop_end(env);
1341     return RxxV;
1342 }
1343
1344 /* Histogram instructions */
1345
1346 void HELPER(vhist)(CPUHexagonState *env)
1347 {
1348     MMVector *input = &env->tmp_VRegs[0];
1349
1350     for (int lane = 0; lane < 8; lane++) {
1351         for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1352             unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1353             unsigned char regno = value >> 3;
1354             unsigned char element = value & 7;
1355
1356             env->VRegs[regno].uh[(sizeof(MMVector) / 16) * lane + element]++;
1357         }
1358     }
1359 }
1360
1361 void HELPER(vhistq)(CPUHexagonState *env)
1362 {
1363     MMVector *input = &env->tmp_VRegs[0];
1364
1365     for (int lane = 0; lane < 8; lane++) {
1366         for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1367             unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1368             unsigned char regno = value >> 3;
1369             unsigned char element = value & 7;
1370
1371             if (fGETQBIT(env->qtmp, sizeof(MMVector) / 8 * lane + i)) {
1372                 env->VRegs[regno].uh[
1373                     (sizeof(MMVector) / 16) * lane + element]++;
1374             }
1375         }
1376     }
1377 }
1378
1379 void HELPER(vwhist256)(CPUHexagonState *env)
1380 {
1381     MMVector *input = &env->tmp_VRegs[0];
1382
1383     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1384         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1385         unsigned int weight = fGETUBYTE(1, input->h[i]);
1386         unsigned int vindex = (bucket >> 3) & 0x1F;
1387         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1388
1389         env->VRegs[vindex].uh[elindex] =
1390             env->VRegs[vindex].uh[elindex] + weight;
1391     }
1392 }
1393
1394 void HELPER(vwhist256q)(CPUHexagonState *env)
1395 {
1396     MMVector *input = &env->tmp_VRegs[0];
1397
1398     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1399         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1400         unsigned int weight = fGETUBYTE(1, input->h[i]);
1401         unsigned int vindex = (bucket >> 3) & 0x1F;
1402         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1403
1404         if (fGETQBIT(env->qtmp, 2 * i)) {
1405             env->VRegs[vindex].uh[elindex] =
1406                 env->VRegs[vindex].uh[elindex] + weight;
1407         }
1408     }
1409 }
1410
1411 void HELPER(vwhist256_sat)(CPUHexagonState *env)
1412 {
1413     MMVector *input = &env->tmp_VRegs[0];
1414
1415     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1416         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1417         unsigned int weight = fGETUBYTE(1, input->h[i]);
1418         unsigned int vindex = (bucket >> 3) & 0x1F;
1419         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1420
1421         env->VRegs[vindex].uh[elindex] =
1422             fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1423     }
1424 }
1425
1426 void HELPER(vwhist256q_sat)(CPUHexagonState *env)
1427 {
1428     MMVector *input = &env->tmp_VRegs[0];
1429
1430     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1431         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1432         unsigned int weight = fGETUBYTE(1, input->h[i]);
1433         unsigned int vindex = (bucket >> 3) & 0x1F;
1434         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1435
1436         if (fGETQBIT(env->qtmp, 2 * i)) {
1437             env->VRegs[vindex].uh[elindex] =
1438                 fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1439         }
1440     }
1441 }
1442
1443 void HELPER(vwhist128)(CPUHexagonState *env)
1444 {
1445     MMVector *input = &env->tmp_VRegs[0];
1446
1447     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1448         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1449         unsigned int weight = fGETUBYTE(1, input->h[i]);
1450         unsigned int vindex = (bucket >> 3) & 0x1F;
1451         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1452
1453         env->VRegs[vindex].uw[elindex] =
1454             env->VRegs[vindex].uw[elindex] + weight;
1455     }
1456 }
1457
1458 void HELPER(vwhist128q)(CPUHexagonState *env)
1459 {
1460     MMVector *input = &env->tmp_VRegs[0];
1461
1462     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1463         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1464         unsigned int weight = fGETUBYTE(1, input->h[i]);
1465         unsigned int vindex = (bucket >> 3) & 0x1F;
1466         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1467
1468         if (fGETQBIT(env->qtmp, 2 * i)) {
1469             env->VRegs[vindex].uw[elindex] =
1470                 env->VRegs[vindex].uw[elindex] + weight;
1471         }
1472     }
1473 }
1474
1475 void HELPER(vwhist128m)(CPUHexagonState *env, int32_t uiV)
1476 {
1477     MMVector *input = &env->tmp_VRegs[0];
1478
1479     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1480         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1481         unsigned int weight = fGETUBYTE(1, input->h[i]);
1482         unsigned int vindex = (bucket >> 3) & 0x1F;
1483         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1484
1485         if ((bucket & 1) == uiV) {
1486             env->VRegs[vindex].uw[elindex] =
1487                 env->VRegs[vindex].uw[elindex] + weight;
1488         }
1489     }
1490 }
1491
1492 void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV)
1493 {
1494     MMVector *input = &env->tmp_VRegs[0];
1495
1496     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1497         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1498         unsigned int weight = fGETUBYTE(1, input->h[i]);
1499         unsigned int vindex = (bucket >> 3) & 0x1F;
1500         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1501
1502         if (((bucket & 1) == uiV) && fGETQBIT(env->qtmp, 2 * i)) {
1503             env->VRegs[vindex].uw[elindex] =
1504                 env->VRegs[vindex].uw[elindex] + weight;
1505         }
1506     }
1507 }
1508
/* These macros can be referenced in the generated helper functions */
/* warn(): expands to nothing, so its arguments are discarded */
#define warn(...) /* Nothing */
/*
 * fatal(): discards its arguments and expands to g_assert_not_reached().
 * Note that the expansion already ends in a semicolon.
 */
#define fatal(...) g_assert_not_reached();

/* BOGUS_HELPER(tag): prints an error line containing the stringified tag */
#define BOGUS_HELPER(tag) \
    printf("ERROR: bogus helper: " #tag "\n")

/* Presumably the build-generated helper definitions that use the macros above */
#include "helper_funcs_generated.c.inc"