target/arm/translate-sve.c

   1 /*
   2  * AArch64 SVE translation
   3  *
   4  * Copyright (c) 2018 Linaro, Ltd
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "tcg-op-gvec.h"
  25 #include "tcg-gvec-desc.h"
  26 #include "qemu/log.h"
  27 #include "arm_ldst.h"
  28 #include "translate.h"
  29 #include "internals.h"
  30 #include "exec/helper-proto.h"
  31 #include "exec/helper-gen.h"
  32 #include "exec/log.h"
  33 #include "trace-tcg.h"
  34 #include "translate-a64.h"
  35 #include "fpu/softfloat.h"
  36
  37
  38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
  39                          TCGv_i64, uint32_t, uint32_t);
  40
  41 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
  42                                      TCGv_ptr, TCGv_i32);
  43 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
  44                                      TCGv_ptr, TCGv_ptr, TCGv_i32);
  45
  46 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
  47 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
  48                                          TCGv_ptr, TCGv_i64, TCGv_i32);
  49
  50 /*
  51  * Helpers for extracting complex instruction fields.
  52  */
  53
  54 /* See e.g. ASR (immediate, predicated).
  55  * Returns -1 for unallocated encoding; diagnose later.
  56  */
  57 static int tszimm_esz(int x)
  58 {
  59     x >>= 3;  /* discard imm3 */
  60     return 31 - clz32(x);
  61 }
  62
  63 static int tszimm_shr(int x)
  64 {
  65     return (16 << tszimm_esz(x)) - x;
  66 }
  67
  68 /* See e.g. LSL (immediate, predicated).  */
  69 static int tszimm_shl(int x)
  70 {
  71     return x - (8 << tszimm_esz(x));
  72 }
  73
  74 static inline int plus1(int x)
  75 {
  76     return x + 1;
  77 }
  78
  79 /* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  80 static inline int expand_imm_sh8s(int x)
  81 {
  82     return (int8_t)x << (x & 0x100 ? 8 : 0);
  83 }
  84
  85 static inline int expand_imm_sh8u(int x)
  86 {
  87     return (uint8_t)x << (x & 0x100 ? 8 : 0);
  88 }
  89
  90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  91  * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  92  */
  93 static inline int msz_dtype(int msz)
  94 {
  95     static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  96     return dtype[msz];
  97 }
  98
  99 /*
 100  * Include the generated decoder.
 101  */
 102
 103 #include "decode-sve.inc.c"
 104
 105 /*
 106  * Implement all of the translator functions referenced by the decoder.
 107  */
 108
 109 /* Return the offset info CPUARMState of the predicate vector register Pn.
 110  * Note for this purpose, FFR is P16.
 111  */
 112 static inline int pred_full_reg_offset(DisasContext *s, int regno)
 113 {
 114     return offsetof(CPUARMState, vfp.pregs[regno]);
 115 }
 116
 117 /* Return the byte size of the whole predicate register, VL / 64.  */
 118 static inline int pred_full_reg_size(DisasContext *s)
 119 {
 120     return s->sve_len >> 3;
 121 }
 122
 123 /* Round up the size of a register to a size allowed by
 124  * the tcg vector infrastructure.  Any operation which uses this
 125  * size may assume that the bits above pred_full_reg_size are zero,
 126  * and must leave them the same way.
 127  *
 128  * Note that this is not needed for the vector registers as they
 129  * are always properly sized for tcg vectors.
 130  */
 131 static int size_for_gvec(int size)
 132 {
 133     if (size <= 8) {
 134         return 8;
 135     } else {
 136         return QEMU_ALIGN_UP(size, 16);
 137     }
 138 }
 139
 140 static int pred_gvec_reg_size(DisasContext *s)
 141 {
 142     return size_for_gvec(pred_full_reg_size(s));
 143 }
 144
 145 /* Invoke a vector expander on two Zregs.  */
 146 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
 147                          int esz, int rd, int rn)
 148 {
 149     if (sve_access_check(s)) {
 150         unsigned vsz = vec_full_reg_size(s);
 151         gvec_fn(esz, vec_full_reg_offset(s, rd),
 152                 vec_full_reg_offset(s, rn), vsz, vsz);
 153     }
 154     return true;
 155 }
 156
 157 /* Invoke a vector expander on three Zregs.  */
 158 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
 159                          int esz, int rd, int rn, int rm)
 160 {
 161     if (sve_access_check(s)) {
 162         unsigned vsz = vec_full_reg_size(s);
 163         gvec_fn(esz, vec_full_reg_offset(s, rd),
 164                 vec_full_reg_offset(s, rn),
 165                 vec_full_reg_offset(s, rm), vsz, vsz);
 166     }
 167     return true;
 168 }
 169
 170 /* Invoke a vector move on two Zregs.  */
 171 static bool do_mov_z(DisasContext *s, int rd, int rn)
 172 {
 173     return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
 174 }
 175
 176 /* Initialize a Zreg with replications of a 64-bit immediate.  */
 177 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 178 {
 179     unsigned vsz = vec_full_reg_size(s);
 180     tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
 181 }
 182
 183 /* Invoke a vector expander on two Pregs.  */
 184 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
 185                          int esz, int rd, int rn)
 186 {
 187     if (sve_access_check(s)) {
 188         unsigned psz = pred_gvec_reg_size(s);
 189         gvec_fn(esz, pred_full_reg_offset(s, rd),
 190                 pred_full_reg_offset(s, rn), psz, psz);
 191     }
 192     return true;
 193 }
 194
 195 /* Invoke a vector expander on three Pregs.  */
 196 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
 197                          int esz, int rd, int rn, int rm)
 198 {
 199     if (sve_access_check(s)) {
 200         unsigned psz = pred_gvec_reg_size(s);
 201         gvec_fn(esz, pred_full_reg_offset(s, rd),
 202                 pred_full_reg_offset(s, rn),
 203                 pred_full_reg_offset(s, rm), psz, psz);
 204     }
 205     return true;
 206 }
 207
 208 /* Invoke a vector operation on four Pregs.  */
 209 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
 210                         int rd, int rn, int rm, int rg)
 211 {
 212     if (sve_access_check(s)) {
 213         unsigned psz = pred_gvec_reg_size(s);
 214         tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
 215                        pred_full_reg_offset(s, rn),
 216                        pred_full_reg_offset(s, rm),
 217                        pred_full_reg_offset(s, rg),
 218                        psz, psz, gvec_op);
 219     }
 220     return true;
 221 }
 222
 223 /* Invoke a vector move on two Pregs.  */
 224 static bool do_mov_p(DisasContext *s, int rd, int rn)
 225 {
 226     return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
 227 }
 228
 229 /* Set the cpu flags as per a return from an SVE helper.  */
 230 static void do_pred_flags(TCGv_i32 t)
 231 {
 232     tcg_gen_mov_i32(cpu_NF, t);
 233     tcg_gen_andi_i32(cpu_ZF, t, 2);
 234     tcg_gen_andi_i32(cpu_CF, t, 1);
 235     tcg_gen_movi_i32(cpu_VF, 0);
 236 }
 237
 238 /* Subroutines computing the ARM PredTest psuedofunction.  */
 239 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
 240 {
 241     TCGv_i32 t = tcg_temp_new_i32();
 242
 243     gen_helper_sve_predtest1(t, d, g);
 244     do_pred_flags(t);
 245     tcg_temp_free_i32(t);
 246 }
 247
 248 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
 249 {
 250     TCGv_ptr dptr = tcg_temp_new_ptr();
 251     TCGv_ptr gptr = tcg_temp_new_ptr();
 252     TCGv_i32 t;
 253
 254     tcg_gen_addi_ptr(dptr, cpu_env, dofs);
 255     tcg_gen_addi_ptr(gptr, cpu_env, gofs);
 256     t = tcg_const_i32(words);
 257
 258     gen_helper_sve_predtest(t, dptr, gptr, t);
 259     tcg_temp_free_ptr(dptr);
 260     tcg_temp_free_ptr(gptr);
 261
 262     do_pred_flags(t);
 263     tcg_temp_free_i32(t);
 264 }
 265
 266 /* For each element size, the bits within a predicate word that are active.  */
 267 const uint64_t pred_esz_masks[4] = {
 268     0xffffffffffffffffull, 0x5555555555555555ull,
 269     0x1111111111111111ull, 0x0101010101010101ull
 270 };
 271
 272 /*
 273  *** SVE Logical - Unpredicated Group
 274  */
 275
 276 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
 277 {
 278     return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
 279 }
 280
 281 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
 282 {
 283     if (a->rn == a->rm) { /* MOV */
 284         return do_mov_z(s, a->rd, a->rn);
 285     } else {
 286         return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
 287     }
 288 }
 289
 290 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
 291 {
 292     return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
 293 }
 294
 295 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
 296 {
 297     return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
 298 }
 299
 300 /*
 301  *** SVE Integer Arithmetic - Unpredicated Group
 302  */
 303
 304 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
 305 {
 306     return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
 307 }
 308
 309 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
 310 {
 311     return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
 312 }
 313
 314 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
 315 {
 316     return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
 317 }
 318
 319 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
 320 {
 321     return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
 322 }
 323
 324 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
 325 {
 326     return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
 327 }
 328
 329 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
 330 {
 331     return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
 332 }
 333
 334 /*
 335  *** SVE Integer Arithmetic - Binary Predicated Group
 336  */
 337
 338 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 339 {
 340     unsigned vsz = vec_full_reg_size(s);
 341     if (fn == NULL) {
 342         return false;
 343     }
 344     if (sve_access_check(s)) {
 345         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 346                            vec_full_reg_offset(s, a->rn),
 347                            vec_full_reg_offset(s, a->rm),
 348                            pred_full_reg_offset(s, a->pg),
 349                            vsz, vsz, 0, fn);
 350     }
 351     return true;
 352 }
 353
 354 /* Select active elememnts from Zn and inactive elements from Zm,
 355  * storing the result in Zd.
 356  */
 357 static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
 358 {
 359     static gen_helper_gvec_4 * const fns[4] = {
 360         gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
 361         gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
 362     };
 363     unsigned vsz = vec_full_reg_size(s);
 364     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 365                        vec_full_reg_offset(s, rn),
 366                        vec_full_reg_offset(s, rm),
 367                        pred_full_reg_offset(s, pg),
 368                        vsz, vsz, 0, fns[esz]);
 369 }
 370
 371 #define DO_ZPZZ(NAME, name) \
 372 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
 373 {                                                                         \
 374     static gen_helper_gvec_4 * const fns[4] = {                           \
 375         gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
 376         gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
 377     };                                                                    \
 378     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 379 }
 380
 381 DO_ZPZZ(AND, and)
 382 DO_ZPZZ(EOR, eor)
 383 DO_ZPZZ(ORR, orr)
 384 DO_ZPZZ(BIC, bic)
 385
 386 DO_ZPZZ(ADD, add)
 387 DO_ZPZZ(SUB, sub)
 388
 389 DO_ZPZZ(SMAX, smax)
 390 DO_ZPZZ(UMAX, umax)
 391 DO_ZPZZ(SMIN, smin)
 392 DO_ZPZZ(UMIN, umin)
 393 DO_ZPZZ(SABD, sabd)
 394 DO_ZPZZ(UABD, uabd)
 395
 396 DO_ZPZZ(MUL, mul)
 397 DO_ZPZZ(SMULH, smulh)
 398 DO_ZPZZ(UMULH, umulh)
 399
 400 DO_ZPZZ(ASR, asr)
 401 DO_ZPZZ(LSR, lsr)
 402 DO_ZPZZ(LSL, lsl)
 403
 404 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
 405 {
 406     static gen_helper_gvec_4 * const fns[4] = {
 407         NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
 408     };
 409     return do_zpzz_ool(s, a, fns[a->esz]);
 410 }
 411
 412 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
 413 {
 414     static gen_helper_gvec_4 * const fns[4] = {
 415         NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
 416     };
 417     return do_zpzz_ool(s, a, fns[a->esz]);
 418 }
 419
 420 static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
 421 {
 422     if (sve_access_check(s)) {
 423         do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
 424     }
 425     return true;
 426 }
 427
 428 #undef DO_ZPZZ
 429
 430 /*
 431  *** SVE Integer Arithmetic - Unary Predicated Group
 432  */
 433
 434 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 435 {
 436     if (fn == NULL) {
 437         return false;
 438     }
 439     if (sve_access_check(s)) {
 440         unsigned vsz = vec_full_reg_size(s);
 441         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 442                            vec_full_reg_offset(s, a->rn),
 443                            pred_full_reg_offset(s, a->pg),
 444                            vsz, vsz, 0, fn);
 445     }
 446     return true;
 447 }
 448
 449 #define DO_ZPZ(NAME, name) \
 450 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
 451 {                                                                   \
 452     static gen_helper_gvec_3 * const fns[4] = {                     \
 453         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
 454         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
 455     };                                                              \
 456     return do_zpz_ool(s, a, fns[a->esz]);                           \
 457 }
 458
 459 DO_ZPZ(CLS, cls)
 460 DO_ZPZ(CLZ, clz)
 461 DO_ZPZ(CNT_zpz, cnt_zpz)
 462 DO_ZPZ(CNOT, cnot)
 463 DO_ZPZ(NOT_zpz, not_zpz)
 464 DO_ZPZ(ABS, abs)
 465 DO_ZPZ(NEG, neg)
 466
 467 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
 468 {
 469     static gen_helper_gvec_3 * const fns[4] = {
 470         NULL,
 471         gen_helper_sve_fabs_h,
 472         gen_helper_sve_fabs_s,
 473         gen_helper_sve_fabs_d
 474     };
 475     return do_zpz_ool(s, a, fns[a->esz]);
 476 }
 477
 478 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
 479 {
 480     static gen_helper_gvec_3 * const fns[4] = {
 481         NULL,
 482         gen_helper_sve_fneg_h,
 483         gen_helper_sve_fneg_s,
 484         gen_helper_sve_fneg_d
 485     };
 486     return do_zpz_ool(s, a, fns[a->esz]);
 487 }
 488
 489 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
 490 {
 491     static gen_helper_gvec_3 * const fns[4] = {
 492         NULL,
 493         gen_helper_sve_sxtb_h,
 494         gen_helper_sve_sxtb_s,
 495         gen_helper_sve_sxtb_d
 496     };
 497     return do_zpz_ool(s, a, fns[a->esz]);
 498 }
 499
 500 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
 501 {
 502     static gen_helper_gvec_3 * const fns[4] = {
 503         NULL,
 504         gen_helper_sve_uxtb_h,
 505         gen_helper_sve_uxtb_s,
 506         gen_helper_sve_uxtb_d
 507     };
 508     return do_zpz_ool(s, a, fns[a->esz]);
 509 }
 510
 511 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
 512 {
 513     static gen_helper_gvec_3 * const fns[4] = {
 514         NULL, NULL,
 515         gen_helper_sve_sxth_s,
 516         gen_helper_sve_sxth_d
 517     };
 518     return do_zpz_ool(s, a, fns[a->esz]);
 519 }
 520
 521 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
 522 {
 523     static gen_helper_gvec_3 * const fns[4] = {
 524         NULL, NULL,
 525         gen_helper_sve_uxth_s,
 526         gen_helper_sve_uxth_d
 527     };
 528     return do_zpz_ool(s, a, fns[a->esz]);
 529 }
 530
 531 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
 532 {
 533     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
 534 }
 535
 536 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
 537 {
 538     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
 539 }
 540
 541 #undef DO_ZPZ
 542
 543 /*
 544  *** SVE Integer Reduction Group
 545  */
 546
 547 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
 548 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
 549                        gen_helper_gvec_reduc *fn)
 550 {
 551     unsigned vsz = vec_full_reg_size(s);
 552     TCGv_ptr t_zn, t_pg;
 553     TCGv_i32 desc;
 554     TCGv_i64 temp;
 555
 556     if (fn == NULL) {
 557         return false;
 558     }
 559     if (!sve_access_check(s)) {
 560         return true;
 561     }
 562
 563     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 564     temp = tcg_temp_new_i64();
 565     t_zn = tcg_temp_new_ptr();
 566     t_pg = tcg_temp_new_ptr();
 567
 568     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
 569     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
 570     fn(temp, t_zn, t_pg, desc);
 571     tcg_temp_free_ptr(t_zn);
 572     tcg_temp_free_ptr(t_pg);
 573     tcg_temp_free_i32(desc);
 574
 575     write_fp_dreg(s, a->rd, temp);
 576     tcg_temp_free_i64(temp);
 577     return true;
 578 }
 579
 580 #define DO_VPZ(NAME, name) \
 581 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
 582 {                                                                        \
 583     static gen_helper_gvec_reduc * const fns[4] = {                      \
 584         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
 585         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
 586     };                                                                   \
 587     return do_vpz_ool(s, a, fns[a->esz]);                                \
 588 }
 589
 590 DO_VPZ(ORV, orv)
 591 DO_VPZ(ANDV, andv)
 592 DO_VPZ(EORV, eorv)
 593
 594 DO_VPZ(UADDV, uaddv)
 595 DO_VPZ(SMAXV, smaxv)
 596 DO_VPZ(UMAXV, umaxv)
 597 DO_VPZ(SMINV, sminv)
 598 DO_VPZ(UMINV, uminv)
 599
 600 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
 601 {
 602     static gen_helper_gvec_reduc * const fns[4] = {
 603         gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
 604         gen_helper_sve_saddv_s, NULL
 605     };
 606     return do_vpz_ool(s, a, fns[a->esz]);
 607 }
 608
 609 #undef DO_VPZ
 610
 611 /*
 612  *** SVE Shift by Immediate - Predicated Group
 613  */
 614
 615 /* Store zero into every active element of Zd.  We will use this for two
 616  * and three-operand predicated instructions for which logic dictates a
 617  * zero result.
 618  */
 619 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
 620 {
 621     static gen_helper_gvec_2 * const fns[4] = {
 622         gen_helper_sve_clr_b, gen_helper_sve_clr_h,
 623         gen_helper_sve_clr_s, gen_helper_sve_clr_d,
 624     };
 625     if (sve_access_check(s)) {
 626         unsigned vsz = vec_full_reg_size(s);
 627         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 628                            pred_full_reg_offset(s, pg),
 629                            vsz, vsz, 0, fns[esz]);
 630     }
 631     return true;
 632 }
 633
 634 /* Copy Zn into Zd, storing zeros into inactive elements.  */
 635 static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
 636 {
 637     static gen_helper_gvec_3 * const fns[4] = {
 638         gen_helper_sve_movz_b, gen_helper_sve_movz_h,
 639         gen_helper_sve_movz_s, gen_helper_sve_movz_d,
 640     };
 641     unsigned vsz = vec_full_reg_size(s);
 642     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 643                        vec_full_reg_offset(s, rn),
 644                        pred_full_reg_offset(s, pg),
 645                        vsz, vsz, 0, fns[esz]);
 646 }
 647
 648 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 649                         gen_helper_gvec_3 *fn)
 650 {
 651     if (sve_access_check(s)) {
 652         unsigned vsz = vec_full_reg_size(s);
 653         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 654                            vec_full_reg_offset(s, a->rn),
 655                            pred_full_reg_offset(s, a->pg),
 656                            vsz, vsz, a->imm, fn);
 657     }
 658     return true;
 659 }
 660
 661 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
 662 {
 663     static gen_helper_gvec_3 * const fns[4] = {
 664         gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
 665         gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
 666     };
 667     if (a->esz < 0) {
 668         /* Invalid tsz encoding -- see tszimm_esz. */
 669         return false;
 670     }
 671     /* Shift by element size is architecturally valid.  For
 672        arithmetic right-shift, it's the same as by one less. */
 673     a->imm = MIN(a->imm, (8 << a->esz) - 1);
 674     return do_zpzi_ool(s, a, fns[a->esz]);
 675 }
 676
 677 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
 678 {
 679     static gen_helper_gvec_3 * const fns[4] = {
 680         gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
 681         gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
 682     };
 683     if (a->esz < 0) {
 684         return false;
 685     }
 686     /* Shift by element size is architecturally valid.
 687        For logical shifts, it is a zeroing operation.  */
 688     if (a->imm >= (8 << a->esz)) {
 689         return do_clr_zp(s, a->rd, a->pg, a->esz);
 690     } else {
 691         return do_zpzi_ool(s, a, fns[a->esz]);
 692     }
 693 }
 694
 695 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
 696 {
 697     static gen_helper_gvec_3 * const fns[4] = {
 698         gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
 699         gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
 700     };
 701     if (a->esz < 0) {
 702         return false;
 703     }
 704     /* Shift by element size is architecturally valid.
 705        For logical shifts, it is a zeroing operation.  */
 706     if (a->imm >= (8 << a->esz)) {
 707         return do_clr_zp(s, a->rd, a->pg, a->esz);
 708     } else {
 709         return do_zpzi_ool(s, a, fns[a->esz]);
 710     }
 711 }
 712
 713 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
 714 {
 715     static gen_helper_gvec_3 * const fns[4] = {
 716         gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
 717         gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
 718     };
 719     if (a->esz < 0) {
 720         return false;
 721     }
 722     /* Shift by element size is architecturally valid.  For arithmetic
 723        right shift for division, it is a zeroing operation.  */
 724     if (a->imm >= (8 << a->esz)) {
 725         return do_clr_zp(s, a->rd, a->pg, a->esz);
 726     } else {
 727         return do_zpzi_ool(s, a, fns[a->esz]);
 728     }
 729 }
 730
 731 /*
 732  *** SVE Bitwise Shift - Predicated Group
 733  */
 734
 735 #define DO_ZPZW(NAME, name) \
 736 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
 737 {                                                                         \
 738     static gen_helper_gvec_4 * const fns[3] = {                           \
 739         gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
 740         gen_helper_sve_##name##_zpzw_s,                                   \
 741     };                                                                    \
 742     if (a->esz < 0 || a->esz >= 3) {                                      \
 743         return false;                                                     \
 744     }                                                                     \
 745     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 746 }
 747
 748 DO_ZPZW(ASR, asr)
 749 DO_ZPZW(LSR, lsr)
 750 DO_ZPZW(LSL, lsl)
 751
 752 #undef DO_ZPZW
 753
 754 /*
 755  *** SVE Bitwise Shift - Unpredicated Group
 756  */
 757
 758 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
 759                          void (*gvec_fn)(unsigned, uint32_t, uint32_t,
 760                                          int64_t, uint32_t, uint32_t))
 761 {
 762     if (a->esz < 0) {
 763         /* Invalid tsz encoding -- see tszimm_esz. */
 764         return false;
 765     }
 766     if (sve_access_check(s)) {
 767         unsigned vsz = vec_full_reg_size(s);
 768         /* Shift by element size is architecturally valid.  For
 769            arithmetic right-shift, it's the same as by one less.
 770            Otherwise it is a zeroing operation.  */
 771         if (a->imm >= 8 << a->esz) {
 772             if (asr) {
 773                 a->imm = (8 << a->esz) - 1;
 774             } else {
 775                 do_dupi_z(s, a->rd, 0);
 776                 return true;
 777             }
 778         }
 779         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
 780                 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
 781     }
 782     return true;
 783 }
 784
 785 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
 786 {
 787     return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
 788 }
 789
 790 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
 791 {
 792     return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
 793 }
 794
 795 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
 796 {
 797     return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
 798 }
 799
 800 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
 801 {
 802     if (fn == NULL) {
 803         return false;
 804     }
 805     if (sve_access_check(s)) {
 806         unsigned vsz = vec_full_reg_size(s);
 807         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 808                            vec_full_reg_offset(s, a->rn),
 809                            vec_full_reg_offset(s, a->rm),
 810                            vsz, vsz, 0, fn);
 811     }
 812     return true;
 813 }
 814
 815 #define DO_ZZW(NAME, name) \
 816 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
 817 {                                                                         \
 818     static gen_helper_gvec_3 * const fns[4] = {                           \
 819         gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
 820         gen_helper_sve_##name##_zzw_s, NULL                               \
 821     };                                                                    \
 822     return do_zzw_ool(s, a, fns[a->esz]);                                 \
 823 }
 824
 825 DO_ZZW(ASR, asr)
 826 DO_ZZW(LSR, lsr)
 827 DO_ZZW(LSL, lsl)
 828
 829 #undef DO_ZZW
 830
 831 /*
 832  *** SVE Integer Multiply-Add Group
 833  */
 834
 835 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
 836                          gen_helper_gvec_5 *fn)
 837 {
 838     if (sve_access_check(s)) {
 839         unsigned vsz = vec_full_reg_size(s);
 840         tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
 841                            vec_full_reg_offset(s, a->ra),
 842                            vec_full_reg_offset(s, a->rn),
 843                            vec_full_reg_offset(s, a->rm),
 844                            pred_full_reg_offset(s, a->pg),
 845                            vsz, vsz, 0, fn);
 846     }
 847     return true;
 848 }
 849
 850 #define DO_ZPZZZ(NAME, name) \
 851 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
 852 {                                                                    \
 853     static gen_helper_gvec_5 * const fns[4] = {                      \
 854         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
 855         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
 856     };                                                               \
 857     return do_zpzzz_ool(s, a, fns[a->esz]);                          \
 858 }
 859
 860 DO_ZPZZZ(MLA, mla)
 861 DO_ZPZZZ(MLS, mls)
 862
 863 #undef DO_ZPZZZ
 864
 865 /*
 866  *** SVE Index Generation Group
 867  */
 868
 869 static void do_index(DisasContext *s, int esz, int rd,
 870                      TCGv_i64 start, TCGv_i64 incr)
 871 {
 872     unsigned vsz = vec_full_reg_size(s);
 873     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 874     TCGv_ptr t_zd = tcg_temp_new_ptr();
 875
 876     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
 877     if (esz == 3) {
 878         gen_helper_sve_index_d(t_zd, start, incr, desc);
 879     } else {
 880         typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 881         static index_fn * const fns[3] = {
 882             gen_helper_sve_index_b,
 883             gen_helper_sve_index_h,
 884             gen_helper_sve_index_s,
 885         };
 886         TCGv_i32 s32 = tcg_temp_new_i32();
 887         TCGv_i32 i32 = tcg_temp_new_i32();
 888
 889         tcg_gen_extrl_i64_i32(s32, start);
 890         tcg_gen_extrl_i64_i32(i32, incr);
 891         fns[esz](t_zd, s32, i32, desc);
 892
 893         tcg_temp_free_i32(s32);
 894         tcg_temp_free_i32(i32);
 895     }
 896     tcg_temp_free_ptr(t_zd);
 897     tcg_temp_free_i32(desc);
 898 }
 899
 900 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
 901 {
 902     if (sve_access_check(s)) {
 903         TCGv_i64 start = tcg_const_i64(a->imm1);
 904         TCGv_i64 incr = tcg_const_i64(a->imm2);
 905         do_index(s, a->esz, a->rd, start, incr);
 906         tcg_temp_free_i64(start);
 907         tcg_temp_free_i64(incr);
 908     }
 909     return true;
 910 }
 911
 912 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
 913 {
 914     if (sve_access_check(s)) {
 915         TCGv_i64 start = tcg_const_i64(a->imm);
 916         TCGv_i64 incr = cpu_reg(s, a->rm);
 917         do_index(s, a->esz, a->rd, start, incr);
 918         tcg_temp_free_i64(start);
 919     }
 920     return true;
 921 }
 922
 923 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
 924 {
 925     if (sve_access_check(s)) {
 926         TCGv_i64 start = cpu_reg(s, a->rn);
 927         TCGv_i64 incr = tcg_const_i64(a->imm);
 928         do_index(s, a->esz, a->rd, start, incr);
 929         tcg_temp_free_i64(incr);
 930     }
 931     return true;
 932 }
 933
 934 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
 935 {
 936     if (sve_access_check(s)) {
 937         TCGv_i64 start = cpu_reg(s, a->rn);
 938         TCGv_i64 incr = cpu_reg(s, a->rm);
 939         do_index(s, a->esz, a->rd, start, incr);
 940     }
 941     return true;
 942 }
 943
 944 /*
 945  *** SVE Stack Allocation Group
 946  */
 947
 948 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
 949 {
 950     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 951     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 952     tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
 953     return true;
 954 }
 955
 956 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
 957 {
 958     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 959     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 960     tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
 961     return true;
 962 }
 963
 964 static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
 965 {
 966     TCGv_i64 reg = cpu_reg(s, a->rd);
 967     tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
 968     return true;
 969 }
 970
 971 /*
 972  *** SVE Compute Vector Address Group
 973  */
 974
 975 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 976 {
 977     if (sve_access_check(s)) {
 978         unsigned vsz = vec_full_reg_size(s);
 979         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 980                            vec_full_reg_offset(s, a->rn),
 981                            vec_full_reg_offset(s, a->rm),
 982                            vsz, vsz, a->imm, fn);
 983     }
 984     return true;
 985 }
 986
 987 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
 988 {
 989     return do_adr(s, a, gen_helper_sve_adr_p32);
 990 }
 991
 992 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
 993 {
 994     return do_adr(s, a, gen_helper_sve_adr_p64);
 995 }
 996
 997 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
 998 {
 999     return do_adr(s, a, gen_helper_sve_adr_s32);
1000 }
1001
1002 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
1003 {
1004     return do_adr(s, a, gen_helper_sve_adr_u32);
1005 }
1006
1007 /*
1008  *** SVE Integer Misc - Unpredicated Group
1009  */
1010
1011 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
1012 {
1013     static gen_helper_gvec_2 * const fns[4] = {
1014         NULL,
1015         gen_helper_sve_fexpa_h,
1016         gen_helper_sve_fexpa_s,
1017         gen_helper_sve_fexpa_d,
1018     };
1019     if (a->esz == 0) {
1020         return false;
1021     }
1022     if (sve_access_check(s)) {
1023         unsigned vsz = vec_full_reg_size(s);
1024         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1025                            vec_full_reg_offset(s, a->rn),
1026                            vsz, vsz, 0, fns[a->esz]);
1027     }
1028     return true;
1029 }
1030
1031 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
1032 {
1033     static gen_helper_gvec_3 * const fns[4] = {
1034         NULL,
1035         gen_helper_sve_ftssel_h,
1036         gen_helper_sve_ftssel_s,
1037         gen_helper_sve_ftssel_d,
1038     };
1039     if (a->esz == 0) {
1040         return false;
1041     }
1042     if (sve_access_check(s)) {
1043         unsigned vsz = vec_full_reg_size(s);
1044         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1045                            vec_full_reg_offset(s, a->rn),
1046                            vec_full_reg_offset(s, a->rm),
1047                            vsz, vsz, 0, fns[a->esz]);
1048     }
1049     return true;
1050 }
1051
1052 /*
1053  *** SVE Predicate Logical Operations Group
1054  */
1055
1056 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1057                           const GVecGen4 *gvec_op)
1058 {
1059     if (!sve_access_check(s)) {
1060         return true;
1061     }
1062
1063     unsigned psz = pred_gvec_reg_size(s);
1064     int dofs = pred_full_reg_offset(s, a->rd);
1065     int nofs = pred_full_reg_offset(s, a->rn);
1066     int mofs = pred_full_reg_offset(s, a->rm);
1067     int gofs = pred_full_reg_offset(s, a->pg);
1068
1069     if (psz == 8) {
1070         /* Do the operation and the flags generation in temps.  */
1071         TCGv_i64 pd = tcg_temp_new_i64();
1072         TCGv_i64 pn = tcg_temp_new_i64();
1073         TCGv_i64 pm = tcg_temp_new_i64();
1074         TCGv_i64 pg = tcg_temp_new_i64();
1075
1076         tcg_gen_ld_i64(pn, cpu_env, nofs);
1077         tcg_gen_ld_i64(pm, cpu_env, mofs);
1078         tcg_gen_ld_i64(pg, cpu_env, gofs);
1079
1080         gvec_op->fni8(pd, pn, pm, pg);
1081         tcg_gen_st_i64(pd, cpu_env, dofs);
1082
1083         do_predtest1(pd, pg);
1084
1085         tcg_temp_free_i64(pd);
1086         tcg_temp_free_i64(pn);
1087         tcg_temp_free_i64(pm);
1088         tcg_temp_free_i64(pg);
1089     } else {
1090         /* The operation and flags generation is large.  The computation
1091          * of the flags depends on the original contents of the guarding
1092          * predicate.  If the destination overwrites the guarding predicate,
1093          * then the easiest way to get this right is to save a copy.
1094           */
1095         int tofs = gofs;
1096         if (a->rd == a->pg) {
1097             tofs = offsetof(CPUARMState, vfp.preg_tmp);
1098             tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1099         }
1100
1101         tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1102         do_predtest(s, dofs, tofs, psz / 8);
1103     }
1104     return true;
1105 }
1106
1107 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1108 {
1109     tcg_gen_and_i64(pd, pn, pm);
1110     tcg_gen_and_i64(pd, pd, pg);
1111 }
1112
1113 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1114                            TCGv_vec pm, TCGv_vec pg)
1115 {
1116     tcg_gen_and_vec(vece, pd, pn, pm);
1117     tcg_gen_and_vec(vece, pd, pd, pg);
1118 }
1119
1120 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1121 {
1122     static const GVecGen4 op = {
1123         .fni8 = gen_and_pg_i64,
1124         .fniv = gen_and_pg_vec,
1125         .fno = gen_helper_sve_and_pppp,
1126         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1127     };
1128     if (a->s) {
1129         return do_pppp_flags(s, a, &op);
1130     } else if (a->rn == a->rm) {
1131         if (a->pg == a->rn) {
1132             return do_mov_p(s, a->rd, a->rn);
1133         } else {
1134             return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1135         }
1136     } else if (a->pg == a->rn || a->pg == a->rm) {
1137         return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1138     } else {
1139         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1140     }
1141 }
1142
1143 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1144 {
1145     tcg_gen_andc_i64(pd, pn, pm);
1146     tcg_gen_and_i64(pd, pd, pg);
1147 }
1148
1149 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1150                            TCGv_vec pm, TCGv_vec pg)
1151 {
1152     tcg_gen_andc_vec(vece, pd, pn, pm);
1153     tcg_gen_and_vec(vece, pd, pd, pg);
1154 }
1155
1156 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1157 {
1158     static const GVecGen4 op = {
1159         .fni8 = gen_bic_pg_i64,
1160         .fniv = gen_bic_pg_vec,
1161         .fno = gen_helper_sve_bic_pppp,
1162         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1163     };
1164     if (a->s) {
1165         return do_pppp_flags(s, a, &op);
1166     } else if (a->pg == a->rn) {
1167         return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1168     } else {
1169         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1170     }
1171 }
1172
1173 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1174 {
1175     tcg_gen_xor_i64(pd, pn, pm);
1176     tcg_gen_and_i64(pd, pd, pg);
1177 }
1178
1179 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1180                            TCGv_vec pm, TCGv_vec pg)
1181 {
1182     tcg_gen_xor_vec(vece, pd, pn, pm);
1183     tcg_gen_and_vec(vece, pd, pd, pg);
1184 }
1185
1186 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1187 {
1188     static const GVecGen4 op = {
1189         .fni8 = gen_eor_pg_i64,
1190         .fniv = gen_eor_pg_vec,
1191         .fno = gen_helper_sve_eor_pppp,
1192         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1193     };
1194     if (a->s) {
1195         return do_pppp_flags(s, a, &op);
1196     } else {
1197         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1198     }
1199 }
1200
1201 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1202 {
1203     tcg_gen_and_i64(pn, pn, pg);
1204     tcg_gen_andc_i64(pm, pm, pg);
1205     tcg_gen_or_i64(pd, pn, pm);
1206 }
1207
1208 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1209                            TCGv_vec pm, TCGv_vec pg)
1210 {
1211     tcg_gen_and_vec(vece, pn, pn, pg);
1212     tcg_gen_andc_vec(vece, pm, pm, pg);
1213     tcg_gen_or_vec(vece, pd, pn, pm);
1214 }
1215
1216 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1217 {
1218     static const GVecGen4 op = {
1219         .fni8 = gen_sel_pg_i64,
1220         .fniv = gen_sel_pg_vec,
1221         .fno = gen_helper_sve_sel_pppp,
1222         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1223     };
1224     if (a->s) {
1225         return false;
1226     } else {
1227         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1228     }
1229 }
1230
1231 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1232 {
1233     tcg_gen_or_i64(pd, pn, pm);
1234     tcg_gen_and_i64(pd, pd, pg);
1235 }
1236
1237 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1238                            TCGv_vec pm, TCGv_vec pg)
1239 {
1240     tcg_gen_or_vec(vece, pd, pn, pm);
1241     tcg_gen_and_vec(vece, pd, pd, pg);
1242 }
1243
1244 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1245 {
1246     static const GVecGen4 op = {
1247         .fni8 = gen_orr_pg_i64,
1248         .fniv = gen_orr_pg_vec,
1249         .fno = gen_helper_sve_orr_pppp,
1250         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1251     };
1252     if (a->s) {
1253         return do_pppp_flags(s, a, &op);
1254     } else if (a->pg == a->rn && a->rn == a->rm) {
1255         return do_mov_p(s, a->rd, a->rn);
1256     } else {
1257         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1258     }
1259 }
1260
1261 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1262 {
1263     tcg_gen_orc_i64(pd, pn, pm);
1264     tcg_gen_and_i64(pd, pd, pg);
1265 }
1266
1267 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1268                            TCGv_vec pm, TCGv_vec pg)
1269 {
1270     tcg_gen_orc_vec(vece, pd, pn, pm);
1271     tcg_gen_and_vec(vece, pd, pd, pg);
1272 }
1273
1274 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1275 {
1276     static const GVecGen4 op = {
1277         .fni8 = gen_orn_pg_i64,
1278         .fniv = gen_orn_pg_vec,
1279         .fno = gen_helper_sve_orn_pppp,
1280         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1281     };
1282     if (a->s) {
1283         return do_pppp_flags(s, a, &op);
1284     } else {
1285         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1286     }
1287 }
1288
1289 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1290 {
1291     tcg_gen_or_i64(pd, pn, pm);
1292     tcg_gen_andc_i64(pd, pg, pd);
1293 }
1294
1295 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1296                            TCGv_vec pm, TCGv_vec pg)
1297 {
1298     tcg_gen_or_vec(vece, pd, pn, pm);
1299     tcg_gen_andc_vec(vece, pd, pg, pd);
1300 }
1301
1302 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1303 {
1304     static const GVecGen4 op = {
1305         .fni8 = gen_nor_pg_i64,
1306         .fniv = gen_nor_pg_vec,
1307         .fno = gen_helper_sve_nor_pppp,
1308         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1309     };
1310     if (a->s) {
1311         return do_pppp_flags(s, a, &op);
1312     } else {
1313         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1314     }
1315 }
1316
1317 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1318 {
1319     tcg_gen_and_i64(pd, pn, pm);
1320     tcg_gen_andc_i64(pd, pg, pd);
1321 }
1322
1323 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1324                            TCGv_vec pm, TCGv_vec pg)
1325 {
1326     tcg_gen_and_vec(vece, pd, pn, pm);
1327     tcg_gen_andc_vec(vece, pd, pg, pd);
1328 }
1329
1330 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1331 {
1332     static const GVecGen4 op = {
1333         .fni8 = gen_nand_pg_i64,
1334         .fniv = gen_nand_pg_vec,
1335         .fno = gen_helper_sve_nand_pppp,
1336         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1337     };
1338     if (a->s) {
1339         return do_pppp_flags(s, a, &op);
1340     } else {
1341         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1342     }
1343 }
1344
1345 /*
1346  *** SVE Predicate Misc Group
1347  */
1348
1349 static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1350 {
1351     if (sve_access_check(s)) {
1352         int nofs = pred_full_reg_offset(s, a->rn);
1353         int gofs = pred_full_reg_offset(s, a->pg);
1354         int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1355
1356         if (words == 1) {
1357             TCGv_i64 pn = tcg_temp_new_i64();
1358             TCGv_i64 pg = tcg_temp_new_i64();
1359
1360             tcg_gen_ld_i64(pn, cpu_env, nofs);
1361             tcg_gen_ld_i64(pg, cpu_env, gofs);
1362             do_predtest1(pn, pg);
1363
1364             tcg_temp_free_i64(pn);
1365             tcg_temp_free_i64(pg);
1366         } else {
1367             do_predtest(s, nofs, gofs, words);
1368         }
1369     }
1370     return true;
1371 }
1372
1373 /* See the ARM pseudocode DecodePredCount.  */
1374 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1375 {
1376     unsigned elements = fullsz >> esz;
1377     unsigned bound;
1378
1379     switch (pattern) {
1380     case 0x0: /* POW2 */
1381         return pow2floor(elements);
1382     case 0x1: /* VL1 */
1383     case 0x2: /* VL2 */
1384     case 0x3: /* VL3 */
1385     case 0x4: /* VL4 */
1386     case 0x5: /* VL5 */
1387     case 0x6: /* VL6 */
1388     case 0x7: /* VL7 */
1389     case 0x8: /* VL8 */
1390         bound = pattern;
1391         break;
1392     case 0x9: /* VL16 */
1393     case 0xa: /* VL32 */
1394     case 0xb: /* VL64 */
1395     case 0xc: /* VL128 */
1396     case 0xd: /* VL256 */
1397         bound = 16 << (pattern - 9);
1398         break;
1399     case 0x1d: /* MUL4 */
1400         return elements - elements % 4;
1401     case 0x1e: /* MUL3 */
1402         return elements - elements % 3;
1403     case 0x1f: /* ALL */
1404         return elements;
1405     default:   /* #uimm5 */
1406         return 0;
1407     }
1408     return elements >= bound ? bound : 0;
1409 }
1410
1411 /* This handles all of the predicate initialization instructions,
1412  * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
1413  * so that decode_pred_count returns 0.  For SETFFR, we will have
1414  * set RD == 16 == FFR.
1415  */
1416 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1417 {
1418     if (!sve_access_check(s)) {
1419         return true;
1420     }
1421
1422     unsigned fullsz = vec_full_reg_size(s);
1423     unsigned ofs = pred_full_reg_offset(s, rd);
1424     unsigned numelem, setsz, i;
1425     uint64_t word, lastword;
1426     TCGv_i64 t;
1427
1428     numelem = decode_pred_count(fullsz, pat, esz);
1429
1430     /* Determine what we must store into each bit, and how many.  */
1431     if (numelem == 0) {
1432         lastword = word = 0;
1433         setsz = fullsz;
1434     } else {
1435         setsz = numelem << esz;
1436         lastword = word = pred_esz_masks[esz];
1437         if (setsz % 64) {
1438             lastword &= MAKE_64BIT_MASK(0, setsz % 64);
1439         }
1440     }
1441
1442     t = tcg_temp_new_i64();
1443     if (fullsz <= 64) {
1444         tcg_gen_movi_i64(t, lastword);
1445         tcg_gen_st_i64(t, cpu_env, ofs);
1446         goto done;
1447     }
1448
1449     if (word == lastword) {
1450         unsigned maxsz = size_for_gvec(fullsz / 8);
1451         unsigned oprsz = size_for_gvec(setsz / 8);
1452
1453         if (oprsz * 8 == setsz) {
1454             tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1455             goto done;
1456         }
1457     }
1458
1459     setsz /= 8;
1460     fullsz /= 8;
1461
1462     tcg_gen_movi_i64(t, word);
1463     for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
1464         tcg_gen_st_i64(t, cpu_env, ofs + i);
1465     }
1466     if (lastword != word) {
1467         tcg_gen_movi_i64(t, lastword);
1468         tcg_gen_st_i64(t, cpu_env, ofs + i);
1469         i += 8;
1470     }
1471     if (i < fullsz) {
1472         tcg_gen_movi_i64(t, 0);
1473         for (; i < fullsz; i += 8) {
1474             tcg_gen_st_i64(t, cpu_env, ofs + i);
1475         }
1476     }
1477
1478  done:
1479     tcg_temp_free_i64(t);
1480
1481     /* PTRUES */
1482     if (setflag) {
1483         tcg_gen_movi_i32(cpu_NF, -(word != 0));
1484         tcg_gen_movi_i32(cpu_CF, word == 0);
1485         tcg_gen_movi_i32(cpu_VF, 0);
1486         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1487     }
1488     return true;
1489 }
1490
1491 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
1492 {
1493     return do_predset(s, a->esz, a->rd, a->pat, a->s);
1494 }
1495
1496 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
1497 {
1498     /* Note pat == 31 is #all, to set all elements.  */
1499     return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1500 }
1501
1502 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
1503 {
1504     /* Note pat == 32 is #unimp, to set no elements.  */
1505     return do_predset(s, 0, a->rd, 32, false);
1506 }
1507
1508 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
1509 {
1510     /* The path through do_pppp_flags is complicated enough to want to avoid
1511      * duplication.  Frob the arguments into the form of a predicated AND.
1512      */
1513     arg_rprr_s alt_a = {
1514         .rd = a->rd, .pg = a->pg, .s = a->s,
1515         .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1516     };
1517     return trans_AND_pppp(s, &alt_a);
1518 }
1519
1520 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
1521 {
1522     return do_mov_p(s, a->rd, FFR_PRED_NUM);
1523 }
1524
1525 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
1526 {
1527     return do_mov_p(s, FFR_PRED_NUM, a->rn);
1528 }
1529
1530 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1531                             void (*gen_fn)(TCGv_i32, TCGv_ptr,
1532                                            TCGv_ptr, TCGv_i32))
1533 {
1534     if (!sve_access_check(s)) {
1535         return true;
1536     }
1537
1538     TCGv_ptr t_pd = tcg_temp_new_ptr();
1539     TCGv_ptr t_pg = tcg_temp_new_ptr();
1540     TCGv_i32 t;
1541     unsigned desc;
1542
1543     desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1544     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1545
1546     tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1547     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1548     t = tcg_const_i32(desc);
1549
1550     gen_fn(t, t_pd, t_pg, t);
1551     tcg_temp_free_ptr(t_pd);
1552     tcg_temp_free_ptr(t_pg);
1553
1554     do_pred_flags(t);
1555     tcg_temp_free_i32(t);
1556     return true;
1557 }
1558
1559 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
1560 {
1561     return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1562 }
1563
1564 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
1565 {
1566     return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1567 }
1568
1569 /*
1570  *** SVE Element Count Group
1571  */
1572
1573 /* Perform an inline saturating addition of a 32-bit value within
1574  * a 64-bit register.  The second operand is known to be positive,
1575  * which halves the comparisions we must perform to bound the result.
1576  */
1577 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1578 {
1579     int64_t ibound;
1580     TCGv_i64 bound;
1581     TCGCond cond;
1582
1583     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1584     if (u) {
1585         tcg_gen_ext32u_i64(reg, reg);
1586     } else {
1587         tcg_gen_ext32s_i64(reg, reg);
1588     }
1589     if (d) {
1590         tcg_gen_sub_i64(reg, reg, val);
1591         ibound = (u ? 0 : INT32_MIN);
1592         cond = TCG_COND_LT;
1593     } else {
1594         tcg_gen_add_i64(reg, reg, val);
1595         ibound = (u ? UINT32_MAX : INT32_MAX);
1596         cond = TCG_COND_GT;
1597     }
1598     bound = tcg_const_i64(ibound);
1599     tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1600     tcg_temp_free_i64(bound);
1601 }
1602
1603 /* Similarly with 64-bit values.  */
1604 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1605 {
1606     TCGv_i64 t0 = tcg_temp_new_i64();
1607     TCGv_i64 t1 = tcg_temp_new_i64();
1608     TCGv_i64 t2;
1609
1610     if (u) {
1611         if (d) {
1612             tcg_gen_sub_i64(t0, reg, val);
1613             tcg_gen_movi_i64(t1, 0);
1614             tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1615         } else {
1616             tcg_gen_add_i64(t0, reg, val);
1617             tcg_gen_movi_i64(t1, -1);
1618             tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1619         }
1620     } else {
1621         if (d) {
1622             /* Detect signed overflow for subtraction.  */
1623             tcg_gen_xor_i64(t0, reg, val);
1624             tcg_gen_sub_i64(t1, reg, val);
1625             tcg_gen_xor_i64(reg, reg, t1);
1626             tcg_gen_and_i64(t0, t0, reg);
1627
1628             /* Bound the result.  */
1629             tcg_gen_movi_i64(reg, INT64_MIN);
1630             t2 = tcg_const_i64(0);
1631             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1632         } else {
1633             /* Detect signed overflow for addition.  */
1634             tcg_gen_xor_i64(t0, reg, val);
1635             tcg_gen_add_i64(reg, reg, val);
1636             tcg_gen_xor_i64(t1, reg, val);
1637             tcg_gen_andc_i64(t0, t1, t0);
1638
1639             /* Bound the result.  */
1640             tcg_gen_movi_i64(t1, INT64_MAX);
1641             t2 = tcg_const_i64(0);
1642             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1643         }
1644         tcg_temp_free_i64(t2);
1645     }
1646     tcg_temp_free_i64(t0);
1647     tcg_temp_free_i64(t1);
1648 }
1649
1650 /* Similarly with a vector and a scalar operand.  */
1651 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1652                               TCGv_i64 val, bool u, bool d)
1653 {
1654     unsigned vsz = vec_full_reg_size(s);
1655     TCGv_ptr dptr, nptr;
1656     TCGv_i32 t32, desc;
1657     TCGv_i64 t64;
1658
1659     dptr = tcg_temp_new_ptr();
1660     nptr = tcg_temp_new_ptr();
1661     tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1662     tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1663     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1664
1665     switch (esz) {
1666     case MO_8:
1667         t32 = tcg_temp_new_i32();
1668         tcg_gen_extrl_i64_i32(t32, val);
1669         if (d) {
1670             tcg_gen_neg_i32(t32, t32);
1671         }
1672         if (u) {
1673             gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1674         } else {
1675             gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1676         }
1677         tcg_temp_free_i32(t32);
1678         break;
1679
1680     case MO_16:
1681         t32 = tcg_temp_new_i32();
1682         tcg_gen_extrl_i64_i32(t32, val);
1683         if (d) {
1684             tcg_gen_neg_i32(t32, t32);
1685         }
1686         if (u) {
1687             gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1688         } else {
1689             gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1690         }
1691         tcg_temp_free_i32(t32);
1692         break;
1693
1694     case MO_32:
1695         t64 = tcg_temp_new_i64();
1696         if (d) {
1697             tcg_gen_neg_i64(t64, val);
1698         } else {
1699             tcg_gen_mov_i64(t64, val);
1700         }
1701         if (u) {
1702             gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1703         } else {
1704             gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1705         }
1706         tcg_temp_free_i64(t64);
1707         break;
1708
1709     case MO_64:
1710         if (u) {
1711             if (d) {
1712                 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1713             } else {
1714                 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1715             }
1716         } else if (d) {
1717             t64 = tcg_temp_new_i64();
1718             tcg_gen_neg_i64(t64, val);
1719             gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1720             tcg_temp_free_i64(t64);
1721         } else {
1722             gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1723         }
1724         break;
1725
1726     default:
1727         g_assert_not_reached();
1728     }
1729
1730     tcg_temp_free_ptr(dptr);
1731     tcg_temp_free_ptr(nptr);
1732     tcg_temp_free_i32(desc);
1733 }
1734
1735 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1736 {
1737     if (sve_access_check(s)) {
1738         unsigned fullsz = vec_full_reg_size(s);
1739         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1740         tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1741     }
1742     return true;
1743 }
1744
1745 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1746 {
1747     if (sve_access_check(s)) {
1748         unsigned fullsz = vec_full_reg_size(s);
1749         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1750         int inc = numelem * a->imm * (a->d ? -1 : 1);
1751         TCGv_i64 reg = cpu_reg(s, a->rd);
1752
1753         tcg_gen_addi_i64(reg, reg, inc);
1754     }
1755     return true;
1756 }
1757
1758 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
1759 {
1760     if (!sve_access_check(s)) {
1761         return true;
1762     }
1763
1764     unsigned fullsz = vec_full_reg_size(s);
1765     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1766     int inc = numelem * a->imm;
1767     TCGv_i64 reg = cpu_reg(s, a->rd);
1768
1769     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1770     if (inc == 0) {
1771         if (a->u) {
1772             tcg_gen_ext32u_i64(reg, reg);
1773         } else {
1774             tcg_gen_ext32s_i64(reg, reg);
1775         }
1776     } else {
1777         TCGv_i64 t = tcg_const_i64(inc);
1778         do_sat_addsub_32(reg, t, a->u, a->d);
1779         tcg_temp_free_i64(t);
1780     }
1781     return true;
1782 }
1783
1784 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1785 {
1786     if (!sve_access_check(s)) {
1787         return true;
1788     }
1789
1790     unsigned fullsz = vec_full_reg_size(s);
1791     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1792     int inc = numelem * a->imm;
1793     TCGv_i64 reg = cpu_reg(s, a->rd);
1794
1795     if (inc != 0) {
1796         TCGv_i64 t = tcg_const_i64(inc);
1797         do_sat_addsub_64(reg, t, a->u, a->d);
1798         tcg_temp_free_i64(t);
1799     }
1800     return true;
1801 }
1802
1803 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1804 {
1805     if (a->esz == 0) {
1806         return false;
1807     }
1808
1809     unsigned fullsz = vec_full_reg_size(s);
1810     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1811     int inc = numelem * a->imm;
1812
1813     if (inc != 0) {
1814         if (sve_access_check(s)) {
1815             TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1816             tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1817                               vec_full_reg_offset(s, a->rn),
1818                               t, fullsz, fullsz);
1819             tcg_temp_free_i64(t);
1820         }
1821     } else {
1822         do_mov_z(s, a->rd, a->rn);
1823     }
1824     return true;
1825 }
1826
1827 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1828 {
1829     if (a->esz == 0) {
1830         return false;
1831     }
1832
1833     unsigned fullsz = vec_full_reg_size(s);
1834     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1835     int inc = numelem * a->imm;
1836
1837     if (inc != 0) {
1838         if (sve_access_check(s)) {
1839             TCGv_i64 t = tcg_const_i64(inc);
1840             do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1841             tcg_temp_free_i64(t);
1842         }
1843     } else {
1844         do_mov_z(s, a->rd, a->rn);
1845     }
1846     return true;
1847 }
1848
1849 /*
1850  *** SVE Bitwise Immediate Group
1851  */
1852
1853 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1854 {
1855     uint64_t imm;
1856     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1857                                 extract32(a->dbm, 0, 6),
1858                                 extract32(a->dbm, 6, 6))) {
1859         return false;
1860     }
1861     if (sve_access_check(s)) {
1862         unsigned vsz = vec_full_reg_size(s);
1863         gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1864                 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1865     }
1866     return true;
1867 }
1868
1869 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
1870 {
1871     return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1872 }
1873
1874 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
1875 {
1876     return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1877 }
1878
1879 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
1880 {
1881     return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1882 }
1883
1884 static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
1885 {
1886     uint64_t imm;
1887     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1888                                 extract32(a->dbm, 0, 6),
1889                                 extract32(a->dbm, 6, 6))) {
1890         return false;
1891     }
1892     if (sve_access_check(s)) {
1893         do_dupi_z(s, a->rd, imm);
1894     }
1895     return true;
1896 }
1897
1898 /*
1899  *** SVE Integer Wide Immediate - Predicated Group
1900  */
1901
1902 /* Implement all merging copies.  This is used for CPY (immediate),
1903  * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1904  */
1905 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1906                      TCGv_i64 val)
1907 {
1908     typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1909     static gen_cpy * const fns[4] = {
1910         gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1911         gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1912     };
1913     unsigned vsz = vec_full_reg_size(s);
1914     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1915     TCGv_ptr t_zd = tcg_temp_new_ptr();
1916     TCGv_ptr t_zn = tcg_temp_new_ptr();
1917     TCGv_ptr t_pg = tcg_temp_new_ptr();
1918
1919     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1920     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1921     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1922
1923     fns[esz](t_zd, t_zn, t_pg, val, desc);
1924
1925     tcg_temp_free_ptr(t_zd);
1926     tcg_temp_free_ptr(t_zn);
1927     tcg_temp_free_ptr(t_pg);
1928     tcg_temp_free_i32(desc);
1929 }
1930
1931 static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
1932 {
1933     if (a->esz == 0) {
1934         return false;
1935     }
1936     if (sve_access_check(s)) {
1937         /* Decode the VFP immediate.  */
1938         uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1939         TCGv_i64 t_imm = tcg_const_i64(imm);
1940         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1941         tcg_temp_free_i64(t_imm);
1942     }
1943     return true;
1944 }
1945
1946 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
1947 {
1948     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1949         return false;
1950     }
1951     if (sve_access_check(s)) {
1952         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1953         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1954         tcg_temp_free_i64(t_imm);
1955     }
1956     return true;
1957 }
1958
1959 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
1960 {
1961     static gen_helper_gvec_2i * const fns[4] = {
1962         gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1963         gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1964     };
1965
1966     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1967         return false;
1968     }
1969     if (sve_access_check(s)) {
1970         unsigned vsz = vec_full_reg_size(s);
1971         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1972         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1973                             pred_full_reg_offset(s, a->pg),
1974                             t_imm, vsz, vsz, 0, fns[a->esz]);
1975         tcg_temp_free_i64(t_imm);
1976     }
1977     return true;
1978 }
1979
1980 /*
1981  *** SVE Permute Extract Group
1982  */
1983
1984 static bool trans_EXT(DisasContext *s, arg_EXT *a)
1985 {
1986     if (!sve_access_check(s)) {
1987         return true;
1988     }
1989
1990     unsigned vsz = vec_full_reg_size(s);
1991     unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1992     unsigned n_siz = vsz - n_ofs;
1993     unsigned d = vec_full_reg_offset(s, a->rd);
1994     unsigned n = vec_full_reg_offset(s, a->rn);
1995     unsigned m = vec_full_reg_offset(s, a->rm);
1996
1997     /* Use host vector move insns if we have appropriate sizes
1998      * and no unfortunate overlap.
1999      */
2000     if (m != d
2001         && n_ofs == size_for_gvec(n_ofs)
2002         && n_siz == size_for_gvec(n_siz)
2003         && (d != n || n_siz <= n_ofs)) {
2004         tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2005         if (n_ofs != 0) {
2006             tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2007         }
2008     } else {
2009         tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2010     }
2011     return true;
2012 }
2013
2014 /*
2015  *** SVE Permute - Unpredicated Group
2016  */
2017
2018 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2019 {
2020     if (sve_access_check(s)) {
2021         unsigned vsz = vec_full_reg_size(s);
2022         tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2023                              vsz, vsz, cpu_reg_sp(s, a->rn));
2024     }
2025     return true;
2026 }
2027
2028 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
2029 {
2030     if ((a->imm & 0x1f) == 0) {
2031         return false;
2032     }
2033     if (sve_access_check(s)) {
2034         unsigned vsz = vec_full_reg_size(s);
2035         unsigned dofs = vec_full_reg_offset(s, a->rd);
2036         unsigned esz, index;
2037
2038         esz = ctz32(a->imm);
2039         index = a->imm >> (esz + 1);
2040
2041         if ((index << esz) < vsz) {
2042             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2043             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2044         } else {
2045             tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2046         }
2047     }
2048     return true;
2049 }
2050
2051 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2052 {
2053     typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2054     static gen_insr * const fns[4] = {
2055         gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2056         gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2057     };
2058     unsigned vsz = vec_full_reg_size(s);
2059     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2060     TCGv_ptr t_zd = tcg_temp_new_ptr();
2061     TCGv_ptr t_zn = tcg_temp_new_ptr();
2062
2063     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2064     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2065
2066     fns[a->esz](t_zd, t_zn, val, desc);
2067
2068     tcg_temp_free_ptr(t_zd);
2069     tcg_temp_free_ptr(t_zn);
2070     tcg_temp_free_i32(desc);
2071 }
2072
2073 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2074 {
2075     if (sve_access_check(s)) {
2076         TCGv_i64 t = tcg_temp_new_i64();
2077         tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2078         do_insr_i64(s, a, t);
2079         tcg_temp_free_i64(t);
2080     }
2081     return true;
2082 }
2083
2084 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2085 {
2086     if (sve_access_check(s)) {
2087         do_insr_i64(s, a, cpu_reg(s, a->rm));
2088     }
2089     return true;
2090 }
2091
2092 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
2093 {
2094     static gen_helper_gvec_2 * const fns[4] = {
2095         gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2096         gen_helper_sve_rev_s, gen_helper_sve_rev_d
2097     };
2098
2099     if (sve_access_check(s)) {
2100         unsigned vsz = vec_full_reg_size(s);
2101         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2102                            vec_full_reg_offset(s, a->rn),
2103                            vsz, vsz, 0, fns[a->esz]);
2104     }
2105     return true;
2106 }
2107
2108 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
2109 {
2110     static gen_helper_gvec_3 * const fns[4] = {
2111         gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2112         gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2113     };
2114
2115     if (sve_access_check(s)) {
2116         unsigned vsz = vec_full_reg_size(s);
2117         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2118                            vec_full_reg_offset(s, a->rn),
2119                            vec_full_reg_offset(s, a->rm),
2120                            vsz, vsz, 0, fns[a->esz]);
2121     }
2122     return true;
2123 }
2124
2125 static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2126 {
2127     static gen_helper_gvec_2 * const fns[4][2] = {
2128         { NULL, NULL },
2129         { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2130         { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2131         { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2132     };
2133
2134     if (a->esz == 0) {
2135         return false;
2136     }
2137     if (sve_access_check(s)) {
2138         unsigned vsz = vec_full_reg_size(s);
2139         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2140                            vec_full_reg_offset(s, a->rn)
2141                            + (a->h ? vsz / 2 : 0),
2142                            vsz, vsz, 0, fns[a->esz][a->u]);
2143     }
2144     return true;
2145 }
2146
2147 /*
2148  *** SVE Permute - Predicates Group
2149  */
2150
2151 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2152                           gen_helper_gvec_3 *fn)
2153 {
2154     if (!sve_access_check(s)) {
2155         return true;
2156     }
2157
2158     unsigned vsz = pred_full_reg_size(s);
2159
2160     /* Predicate sizes may be smaller and cannot use simd_desc.
2161        We cannot round up, as we do elsewhere, because we need
2162        the exact size for ZIP2 and REV.  We retain the style for
2163        the other helpers for consistency.  */
2164     TCGv_ptr t_d = tcg_temp_new_ptr();
2165     TCGv_ptr t_n = tcg_temp_new_ptr();
2166     TCGv_ptr t_m = tcg_temp_new_ptr();
2167     TCGv_i32 t_desc;
2168     int desc;
2169
2170     desc = vsz - 2;
2171     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2172     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2173
2174     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2175     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2176     tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2177     t_desc = tcg_const_i32(desc);
2178
2179     fn(t_d, t_n, t_m, t_desc);
2180
2181     tcg_temp_free_ptr(t_d);
2182     tcg_temp_free_ptr(t_n);
2183     tcg_temp_free_ptr(t_m);
2184     tcg_temp_free_i32(t_desc);
2185     return true;
2186 }
2187
2188 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2189                           gen_helper_gvec_2 *fn)
2190 {
2191     if (!sve_access_check(s)) {
2192         return true;
2193     }
2194
2195     unsigned vsz = pred_full_reg_size(s);
2196     TCGv_ptr t_d = tcg_temp_new_ptr();
2197     TCGv_ptr t_n = tcg_temp_new_ptr();
2198     TCGv_i32 t_desc;
2199     int desc;
2200
2201     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2202     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2203
2204     /* Predicate sizes may be smaller and cannot use simd_desc.
2205        We cannot round up, as we do elsewhere, because we need
2206        the exact size for ZIP2 and REV.  We retain the style for
2207        the other helpers for consistency.  */
2208
2209     desc = vsz - 2;
2210     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2211     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2212     t_desc = tcg_const_i32(desc);
2213
2214     fn(t_d, t_n, t_desc);
2215
2216     tcg_temp_free_i32(t_desc);
2217     tcg_temp_free_ptr(t_d);
2218     tcg_temp_free_ptr(t_n);
2219     return true;
2220 }
2221
2222 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
2223 {
2224     return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2225 }
2226
2227 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
2228 {
2229     return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2230 }
2231
2232 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
2233 {
2234     return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2235 }
2236
2237 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
2238 {
2239     return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2240 }
2241
2242 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
2243 {
2244     return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2245 }
2246
2247 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
2248 {
2249     return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2250 }
2251
2252 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
2253 {
2254     return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2255 }
2256
2257 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
2258 {
2259     return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2260 }
2261
2262 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
2263 {
2264     return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2265 }
2266
2267 /*
2268  *** SVE Permute - Interleaving Group
2269  */
2270
2271 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2272 {
2273     static gen_helper_gvec_3 * const fns[4] = {
2274         gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2275         gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2276     };
2277
2278     if (sve_access_check(s)) {
2279         unsigned vsz = vec_full_reg_size(s);
2280         unsigned high_ofs = high ? vsz / 2 : 0;
2281         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2282                            vec_full_reg_offset(s, a->rn) + high_ofs,
2283                            vec_full_reg_offset(s, a->rm) + high_ofs,
2284                            vsz, vsz, 0, fns[a->esz]);
2285     }
2286     return true;
2287 }
2288
2289 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2290                             gen_helper_gvec_3 *fn)
2291 {
2292     if (sve_access_check(s)) {
2293         unsigned vsz = vec_full_reg_size(s);
2294         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2295                            vec_full_reg_offset(s, a->rn),
2296                            vec_full_reg_offset(s, a->rm),
2297                            vsz, vsz, data, fn);
2298     }
2299     return true;
2300 }
2301
2302 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
2303 {
2304     return do_zip(s, a, false);
2305 }
2306
2307 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
2308 {
2309     return do_zip(s, a, true);
2310 }
2311
2312 static gen_helper_gvec_3 * const uzp_fns[4] = {
2313     gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2314     gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2315 };
2316
2317 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
2318 {
2319     return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2320 }
2321
2322 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
2323 {
2324     return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2325 }
2326
2327 static gen_helper_gvec_3 * const trn_fns[4] = {
2328     gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2329     gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2330 };
2331
2332 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
2333 {
2334     return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2335 }
2336
2337 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
2338 {
2339     return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2340 }
2341
2342 /*
2343  *** SVE Permute Vector - Predicated Group
2344  */
2345
2346 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
2347 {
2348     static gen_helper_gvec_3 * const fns[4] = {
2349         NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2350     };
2351     return do_zpz_ool(s, a, fns[a->esz]);
2352 }
2353
2354 /* Call the helper that computes the ARM LastActiveElement pseudocode
2355  * function, scaled by the element size.  This includes the not found
2356  * indication; e.g. not found for esz=3 is -8.
2357  */
2358 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2359 {
2360     /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2361      * round up, as we do elsewhere, because we need the exact size.
2362      */
2363     TCGv_ptr t_p = tcg_temp_new_ptr();
2364     TCGv_i32 t_desc;
2365     unsigned vsz = pred_full_reg_size(s);
2366     unsigned desc;
2367
2368     desc = vsz - 2;
2369     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2370
2371     tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2372     t_desc = tcg_const_i32(desc);
2373
2374     gen_helper_sve_last_active_element(ret, t_p, t_desc);
2375
2376     tcg_temp_free_i32(t_desc);
2377     tcg_temp_free_ptr(t_p);
2378 }
2379
2380 /* Increment LAST to the offset of the next element in the vector,
2381  * wrapping around to 0.
2382  */
2383 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2384 {
2385     unsigned vsz = vec_full_reg_size(s);
2386
2387     tcg_gen_addi_i32(last, last, 1 << esz);
2388     if (is_power_of_2(vsz)) {
2389         tcg_gen_andi_i32(last, last, vsz - 1);
2390     } else {
2391         TCGv_i32 max = tcg_const_i32(vsz);
2392         TCGv_i32 zero = tcg_const_i32(0);
2393         tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2394         tcg_temp_free_i32(max);
2395         tcg_temp_free_i32(zero);
2396     }
2397 }
2398
2399 /* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2400 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2401 {
2402     unsigned vsz = vec_full_reg_size(s);
2403
2404     if (is_power_of_2(vsz)) {
2405         tcg_gen_andi_i32(last, last, vsz - 1);
2406     } else {
2407         TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2408         TCGv_i32 zero = tcg_const_i32(0);
2409         tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2410         tcg_temp_free_i32(max);
2411         tcg_temp_free_i32(zero);
2412     }
2413 }
2414
2415 /* Load an unsigned element of ESZ from BASE+OFS.  */
2416 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2417 {
2418     TCGv_i64 r = tcg_temp_new_i64();
2419
2420     switch (esz) {
2421     case 0:
2422         tcg_gen_ld8u_i64(r, base, ofs);
2423         break;
2424     case 1:
2425         tcg_gen_ld16u_i64(r, base, ofs);
2426         break;
2427     case 2:
2428         tcg_gen_ld32u_i64(r, base, ofs);
2429         break;
2430     case 3:
2431         tcg_gen_ld_i64(r, base, ofs);
2432         break;
2433     default:
2434         g_assert_not_reached();
2435     }
2436     return r;
2437 }
2438
2439 /* Load an unsigned element of ESZ from RM[LAST].  */
2440 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2441                                  int rm, int esz)
2442 {
2443     TCGv_ptr p = tcg_temp_new_ptr();
2444     TCGv_i64 r;
2445
2446     /* Convert offset into vector into offset into ENV.
2447      * The final adjustment for the vector register base
2448      * is added via constant offset to the load.
2449      */
2450 #ifdef HOST_WORDS_BIGENDIAN
2451     /* Adjust for element ordering.  See vec_reg_offset.  */
2452     if (esz < 3) {
2453         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2454     }
2455 #endif
2456     tcg_gen_ext_i32_ptr(p, last);
2457     tcg_gen_add_ptr(p, p, cpu_env);
2458
2459     r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2460     tcg_temp_free_ptr(p);
2461
2462     return r;
2463 }
2464
2465 /* Compute CLAST for a Zreg.  */
2466 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2467 {
2468     TCGv_i32 last;
2469     TCGLabel *over;
2470     TCGv_i64 ele;
2471     unsigned vsz, esz = a->esz;
2472
2473     if (!sve_access_check(s)) {
2474         return true;
2475     }
2476
2477     last = tcg_temp_local_new_i32();
2478     over = gen_new_label();
2479
2480     find_last_active(s, last, esz, a->pg);
2481
2482     /* There is of course no movcond for a 2048-bit vector,
2483      * so we must branch over the actual store.
2484      */
2485     tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2486
2487     if (!before) {
2488         incr_last_active(s, last, esz);
2489     }
2490
2491     ele = load_last_active(s, last, a->rm, esz);
2492     tcg_temp_free_i32(last);
2493
2494     vsz = vec_full_reg_size(s);
2495     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2496     tcg_temp_free_i64(ele);
2497
2498     /* If this insn used MOVPRFX, we may need a second move.  */
2499     if (a->rd != a->rn) {
2500         TCGLabel *done = gen_new_label();
2501         tcg_gen_br(done);
2502
2503         gen_set_label(over);
2504         do_mov_z(s, a->rd, a->rn);
2505
2506         gen_set_label(done);
2507     } else {
2508         gen_set_label(over);
2509     }
2510     return true;
2511 }
2512
2513 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
2514 {
2515     return do_clast_vector(s, a, false);
2516 }
2517
2518 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
2519 {
2520     return do_clast_vector(s, a, true);
2521 }
2522
2523 /* Compute CLAST for a scalar.  */
2524 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2525                             bool before, TCGv_i64 reg_val)
2526 {
2527     TCGv_i32 last = tcg_temp_new_i32();
2528     TCGv_i64 ele, cmp, zero;
2529
2530     find_last_active(s, last, esz, pg);
2531
2532     /* Extend the original value of last prior to incrementing.  */
2533     cmp = tcg_temp_new_i64();
2534     tcg_gen_ext_i32_i64(cmp, last);
2535
2536     if (!before) {
2537         incr_last_active(s, last, esz);
2538     }
2539
2540     /* The conceit here is that while last < 0 indicates not found, after
2541      * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2542      * from which we can load garbage.  We then discard the garbage with
2543      * a conditional move.
2544      */
2545     ele = load_last_active(s, last, rm, esz);
2546     tcg_temp_free_i32(last);
2547
2548     zero = tcg_const_i64(0);
2549     tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2550
2551     tcg_temp_free_i64(zero);
2552     tcg_temp_free_i64(cmp);
2553     tcg_temp_free_i64(ele);
2554 }
2555
2556 /* Compute CLAST for a Vreg.  */
2557 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2558 {
2559     if (sve_access_check(s)) {
2560         int esz = a->esz;
2561         int ofs = vec_reg_offset(s, a->rd, 0, esz);
2562         TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2563
2564         do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2565         write_fp_dreg(s, a->rd, reg);
2566         tcg_temp_free_i64(reg);
2567     }
2568     return true;
2569 }
2570
2571 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
2572 {
2573     return do_clast_fp(s, a, false);
2574 }
2575
2576 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
2577 {
2578     return do_clast_fp(s, a, true);
2579 }
2580
2581 /* Compute CLAST for a Xreg.  */
2582 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2583 {
2584     TCGv_i64 reg;
2585
2586     if (!sve_access_check(s)) {
2587         return true;
2588     }
2589
2590     reg = cpu_reg(s, a->rd);
2591     switch (a->esz) {
2592     case 0:
2593         tcg_gen_ext8u_i64(reg, reg);
2594         break;
2595     case 1:
2596         tcg_gen_ext16u_i64(reg, reg);
2597         break;
2598     case 2:
2599         tcg_gen_ext32u_i64(reg, reg);
2600         break;
2601     case 3:
2602         break;
2603     default:
2604         g_assert_not_reached();
2605     }
2606
2607     do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2608     return true;
2609 }
2610
2611 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
2612 {
2613     return do_clast_general(s, a, false);
2614 }
2615
2616 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
2617 {
2618     return do_clast_general(s, a, true);
2619 }
2620
2621 /* Compute LAST for a scalar.  */
2622 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2623                                int pg, int rm, bool before)
2624 {
2625     TCGv_i32 last = tcg_temp_new_i32();
2626     TCGv_i64 ret;
2627
2628     find_last_active(s, last, esz, pg);
2629     if (before) {
2630         wrap_last_active(s, last, esz);
2631     } else {
2632         incr_last_active(s, last, esz);
2633     }
2634
2635     ret = load_last_active(s, last, rm, esz);
2636     tcg_temp_free_i32(last);
2637     return ret;
2638 }
2639
2640 /* Compute LAST for a Vreg.  */
2641 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2642 {
2643     if (sve_access_check(s)) {
2644         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2645         write_fp_dreg(s, a->rd, val);
2646         tcg_temp_free_i64(val);
2647     }
2648     return true;
2649 }
2650
2651 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
2652 {
2653     return do_last_fp(s, a, false);
2654 }
2655
2656 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
2657 {
2658     return do_last_fp(s, a, true);
2659 }
2660
2661 /* Compute LAST for a Xreg.  */
2662 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2663 {
2664     if (sve_access_check(s)) {
2665         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2666         tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2667         tcg_temp_free_i64(val);
2668     }
2669     return true;
2670 }
2671
2672 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
2673 {
2674     return do_last_general(s, a, false);
2675 }
2676
2677 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
2678 {
2679     return do_last_general(s, a, true);
2680 }
2681
2682 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2683 {
2684     if (sve_access_check(s)) {
2685         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2686     }
2687     return true;
2688 }
2689
2690 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2691 {
2692     if (sve_access_check(s)) {
2693         int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2694         TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2695         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2696         tcg_temp_free_i64(t);
2697     }
2698     return true;
2699 }
2700
2701 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
2702 {
2703     static gen_helper_gvec_3 * const fns[4] = {
2704         NULL,
2705         gen_helper_sve_revb_h,
2706         gen_helper_sve_revb_s,
2707         gen_helper_sve_revb_d,
2708     };
2709     return do_zpz_ool(s, a, fns[a->esz]);
2710 }
2711
2712 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
2713 {
2714     static gen_helper_gvec_3 * const fns[4] = {
2715         NULL,
2716         NULL,
2717         gen_helper_sve_revh_s,
2718         gen_helper_sve_revh_d,
2719     };
2720     return do_zpz_ool(s, a, fns[a->esz]);
2721 }
2722
2723 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
2724 {
2725     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2726 }
2727
2728 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
2729 {
2730     static gen_helper_gvec_3 * const fns[4] = {
2731         gen_helper_sve_rbit_b,
2732         gen_helper_sve_rbit_h,
2733         gen_helper_sve_rbit_s,
2734         gen_helper_sve_rbit_d,
2735     };
2736     return do_zpz_ool(s, a, fns[a->esz]);
2737 }
2738
2739 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
2740 {
2741     if (sve_access_check(s)) {
2742         unsigned vsz = vec_full_reg_size(s);
2743         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2744                            vec_full_reg_offset(s, a->rn),
2745                            vec_full_reg_offset(s, a->rm),
2746                            pred_full_reg_offset(s, a->pg),
2747                            vsz, vsz, a->esz, gen_helper_sve_splice);
2748     }
2749     return true;
2750 }
2751
2752 /*
2753  *** SVE Integer Compare - Vectors Group
2754  */
2755
2756 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2757                           gen_helper_gvec_flags_4 *gen_fn)
2758 {
2759     TCGv_ptr pd, zn, zm, pg;
2760     unsigned vsz;
2761     TCGv_i32 t;
2762
2763     if (gen_fn == NULL) {
2764         return false;
2765     }
2766     if (!sve_access_check(s)) {
2767         return true;
2768     }
2769
2770     vsz = vec_full_reg_size(s);
2771     t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2772     pd = tcg_temp_new_ptr();
2773     zn = tcg_temp_new_ptr();
2774     zm = tcg_temp_new_ptr();
2775     pg = tcg_temp_new_ptr();
2776
2777     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2778     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2779     tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2780     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2781
2782     gen_fn(t, pd, zn, zm, pg, t);
2783
2784     tcg_temp_free_ptr(pd);
2785     tcg_temp_free_ptr(zn);
2786     tcg_temp_free_ptr(zm);
2787     tcg_temp_free_ptr(pg);
2788
2789     do_pred_flags(t);
2790
2791     tcg_temp_free_i32(t);
2792     return true;
2793 }
2794
2795 #define DO_PPZZ(NAME, name) \
2796 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
2797 {                                                                         \
2798     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2799         gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
2800         gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
2801     };                                                                    \
2802     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2803 }
2804
2805 DO_PPZZ(CMPEQ, cmpeq)
2806 DO_PPZZ(CMPNE, cmpne)
2807 DO_PPZZ(CMPGT, cmpgt)
2808 DO_PPZZ(CMPGE, cmpge)
2809 DO_PPZZ(CMPHI, cmphi)
2810 DO_PPZZ(CMPHS, cmphs)
2811
2812 #undef DO_PPZZ
2813
2814 #define DO_PPZW(NAME, name) \
2815 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
2816 {                                                                         \
2817     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2818         gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
2819         gen_helper_sve_##name##_ppzw_s, NULL                              \
2820     };                                                                    \
2821     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2822 }
2823
2824 DO_PPZW(CMPEQ, cmpeq)
2825 DO_PPZW(CMPNE, cmpne)
2826 DO_PPZW(CMPGT, cmpgt)
2827 DO_PPZW(CMPGE, cmpge)
2828 DO_PPZW(CMPHI, cmphi)
2829 DO_PPZW(CMPHS, cmphs)
2830 DO_PPZW(CMPLT, cmplt)
2831 DO_PPZW(CMPLE, cmple)
2832 DO_PPZW(CMPLO, cmplo)
2833 DO_PPZW(CMPLS, cmpls)
2834
2835 #undef DO_PPZW
2836
2837 /*
2838  *** SVE Integer Compare - Immediate Groups
2839  */
2840
2841 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2842                           gen_helper_gvec_flags_3 *gen_fn)
2843 {
2844     TCGv_ptr pd, zn, pg;
2845     unsigned vsz;
2846     TCGv_i32 t;
2847
2848     if (gen_fn == NULL) {
2849         return false;
2850     }
2851     if (!sve_access_check(s)) {
2852         return true;
2853     }
2854
2855     vsz = vec_full_reg_size(s);
2856     t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2857     pd = tcg_temp_new_ptr();
2858     zn = tcg_temp_new_ptr();
2859     pg = tcg_temp_new_ptr();
2860
2861     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2862     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2863     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2864
2865     gen_fn(t, pd, zn, pg, t);
2866
2867     tcg_temp_free_ptr(pd);
2868     tcg_temp_free_ptr(zn);
2869     tcg_temp_free_ptr(pg);
2870
2871     do_pred_flags(t);
2872
2873     tcg_temp_free_i32(t);
2874     return true;
2875 }
2876
2877 #define DO_PPZI(NAME, name) \
2878 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
2879 {                                                                         \
2880     static gen_helper_gvec_flags_3 * const fns[4] = {                     \
2881         gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
2882         gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
2883     };                                                                    \
2884     return do_ppzi_flags(s, a, fns[a->esz]);                              \
2885 }
2886
2887 DO_PPZI(CMPEQ, cmpeq)
2888 DO_PPZI(CMPNE, cmpne)
2889 DO_PPZI(CMPGT, cmpgt)
2890 DO_PPZI(CMPGE, cmpge)
2891 DO_PPZI(CMPHI, cmphi)
2892 DO_PPZI(CMPHS, cmphs)
2893 DO_PPZI(CMPLT, cmplt)
2894 DO_PPZI(CMPLE, cmple)
2895 DO_PPZI(CMPLO, cmplo)
2896 DO_PPZI(CMPLS, cmpls)
2897
2898 #undef DO_PPZI
2899
2900 /*
2901  *** SVE Partition Break Group
2902  */
2903
2904 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2905                     gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2906 {
2907     if (!sve_access_check(s)) {
2908         return true;
2909     }
2910
2911     unsigned vsz = pred_full_reg_size(s);
2912
2913     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2914     TCGv_ptr d = tcg_temp_new_ptr();
2915     TCGv_ptr n = tcg_temp_new_ptr();
2916     TCGv_ptr m = tcg_temp_new_ptr();
2917     TCGv_ptr g = tcg_temp_new_ptr();
2918     TCGv_i32 t = tcg_const_i32(vsz - 2);
2919
2920     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2921     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2922     tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2923     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2924
2925     if (a->s) {
2926         fn_s(t, d, n, m, g, t);
2927         do_pred_flags(t);
2928     } else {
2929         fn(d, n, m, g, t);
2930     }
2931     tcg_temp_free_ptr(d);
2932     tcg_temp_free_ptr(n);
2933     tcg_temp_free_ptr(m);
2934     tcg_temp_free_ptr(g);
2935     tcg_temp_free_i32(t);
2936     return true;
2937 }
2938
2939 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2940                     gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2941 {
2942     if (!sve_access_check(s)) {
2943         return true;
2944     }
2945
2946     unsigned vsz = pred_full_reg_size(s);
2947
2948     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2949     TCGv_ptr d = tcg_temp_new_ptr();
2950     TCGv_ptr n = tcg_temp_new_ptr();
2951     TCGv_ptr g = tcg_temp_new_ptr();
2952     TCGv_i32 t = tcg_const_i32(vsz - 2);
2953
2954     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2955     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2956     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2957
2958     if (a->s) {
2959         fn_s(t, d, n, g, t);
2960         do_pred_flags(t);
2961     } else {
2962         fn(d, n, g, t);
2963     }
2964     tcg_temp_free_ptr(d);
2965     tcg_temp_free_ptr(n);
2966     tcg_temp_free_ptr(g);
2967     tcg_temp_free_i32(t);
2968     return true;
2969 }
2970
2971 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
2972 {
2973     return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2974 }
2975
2976 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
2977 {
2978     return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2979 }
2980
2981 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
2982 {
2983     return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2984 }
2985
2986 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
2987 {
2988     return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2989 }
2990
2991 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
2992 {
2993     return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2994 }
2995
2996 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
2997 {
2998     return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2999 }
3000
3001 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
3002 {
3003     return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3004 }
3005
3006 /*
3007  *** SVE Predicate Count Group
3008  */
3009
3010 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3011 {
3012     unsigned psz = pred_full_reg_size(s);
3013
3014     if (psz <= 8) {
3015         uint64_t psz_mask;
3016
3017         tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3018         if (pn != pg) {
3019             TCGv_i64 g = tcg_temp_new_i64();
3020             tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3021             tcg_gen_and_i64(val, val, g);
3022             tcg_temp_free_i64(g);
3023         }
3024
3025         /* Reduce the pred_esz_masks value simply to reduce the
3026          * size of the code generated here.
3027          */
3028         psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3029         tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3030
3031         tcg_gen_ctpop_i64(val, val);
3032     } else {
3033         TCGv_ptr t_pn = tcg_temp_new_ptr();
3034         TCGv_ptr t_pg = tcg_temp_new_ptr();
3035         unsigned desc;
3036         TCGv_i32 t_desc;
3037
3038         desc = psz - 2;
3039         desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3040
3041         tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3042         tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3043         t_desc = tcg_const_i32(desc);
3044
3045         gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3046         tcg_temp_free_ptr(t_pn);
3047         tcg_temp_free_ptr(t_pg);
3048         tcg_temp_free_i32(t_desc);
3049     }
3050 }
3051
3052 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
3053 {
3054     if (sve_access_check(s)) {
3055         do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3056     }
3057     return true;
3058 }
3059
3060 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3061 {
3062     if (sve_access_check(s)) {
3063         TCGv_i64 reg = cpu_reg(s, a->rd);
3064         TCGv_i64 val = tcg_temp_new_i64();
3065
3066         do_cntp(s, val, a->esz, a->pg, a->pg);
3067         if (a->d) {
3068             tcg_gen_sub_i64(reg, reg, val);
3069         } else {
3070             tcg_gen_add_i64(reg, reg, val);
3071         }
3072         tcg_temp_free_i64(val);
3073     }
3074     return true;
3075 }
3076
3077 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3078 {
3079     if (a->esz == 0) {
3080         return false;
3081     }
3082     if (sve_access_check(s)) {
3083         unsigned vsz = vec_full_reg_size(s);
3084         TCGv_i64 val = tcg_temp_new_i64();
3085         GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3086
3087         do_cntp(s, val, a->esz, a->pg, a->pg);
3088         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3089                 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3090     }
3091     return true;
3092 }
3093
3094 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3095 {
3096     if (sve_access_check(s)) {
3097         TCGv_i64 reg = cpu_reg(s, a->rd);
3098         TCGv_i64 val = tcg_temp_new_i64();
3099
3100         do_cntp(s, val, a->esz, a->pg, a->pg);
3101         do_sat_addsub_32(reg, val, a->u, a->d);
3102     }
3103     return true;
3104 }
3105
3106 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3107 {
3108     if (sve_access_check(s)) {
3109         TCGv_i64 reg = cpu_reg(s, a->rd);
3110         TCGv_i64 val = tcg_temp_new_i64();
3111
3112         do_cntp(s, val, a->esz, a->pg, a->pg);
3113         do_sat_addsub_64(reg, val, a->u, a->d);
3114     }
3115     return true;
3116 }
3117
3118 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3119 {
3120     if (a->esz == 0) {
3121         return false;
3122     }
3123     if (sve_access_check(s)) {
3124         TCGv_i64 val = tcg_temp_new_i64();
3125         do_cntp(s, val, a->esz, a->pg, a->pg);
3126         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3127     }
3128     return true;
3129 }
3130
3131 /*
3132  *** SVE Integer Compare Scalars Group
3133  */
3134
3135 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3136 {
3137     if (!sve_access_check(s)) {
3138         return true;
3139     }
3140
3141     TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3142     TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3143     TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3144     TCGv_i64 cmp = tcg_temp_new_i64();
3145
3146     tcg_gen_setcond_i64(cond, cmp, rn, rm);
3147     tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3148     tcg_temp_free_i64(cmp);
3149
3150     /* VF = !NF & !CF.  */
3151     tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3152     tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3153
3154     /* Both NF and VF actually look at bit 31.  */
3155     tcg_gen_neg_i32(cpu_NF, cpu_NF);
3156     tcg_gen_neg_i32(cpu_VF, cpu_VF);
3157     return true;
3158 }
3159
3160 static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
3161 {
3162     TCGv_i64 op0, op1, t0, t1, tmax;
3163     TCGv_i32 t2, t3;
3164     TCGv_ptr ptr;
3165     unsigned desc, vsz = vec_full_reg_size(s);
3166     TCGCond cond;
3167
3168     if (!sve_access_check(s)) {
3169         return true;
3170     }
3171
3172     op0 = read_cpu_reg(s, a->rn, 1);
3173     op1 = read_cpu_reg(s, a->rm, 1);
3174
3175     if (!a->sf) {
3176         if (a->u) {
3177             tcg_gen_ext32u_i64(op0, op0);
3178             tcg_gen_ext32u_i64(op1, op1);
3179         } else {
3180             tcg_gen_ext32s_i64(op0, op0);
3181             tcg_gen_ext32s_i64(op1, op1);
3182         }
3183     }
3184
3185     /* For the helper, compress the different conditions into a computation
3186      * of how many iterations for which the condition is true.
3187      */
3188     t0 = tcg_temp_new_i64();
3189     t1 = tcg_temp_new_i64();
3190     tcg_gen_sub_i64(t0, op1, op0);
3191
3192     tmax = tcg_const_i64(vsz >> a->esz);
3193     if (a->eq) {
3194         /* Equality means one more iteration.  */
3195         tcg_gen_addi_i64(t0, t0, 1);
3196
3197         /* If op1 is max (un)signed integer (and the only time the addition
3198          * above could overflow), then we produce an all-true predicate by
3199          * setting the count to the vector length.  This is because the
3200          * pseudocode is described as an increment + compare loop, and the
3201          * max integer would always compare true.
3202          */
3203         tcg_gen_movi_i64(t1, (a->sf
3204                               ? (a->u ? UINT64_MAX : INT64_MAX)
3205                               : (a->u ? UINT32_MAX : INT32_MAX)));
3206         tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3207     }
3208
3209     /* Bound to the maximum.  */
3210     tcg_gen_umin_i64(t0, t0, tmax);
3211     tcg_temp_free_i64(tmax);
3212
3213     /* Set the count to zero if the condition is false.  */
3214     cond = (a->u
3215             ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3216             : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3217     tcg_gen_movi_i64(t1, 0);
3218     tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3219     tcg_temp_free_i64(t1);
3220
3221     /* Since we're bounded, pass as a 32-bit type.  */
3222     t2 = tcg_temp_new_i32();
3223     tcg_gen_extrl_i64_i32(t2, t0);
3224     tcg_temp_free_i64(t0);
3225
3226     /* Scale elements to bits.  */
3227     tcg_gen_shli_i32(t2, t2, a->esz);
3228
3229     desc = (vsz / 8) - 2;
3230     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3231     t3 = tcg_const_i32(desc);
3232
3233     ptr = tcg_temp_new_ptr();
3234     tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3235
3236     gen_helper_sve_while(t2, ptr, t2, t3);
3237     do_pred_flags(t2);
3238
3239     tcg_temp_free_ptr(ptr);
3240     tcg_temp_free_i32(t2);
3241     tcg_temp_free_i32(t3);
3242     return true;
3243 }
3244
3245 /*
3246  *** SVE Integer Wide Immediate - Unpredicated Group
3247  */
3248
3249 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3250 {
3251     if (a->esz == 0) {
3252         return false;
3253     }
3254     if (sve_access_check(s)) {
3255         unsigned vsz = vec_full_reg_size(s);
3256         int dofs = vec_full_reg_offset(s, a->rd);
3257         uint64_t imm;
3258
3259         /* Decode the VFP immediate.  */
3260         imm = vfp_expand_imm(a->esz, a->imm);
3261         imm = dup_const(a->esz, imm);
3262
3263         tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3264     }
3265     return true;
3266 }
3267
3268 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3269 {
3270     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3271         return false;
3272     }
3273     if (sve_access_check(s)) {
3274         unsigned vsz = vec_full_reg_size(s);
3275         int dofs = vec_full_reg_offset(s, a->rd);
3276
3277         tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3278     }
3279     return true;
3280 }
3281
3282 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
3283 {
3284     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3285         return false;
3286     }
3287     if (sve_access_check(s)) {
3288         unsigned vsz = vec_full_reg_size(s);
3289         tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3290                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3291     }
3292     return true;
3293 }
3294
3295 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
3296 {
3297     a->imm = -a->imm;
3298     return trans_ADD_zzi(s, a);
3299 }
3300
3301 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3302 {
3303     static const GVecGen2s op[4] = {
3304         { .fni8 = tcg_gen_vec_sub8_i64,
3305           .fniv = tcg_gen_sub_vec,
3306           .fno = gen_helper_sve_subri_b,
3307           .opc = INDEX_op_sub_vec,
3308           .vece = MO_8,
3309           .scalar_first = true },
3310         { .fni8 = tcg_gen_vec_sub16_i64,
3311           .fniv = tcg_gen_sub_vec,
3312           .fno = gen_helper_sve_subri_h,
3313           .opc = INDEX_op_sub_vec,
3314           .vece = MO_16,
3315           .scalar_first = true },
3316         { .fni4 = tcg_gen_sub_i32,
3317           .fniv = tcg_gen_sub_vec,
3318           .fno = gen_helper_sve_subri_s,
3319           .opc = INDEX_op_sub_vec,
3320           .vece = MO_32,
3321           .scalar_first = true },
3322         { .fni8 = tcg_gen_sub_i64,
3323           .fniv = tcg_gen_sub_vec,
3324           .fno = gen_helper_sve_subri_d,
3325           .opc = INDEX_op_sub_vec,
3326           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3327           .vece = MO_64,
3328           .scalar_first = true }
3329     };
3330
3331     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3332         return false;
3333     }
3334     if (sve_access_check(s)) {
3335         unsigned vsz = vec_full_reg_size(s);
3336         TCGv_i64 c = tcg_const_i64(a->imm);
3337         tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3338                         vec_full_reg_offset(s, a->rn),
3339                         vsz, vsz, c, &op[a->esz]);
3340         tcg_temp_free_i64(c);
3341     }
3342     return true;
3343 }
3344
3345 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3346 {
3347     if (sve_access_check(s)) {
3348         unsigned vsz = vec_full_reg_size(s);
3349         tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3350                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3351     }
3352     return true;
3353 }
3354
3355 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3356 {
3357     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3358         return false;
3359     }
3360     if (sve_access_check(s)) {
3361         TCGv_i64 val = tcg_const_i64(a->imm);
3362         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3363         tcg_temp_free_i64(val);
3364     }
3365     return true;
3366 }
3367
3368 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
3369 {
3370     return do_zzi_sat(s, a, false, false);
3371 }
3372
3373 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
3374 {
3375     return do_zzi_sat(s, a, true, false);
3376 }
3377
3378 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3379 {
3380     return do_zzi_sat(s, a, false, true);
3381 }
3382
3383 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3384 {
3385     return do_zzi_sat(s, a, true, true);
3386 }
3387
3388 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3389 {
3390     if (sve_access_check(s)) {
3391         unsigned vsz = vec_full_reg_size(s);
3392         TCGv_i64 c = tcg_const_i64(a->imm);
3393
3394         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3395                             vec_full_reg_offset(s, a->rn),
3396                             c, vsz, vsz, 0, fn);
3397         tcg_temp_free_i64(c);
3398     }
3399     return true;
3400 }
3401
3402 #define DO_ZZI(NAME, name) \
3403 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
3404 {                                                                       \
3405     static gen_helper_gvec_2i * const fns[4] = {                        \
3406         gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
3407         gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
3408     };                                                                  \
3409     return do_zzi_ool(s, a, fns[a->esz]);                               \
3410 }
3411
3412 DO_ZZI(SMAX, smax)
3413 DO_ZZI(UMAX, umax)
3414 DO_ZZI(SMIN, smin)
3415 DO_ZZI(UMIN, umin)
3416
3417 #undef DO_ZZI
3418
3419 static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
3420 {
3421     static gen_helper_gvec_3 * const fns[2][2] = {
3422         { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3423         { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3424     };
3425
3426     if (sve_access_check(s)) {
3427         unsigned vsz = vec_full_reg_size(s);
3428         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3429                            vec_full_reg_offset(s, a->rn),
3430                            vec_full_reg_offset(s, a->rm),
3431                            vsz, vsz, 0, fns[a->u][a->sz]);
3432     }
3433     return true;
3434 }
3435
3436 static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
3437 {
3438     static gen_helper_gvec_3 * const fns[2][2] = {
3439         { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3440         { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3441     };
3442
3443     if (sve_access_check(s)) {
3444         unsigned vsz = vec_full_reg_size(s);
3445         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3446                            vec_full_reg_offset(s, a->rn),
3447                            vec_full_reg_offset(s, a->rm),
3448                            vsz, vsz, a->index, fns[a->u][a->sz]);
3449     }
3450     return true;
3451 }
3452
3453
3454 /*
3455  *** SVE Floating Point Multiply-Add Indexed Group
3456  */
3457
3458 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
3459 {
3460     static gen_helper_gvec_4_ptr * const fns[3] = {
3461         gen_helper_gvec_fmla_idx_h,
3462         gen_helper_gvec_fmla_idx_s,
3463         gen_helper_gvec_fmla_idx_d,
3464     };
3465
3466     if (sve_access_check(s)) {
3467         unsigned vsz = vec_full_reg_size(s);
3468         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3469         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3470                            vec_full_reg_offset(s, a->rn),
3471                            vec_full_reg_offset(s, a->rm),
3472                            vec_full_reg_offset(s, a->ra),
3473                            status, vsz, vsz, (a->index << 1) | a->sub,
3474                            fns[a->esz - 1]);
3475         tcg_temp_free_ptr(status);
3476     }
3477     return true;
3478 }
3479
3480 /*
3481  *** SVE Floating Point Multiply Indexed Group
3482  */
3483
3484 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
3485 {
3486     static gen_helper_gvec_3_ptr * const fns[3] = {
3487         gen_helper_gvec_fmul_idx_h,
3488         gen_helper_gvec_fmul_idx_s,
3489         gen_helper_gvec_fmul_idx_d,
3490     };
3491
3492     if (sve_access_check(s)) {
3493         unsigned vsz = vec_full_reg_size(s);
3494         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3495         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3496                            vec_full_reg_offset(s, a->rn),
3497                            vec_full_reg_offset(s, a->rm),
3498                            status, vsz, vsz, a->index, fns[a->esz - 1]);
3499         tcg_temp_free_ptr(status);
3500     }
3501     return true;
3502 }
3503
3504 /*
3505  *** SVE Floating Point Fast Reduction Group
3506  */
3507
3508 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3509                                   TCGv_ptr, TCGv_i32);
3510
3511 static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3512                       gen_helper_fp_reduce *fn)
3513 {
3514     unsigned vsz = vec_full_reg_size(s);
3515     unsigned p2vsz = pow2ceil(vsz);
3516     TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3517     TCGv_ptr t_zn, t_pg, status;
3518     TCGv_i64 temp;
3519
3520     temp = tcg_temp_new_i64();
3521     t_zn = tcg_temp_new_ptr();
3522     t_pg = tcg_temp_new_ptr();
3523
3524     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3525     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3526     status = get_fpstatus_ptr(a->esz == MO_16);
3527
3528     fn(temp, t_zn, t_pg, status, t_desc);
3529     tcg_temp_free_ptr(t_zn);
3530     tcg_temp_free_ptr(t_pg);
3531     tcg_temp_free_ptr(status);
3532     tcg_temp_free_i32(t_desc);
3533
3534     write_fp_dreg(s, a->rd, temp);
3535     tcg_temp_free_i64(temp);
3536 }
3537
3538 #define DO_VPZ(NAME, name) \
3539 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
3540 {                                                                        \
3541     static gen_helper_fp_reduce * const fns[3] = {                       \
3542         gen_helper_sve_##name##_h,                                       \
3543         gen_helper_sve_##name##_s,                                       \
3544         gen_helper_sve_##name##_d,                                       \
3545     };                                                                   \
3546     if (a->esz == 0) {                                                   \
3547         return false;                                                    \
3548     }                                                                    \
3549     if (sve_access_check(s)) {                                           \
3550         do_reduce(s, a, fns[a->esz - 1]);                                \
3551     }                                                                    \
3552     return true;                                                         \
3553 }
3554
3555 DO_VPZ(FADDV, faddv)
3556 DO_VPZ(FMINNMV, fminnmv)
3557 DO_VPZ(FMAXNMV, fmaxnmv)
3558 DO_VPZ(FMINV, fminv)
3559 DO_VPZ(FMAXV, fmaxv)
3560
3561 /*
3562  *** SVE Floating Point Unary Operations - Unpredicated Group
3563  */
3564
3565 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3566 {
3567     unsigned vsz = vec_full_reg_size(s);
3568     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3569
3570     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3571                        vec_full_reg_offset(s, a->rn),
3572                        status, vsz, vsz, 0, fn);
3573     tcg_temp_free_ptr(status);
3574 }
3575
3576 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3577 {
3578     static gen_helper_gvec_2_ptr * const fns[3] = {
3579         gen_helper_gvec_frecpe_h,
3580         gen_helper_gvec_frecpe_s,
3581         gen_helper_gvec_frecpe_d,
3582     };
3583     if (a->esz == 0) {
3584         return false;
3585     }
3586     if (sve_access_check(s)) {
3587         do_zz_fp(s, a, fns[a->esz - 1]);
3588     }
3589     return true;
3590 }
3591
3592 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3593 {
3594     static gen_helper_gvec_2_ptr * const fns[3] = {
3595         gen_helper_gvec_frsqrte_h,
3596         gen_helper_gvec_frsqrte_s,
3597         gen_helper_gvec_frsqrte_d,
3598     };
3599     if (a->esz == 0) {
3600         return false;
3601     }
3602     if (sve_access_check(s)) {
3603         do_zz_fp(s, a, fns[a->esz - 1]);
3604     }
3605     return true;
3606 }
3607
3608 /*
3609  *** SVE Floating Point Compare with Zero Group
3610  */
3611
3612 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3613                       gen_helper_gvec_3_ptr *fn)
3614 {
3615     unsigned vsz = vec_full_reg_size(s);
3616     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3617
3618     tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3619                        vec_full_reg_offset(s, a->rn),
3620                        pred_full_reg_offset(s, a->pg),
3621                        status, vsz, vsz, 0, fn);
3622     tcg_temp_free_ptr(status);
3623 }
3624
3625 #define DO_PPZ(NAME, name) \
3626 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
3627 {                                                                 \
3628     static gen_helper_gvec_3_ptr * const fns[3] = {               \
3629         gen_helper_sve_##name##_h,                                \
3630         gen_helper_sve_##name##_s,                                \
3631         gen_helper_sve_##name##_d,                                \
3632     };                                                            \
3633     if (a->esz == 0) {                                            \
3634         return false;                                             \
3635     }                                                             \
3636     if (sve_access_check(s)) {                                    \
3637         do_ppz_fp(s, a, fns[a->esz - 1]);                         \
3638     }                                                             \
3639     return true;                                                  \
3640 }
3641
3642 DO_PPZ(FCMGE_ppz0, fcmge0)
3643 DO_PPZ(FCMGT_ppz0, fcmgt0)
3644 DO_PPZ(FCMLE_ppz0, fcmle0)
3645 DO_PPZ(FCMLT_ppz0, fcmlt0)
3646 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3647 DO_PPZ(FCMNE_ppz0, fcmne0)
3648
3649 #undef DO_PPZ
3650
3651 /*
3652  *** SVE floating-point trig multiply-add coefficient
3653  */
3654
3655 static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
3656 {
3657     static gen_helper_gvec_3_ptr * const fns[3] = {
3658         gen_helper_sve_ftmad_h,
3659         gen_helper_sve_ftmad_s,
3660         gen_helper_sve_ftmad_d,
3661     };
3662
3663     if (a->esz == 0) {
3664         return false;
3665     }
3666     if (sve_access_check(s)) {
3667         unsigned vsz = vec_full_reg_size(s);
3668         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3669         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3670                            vec_full_reg_offset(s, a->rn),
3671                            vec_full_reg_offset(s, a->rm),
3672                            status, vsz, vsz, a->imm, fns[a->esz - 1]);
3673         tcg_temp_free_ptr(status);
3674     }
3675     return true;
3676 }
3677
3678 /*
3679  *** SVE Floating Point Accumulating Reduction Group
3680  */
3681
3682 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3683 {
3684     typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3685                           TCGv_ptr, TCGv_ptr, TCGv_i32);
3686     static fadda_fn * const fns[3] = {
3687         gen_helper_sve_fadda_h,
3688         gen_helper_sve_fadda_s,
3689         gen_helper_sve_fadda_d,
3690     };
3691     unsigned vsz = vec_full_reg_size(s);
3692     TCGv_ptr t_rm, t_pg, t_fpst;
3693     TCGv_i64 t_val;
3694     TCGv_i32 t_desc;
3695
3696     if (a->esz == 0) {
3697         return false;
3698     }
3699     if (!sve_access_check(s)) {
3700         return true;
3701     }
3702
3703     t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3704     t_rm = tcg_temp_new_ptr();
3705     t_pg = tcg_temp_new_ptr();
3706     tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3707     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3708     t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3709     t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3710
3711     fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3712
3713     tcg_temp_free_i32(t_desc);
3714     tcg_temp_free_ptr(t_fpst);
3715     tcg_temp_free_ptr(t_pg);
3716     tcg_temp_free_ptr(t_rm);
3717
3718     write_fp_dreg(s, a->rd, t_val);
3719     tcg_temp_free_i64(t_val);
3720     return true;
3721 }
3722
3723 /*
3724  *** SVE Floating Point Arithmetic - Unpredicated Group
3725  */
3726
3727 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3728                       gen_helper_gvec_3_ptr *fn)
3729 {
3730     if (fn == NULL) {
3731         return false;
3732     }
3733     if (sve_access_check(s)) {
3734         unsigned vsz = vec_full_reg_size(s);
3735         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3736         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3737                            vec_full_reg_offset(s, a->rn),
3738                            vec_full_reg_offset(s, a->rm),
3739                            status, vsz, vsz, 0, fn);
3740         tcg_temp_free_ptr(status);
3741     }
3742     return true;
3743 }
3744
3745
3746 #define DO_FP3(NAME, name) \
3747 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
3748 {                                                                   \
3749     static gen_helper_gvec_3_ptr * const fns[4] = {                 \
3750         NULL, gen_helper_gvec_##name##_h,                           \
3751         gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
3752     };                                                              \
3753     return do_zzz_fp(s, a, fns[a->esz]);                            \
3754 }
3755
3756 DO_FP3(FADD_zzz, fadd)
3757 DO_FP3(FSUB_zzz, fsub)
3758 DO_FP3(FMUL_zzz, fmul)
3759 DO_FP3(FTSMUL, ftsmul)
3760 DO_FP3(FRECPS, recps)
3761 DO_FP3(FRSQRTS, rsqrts)
3762
3763 #undef DO_FP3
3764
3765 /*
3766  *** SVE Floating Point Arithmetic - Predicated Group
3767  */
3768
3769 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3770                        gen_helper_gvec_4_ptr *fn)
3771 {
3772     if (fn == NULL) {
3773         return false;
3774     }
3775     if (sve_access_check(s)) {
3776         unsigned vsz = vec_full_reg_size(s);
3777         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3778         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3779                            vec_full_reg_offset(s, a->rn),
3780                            vec_full_reg_offset(s, a->rm),
3781                            pred_full_reg_offset(s, a->pg),
3782                            status, vsz, vsz, 0, fn);
3783         tcg_temp_free_ptr(status);
3784     }
3785     return true;
3786 }
3787
3788 #define DO_FP3(NAME, name) \
3789 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
3790 {                                                                   \
3791     static gen_helper_gvec_4_ptr * const fns[4] = {                 \
3792         NULL, gen_helper_sve_##name##_h,                            \
3793         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
3794     };                                                              \
3795     return do_zpzz_fp(s, a, fns[a->esz]);                           \
3796 }
3797
3798 DO_FP3(FADD_zpzz, fadd)
3799 DO_FP3(FSUB_zpzz, fsub)
3800 DO_FP3(FMUL_zpzz, fmul)
3801 DO_FP3(FMIN_zpzz, fmin)
3802 DO_FP3(FMAX_zpzz, fmax)
3803 DO_FP3(FMINNM_zpzz, fminnum)
3804 DO_FP3(FMAXNM_zpzz, fmaxnum)
3805 DO_FP3(FABD, fabd)
3806 DO_FP3(FSCALE, fscalbn)
3807 DO_FP3(FDIV, fdiv)
3808 DO_FP3(FMULX, fmulx)
3809
3810 #undef DO_FP3
3811
3812 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3813                                       TCGv_i64, TCGv_ptr, TCGv_i32);
3814
3815 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3816                          TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3817 {
3818     unsigned vsz = vec_full_reg_size(s);
3819     TCGv_ptr t_zd, t_zn, t_pg, status;
3820     TCGv_i32 desc;
3821
3822     t_zd = tcg_temp_new_ptr();
3823     t_zn = tcg_temp_new_ptr();
3824     t_pg = tcg_temp_new_ptr();
3825     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3826     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3827     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3828
3829     status = get_fpstatus_ptr(is_fp16);
3830     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3831     fn(t_zd, t_zn, t_pg, scalar, status, desc);
3832
3833     tcg_temp_free_i32(desc);
3834     tcg_temp_free_ptr(status);
3835     tcg_temp_free_ptr(t_pg);
3836     tcg_temp_free_ptr(t_zn);
3837     tcg_temp_free_ptr(t_zd);
3838 }
3839
3840 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3841                       gen_helper_sve_fp2scalar *fn)
3842 {
3843     TCGv_i64 temp = tcg_const_i64(imm);
3844     do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3845     tcg_temp_free_i64(temp);
3846 }
3847
3848 #define DO_FP_IMM(NAME, name, const0, const1) \
3849 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
3850 {                                                                         \
3851     static gen_helper_sve_fp2scalar * const fns[3] = {                    \
3852         gen_helper_sve_##name##_h,                                        \
3853         gen_helper_sve_##name##_s,                                        \
3854         gen_helper_sve_##name##_d                                         \
3855     };                                                                    \
3856     static uint64_t const val[3][2] = {                                   \
3857         { float16_##const0, float16_##const1 },                           \
3858         { float32_##const0, float32_##const1 },                           \
3859         { float64_##const0, float64_##const1 },                           \
3860     };                                                                    \
3861     if (a->esz == 0) {                                                    \
3862         return false;                                                     \
3863     }                                                                     \
3864     if (sve_access_check(s)) {                                            \
3865         do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
3866     }                                                                     \
3867     return true;                                                          \
3868 }
3869
3870 #define float16_two  make_float16(0x4000)
3871 #define float32_two  make_float32(0x40000000)
3872 #define float64_two  make_float64(0x4000000000000000ULL)
3873
3874 DO_FP_IMM(FADD, fadds, half, one)
3875 DO_FP_IMM(FSUB, fsubs, half, one)
3876 DO_FP_IMM(FMUL, fmuls, half, two)
3877 DO_FP_IMM(FSUBR, fsubrs, half, one)
3878 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3879 DO_FP_IMM(FMINNM, fminnms, zero, one)
3880 DO_FP_IMM(FMAX, fmaxs, zero, one)
3881 DO_FP_IMM(FMIN, fmins, zero, one)
3882
3883 #undef DO_FP_IMM
3884
3885 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3886                       gen_helper_gvec_4_ptr *fn)
3887 {
3888     if (fn == NULL) {
3889         return false;
3890     }
3891     if (sve_access_check(s)) {
3892         unsigned vsz = vec_full_reg_size(s);
3893         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3894         tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3895                            vec_full_reg_offset(s, a->rn),
3896                            vec_full_reg_offset(s, a->rm),
3897                            pred_full_reg_offset(s, a->pg),
3898                            status, vsz, vsz, 0, fn);
3899         tcg_temp_free_ptr(status);
3900     }
3901     return true;
3902 }
3903
3904 #define DO_FPCMP(NAME, name) \
3905 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)     \
3906 {                                                                     \
3907     static gen_helper_gvec_4_ptr * const fns[4] = {                   \
3908         NULL, gen_helper_sve_##name##_h,                              \
3909         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
3910     };                                                                \
3911     return do_fp_cmp(s, a, fns[a->esz]);                              \
3912 }
3913
3914 DO_FPCMP(FCMGE, fcmge)
3915 DO_FPCMP(FCMGT, fcmgt)
3916 DO_FPCMP(FCMEQ, fcmeq)
3917 DO_FPCMP(FCMNE, fcmne)
3918 DO_FPCMP(FCMUO, fcmuo)
3919 DO_FPCMP(FACGE, facge)
3920 DO_FPCMP(FACGT, facgt)
3921
3922 #undef DO_FPCMP
3923
3924 static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
3925 {
3926     static gen_helper_gvec_4_ptr * const fns[3] = {
3927         gen_helper_sve_fcadd_h,
3928         gen_helper_sve_fcadd_s,
3929         gen_helper_sve_fcadd_d
3930     };
3931
3932     if (a->esz == 0) {
3933         return false;
3934     }
3935     if (sve_access_check(s)) {
3936         unsigned vsz = vec_full_reg_size(s);
3937         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3938         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3939                            vec_full_reg_offset(s, a->rn),
3940                            vec_full_reg_offset(s, a->rm),
3941                            pred_full_reg_offset(s, a->pg),
3942                            status, vsz, vsz, a->rot, fns[a->esz - 1]);
3943         tcg_temp_free_ptr(status);
3944     }
3945     return true;
3946 }
3947
3948 typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3949
3950 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3951 {
3952     if (fn == NULL) {
3953         return false;
3954     }
3955     if (!sve_access_check(s)) {
3956         return true;
3957     }
3958
3959     unsigned vsz = vec_full_reg_size(s);
3960     unsigned desc;
3961     TCGv_i32 t_desc;
3962     TCGv_ptr pg = tcg_temp_new_ptr();
3963
3964     /* We would need 7 operands to pass these arguments "properly".
3965      * So we encode all the register numbers into the descriptor.
3966      */
3967     desc = deposit32(a->rd, 5, 5, a->rn);
3968     desc = deposit32(desc, 10, 5, a->rm);
3969     desc = deposit32(desc, 15, 5, a->ra);
3970     desc = simd_desc(vsz, vsz, desc);
3971
3972     t_desc = tcg_const_i32(desc);
3973     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3974     fn(cpu_env, pg, t_desc);
3975     tcg_temp_free_i32(t_desc);
3976     tcg_temp_free_ptr(pg);
3977     return true;
3978 }
3979
3980 #define DO_FMLA(NAME, name) \
3981 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
3982 {                                                                    \
3983     static gen_helper_sve_fmla * const fns[4] = {                    \
3984         NULL, gen_helper_sve_##name##_h,                             \
3985         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
3986     };                                                               \
3987     return do_fmla(s, a, fns[a->esz]);                               \
3988 }
3989
3990 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3991 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3992 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3993 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3994
3995 #undef DO_FMLA
3996
3997 static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
3998 {
3999     static gen_helper_sve_fmla * const fns[3] = {
4000         gen_helper_sve_fcmla_zpzzz_h,
4001         gen_helper_sve_fcmla_zpzzz_s,
4002         gen_helper_sve_fcmla_zpzzz_d,
4003     };
4004
4005     if (a->esz == 0) {
4006         return false;
4007     }
4008     if (sve_access_check(s)) {
4009         unsigned vsz = vec_full_reg_size(s);
4010         unsigned desc;
4011         TCGv_i32 t_desc;
4012         TCGv_ptr pg = tcg_temp_new_ptr();
4013
4014         /* We would need 7 operands to pass these arguments "properly".
4015          * So we encode all the register numbers into the descriptor.
4016          */
4017         desc = deposit32(a->rd, 5, 5, a->rn);
4018         desc = deposit32(desc, 10, 5, a->rm);
4019         desc = deposit32(desc, 15, 5, a->ra);
4020         desc = deposit32(desc, 20, 2, a->rot);
4021         desc = sextract32(desc, 0, 22);
4022         desc = simd_desc(vsz, vsz, desc);
4023
4024         t_desc = tcg_const_i32(desc);
4025         tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
4026         fns[a->esz - 1](cpu_env, pg, t_desc);
4027         tcg_temp_free_i32(t_desc);
4028         tcg_temp_free_ptr(pg);
4029     }
4030     return true;
4031 }
4032
4033 static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
4034 {
4035     static gen_helper_gvec_3_ptr * const fns[2] = {
4036         gen_helper_gvec_fcmlah_idx,
4037         gen_helper_gvec_fcmlas_idx,
4038     };
4039
4040     tcg_debug_assert(a->esz == 1 || a->esz == 2);
4041     tcg_debug_assert(a->rd == a->ra);
4042     if (sve_access_check(s)) {
4043         unsigned vsz = vec_full_reg_size(s);
4044         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4045         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4046                            vec_full_reg_offset(s, a->rn),
4047                            vec_full_reg_offset(s, a->rm),
4048                            status, vsz, vsz,
4049                            a->index * 4 + a->rot,
4050                            fns[a->esz - 1]);
4051         tcg_temp_free_ptr(status);
4052     }
4053     return true;
4054 }
4055
4056 /*
4057  *** SVE Floating Point Unary Operations Predicated Group
4058  */
4059
4060 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4061                        bool is_fp16, gen_helper_gvec_3_ptr *fn)
4062 {
4063     if (sve_access_check(s)) {
4064         unsigned vsz = vec_full_reg_size(s);
4065         TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4066         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4067                            vec_full_reg_offset(s, rn),
4068                            pred_full_reg_offset(s, pg),
4069                            status, vsz, vsz, 0, fn);
4070         tcg_temp_free_ptr(status);
4071     }
4072     return true;
4073 }
4074
4075 static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
4076 {
4077     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
4078 }
4079
4080 static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
4081 {
4082     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4083 }
4084
4085 static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
4086 {
4087     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
4088 }
4089
4090 static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
4091 {
4092     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4093 }
4094
4095 static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
4096 {
4097     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4098 }
4099
4100 static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
4101 {
4102     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4103 }
4104
4105 static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
4106 {
4107     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4108 }
4109
4110 static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
4111 {
4112     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4113 }
4114
4115 static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
4116 {
4117     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4118 }
4119
4120 static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
4121 {
4122     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4123 }
4124
4125 static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
4126 {
4127     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4128 }
4129
4130 static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
4131 {
4132     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4133 }
4134
4135 static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
4136 {
4137     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4138 }
4139
4140 static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
4141 {
4142     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4143 }
4144
4145 static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
4146 {
4147     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4148 }
4149
4150 static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
4151 {
4152     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4153 }
4154
4155 static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
4156 {
4157     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4158 }
4159
4160 static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
4161 {
4162     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4163 }
4164
4165 static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
4166 {
4167     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4168 }
4169
4170 static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
4171 {
4172     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4173 }
4174
4175 static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4176     gen_helper_sve_frint_h,
4177     gen_helper_sve_frint_s,
4178     gen_helper_sve_frint_d
4179 };
4180
4181 static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
4182 {
4183     if (a->esz == 0) {
4184         return false;
4185     }
4186     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4187                       frint_fns[a->esz - 1]);
4188 }
4189
4190 static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
4191 {
4192     static gen_helper_gvec_3_ptr * const fns[3] = {
4193         gen_helper_sve_frintx_h,
4194         gen_helper_sve_frintx_s,
4195         gen_helper_sve_frintx_d
4196     };
4197     if (a->esz == 0) {
4198         return false;
4199     }
4200     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4201 }
4202
4203 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4204 {
4205     if (a->esz == 0) {
4206         return false;
4207     }
4208     if (sve_access_check(s)) {
4209         unsigned vsz = vec_full_reg_size(s);
4210         TCGv_i32 tmode = tcg_const_i32(mode);
4211         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4212
4213         gen_helper_set_rmode(tmode, tmode, status);
4214
4215         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4216                            vec_full_reg_offset(s, a->rn),
4217                            pred_full_reg_offset(s, a->pg),
4218                            status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4219
4220         gen_helper_set_rmode(tmode, tmode, status);
4221         tcg_temp_free_i32(tmode);
4222         tcg_temp_free_ptr(status);
4223     }
4224     return true;
4225 }
4226
4227 static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
4228 {
4229     return do_frint_mode(s, a, float_round_nearest_even);
4230 }
4231
4232 static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
4233 {
4234     return do_frint_mode(s, a, float_round_up);
4235 }
4236
4237 static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
4238 {
4239     return do_frint_mode(s, a, float_round_down);
4240 }
4241
4242 static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
4243 {
4244     return do_frint_mode(s, a, float_round_to_zero);
4245 }
4246
4247 static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
4248 {
4249     return do_frint_mode(s, a, float_round_ties_away);
4250 }
4251
4252 static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
4253 {
4254     static gen_helper_gvec_3_ptr * const fns[3] = {
4255         gen_helper_sve_frecpx_h,
4256         gen_helper_sve_frecpx_s,
4257         gen_helper_sve_frecpx_d
4258     };
4259     if (a->esz == 0) {
4260         return false;
4261     }
4262     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4263 }
4264
4265 static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
4266 {
4267     static gen_helper_gvec_3_ptr * const fns[3] = {
4268         gen_helper_sve_fsqrt_h,
4269         gen_helper_sve_fsqrt_s,
4270         gen_helper_sve_fsqrt_d
4271     };
4272     if (a->esz == 0) {
4273         return false;
4274     }
4275     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4276 }
4277
4278 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4279 {
4280     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4281 }
4282
4283 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4284 {
4285     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4286 }
4287
4288 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4289 {
4290     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4291 }
4292
4293 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4294 {
4295     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4296 }
4297
4298 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4299 {
4300     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4301 }
4302
4303 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4304 {
4305     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4306 }
4307
4308 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4309 {
4310     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4311 }
4312
4313 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4314 {
4315     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4316 }
4317
4318 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4319 {
4320     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4321 }
4322
4323 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4324 {
4325     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4326 }
4327
4328 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4329 {
4330     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4331 }
4332
4333 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4334 {
4335     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4336 }
4337
4338 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4339 {
4340     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4341 }
4342
4343 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4344 {
4345     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4346 }
4347
4348 /*
4349  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4350  */
4351
4352 /* Subroutine loading a vector register at VOFS of LEN bytes.
4353  * The load should begin at the address Rn + IMM.
4354  */
4355
4356 static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4357 {
4358     int len_align = QEMU_ALIGN_DOWN(len, 8);
4359     int len_remain = len % 8;
4360     int nparts = len / 8 + ctpop8(len_remain);
4361     int midx = get_mem_index(s);
4362     TCGv_i64 addr, t0, t1;
4363
4364     addr = tcg_temp_new_i64();
4365     t0 = tcg_temp_new_i64();
4366
4367     /* Note that unpredicated load/store of vector/predicate registers
4368      * are defined as a stream of bytes, which equates to little-endian
4369      * operations on larger quantities.  There is no nice way to force
4370      * a little-endian load for aarch64_be-linux-user out of line.
4371      *
4372      * Attempt to keep code expansion to a minimum by limiting the
4373      * amount of unrolling done.
4374      */
4375     if (nparts <= 4) {
4376         int i;
4377
4378         for (i = 0; i < len_align; i += 8) {
4379             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4380             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4381             tcg_gen_st_i64(t0, cpu_env, vofs + i);
4382         }
4383     } else {
4384         TCGLabel *loop = gen_new_label();
4385         TCGv_ptr tp, i = tcg_const_local_ptr(0);
4386
4387         gen_set_label(loop);
4388
4389         /* Minimize the number of local temps that must be re-read from
4390          * the stack each iteration.  Instead, re-compute values other
4391          * than the loop counter.
4392          */
4393         tp = tcg_temp_new_ptr();
4394         tcg_gen_addi_ptr(tp, i, imm);
4395         tcg_gen_extu_ptr_i64(addr, tp);
4396         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4397
4398         tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4399
4400         tcg_gen_add_ptr(tp, cpu_env, i);
4401         tcg_gen_addi_ptr(i, i, 8);
4402         tcg_gen_st_i64(t0, tp, vofs);
4403         tcg_temp_free_ptr(tp);
4404
4405         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4406         tcg_temp_free_ptr(i);
4407     }
4408
4409     /* Predicate register loads can be any multiple of 2.
4410      * Note that we still store the entire 64-bit unit into cpu_env.
4411      */
4412     if (len_remain) {
4413         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4414
4415         switch (len_remain) {
4416         case 2:
4417         case 4:
4418         case 8:
4419             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4420             break;
4421
4422         case 6:
4423             t1 = tcg_temp_new_i64();
4424             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
4425             tcg_gen_addi_i64(addr, addr, 4);
4426             tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
4427             tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4428             tcg_temp_free_i64(t1);
4429             break;
4430
4431         default:
4432             g_assert_not_reached();
4433         }
4434         tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4435     }
4436     tcg_temp_free_i64(addr);
4437     tcg_temp_free_i64(t0);
4438 }
4439
4440 /* Similarly for stores.  */
4441 static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4442 {
4443     int len_align = QEMU_ALIGN_DOWN(len, 8);
4444     int len_remain = len % 8;
4445     int nparts = len / 8 + ctpop8(len_remain);
4446     int midx = get_mem_index(s);
4447     TCGv_i64 addr, t0;
4448
4449     addr = tcg_temp_new_i64();
4450     t0 = tcg_temp_new_i64();
4451
4452     /* Note that unpredicated load/store of vector/predicate registers
4453      * are defined as a stream of bytes, which equates to little-endian
4454      * operations on larger quantities.  There is no nice way to force
4455      * a little-endian store for aarch64_be-linux-user out of line.
4456      *
4457      * Attempt to keep code expansion to a minimum by limiting the
4458      * amount of unrolling done.
4459      */
4460     if (nparts <= 4) {
4461         int i;
4462
4463         for (i = 0; i < len_align; i += 8) {
4464             tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4465             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4466             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4467         }
4468     } else {
4469         TCGLabel *loop = gen_new_label();
4470         TCGv_ptr t2, i = tcg_const_local_ptr(0);
4471
4472         gen_set_label(loop);
4473
4474         t2 = tcg_temp_new_ptr();
4475         tcg_gen_add_ptr(t2, cpu_env, i);
4476         tcg_gen_ld_i64(t0, t2, vofs);
4477
4478         /* Minimize the number of local temps that must be re-read from
4479          * the stack each iteration.  Instead, re-compute values other
4480          * than the loop counter.
4481          */
4482         tcg_gen_addi_ptr(t2, i, imm);
4483         tcg_gen_extu_ptr_i64(addr, t2);
4484         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4485         tcg_temp_free_ptr(t2);
4486
4487         tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4488
4489         tcg_gen_addi_ptr(i, i, 8);
4490
4491         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4492         tcg_temp_free_ptr(i);
4493     }
4494
4495     /* Predicate register stores can be any multiple of 2.  */
4496     if (len_remain) {
4497         tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4498         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4499
4500         switch (len_remain) {
4501         case 2:
4502         case 4:
4503         case 8:
4504             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4505             break;
4506
4507         case 6:
4508             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
4509             tcg_gen_addi_i64(addr, addr, 4);
4510             tcg_gen_shri_i64(t0, t0, 32);
4511             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
4512             break;
4513
4514         default:
4515             g_assert_not_reached();
4516         }
4517     }
4518     tcg_temp_free_i64(addr);
4519     tcg_temp_free_i64(t0);
4520 }
4521
4522 static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4523 {
4524     if (sve_access_check(s)) {
4525         int size = vec_full_reg_size(s);
4526         int off = vec_full_reg_offset(s, a->rd);
4527         do_ldr(s, off, size, a->rn, a->imm * size);
4528     }
4529     return true;
4530 }
4531
4532 static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4533 {
4534     if (sve_access_check(s)) {
4535         int size = pred_full_reg_size(s);
4536         int off = pred_full_reg_offset(s, a->rd);
4537         do_ldr(s, off, size, a->rn, a->imm * size);
4538     }
4539     return true;
4540 }
4541
4542 static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4543 {
4544     if (sve_access_check(s)) {
4545         int size = vec_full_reg_size(s);
4546         int off = vec_full_reg_offset(s, a->rd);
4547         do_str(s, off, size, a->rn, a->imm * size);
4548     }
4549     return true;
4550 }
4551
4552 static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4553 {
4554     if (sve_access_check(s)) {
4555         int size = pred_full_reg_size(s);
4556         int off = pred_full_reg_offset(s, a->rd);
4557         do_str(s, off, size, a->rn, a->imm * size);
4558     }
4559     return true;
4560 }
4561
4562 /*
4563  *** SVE Memory - Contiguous Load Group
4564  */
4565
4566 /* The memory mode of the dtype.  */
4567 static const TCGMemOp dtype_mop[16] = {
4568     MO_UB, MO_UB, MO_UB, MO_UB,
4569     MO_SL, MO_UW, MO_UW, MO_UW,
4570     MO_SW, MO_SW, MO_UL, MO_UL,
4571     MO_SB, MO_SB, MO_SB, MO_Q
4572 };
4573
4574 #define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
4575
4576 /* The vector element size of dtype.  */
4577 static const uint8_t dtype_esz[16] = {
4578     0, 1, 2, 3,
4579     3, 1, 2, 3,
4580     3, 2, 2, 3,
4581     3, 2, 1, 3
4582 };
4583
4584 static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
4585 {
4586     return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
4587 }
4588
4589 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4590                        int dtype, gen_helper_gvec_mem *fn)
4591 {
4592     unsigned vsz = vec_full_reg_size(s);
4593     TCGv_ptr t_pg;
4594     TCGv_i32 t_desc;
4595     int desc;
4596
4597     /* For e.g. LD4, there are not enough arguments to pass all 4
4598      * registers as pointers, so encode the regno into the data field.
4599      * For consistency, do this even for LD1.
4600      */
4601     desc = sve_memopidx(s, dtype);
4602     desc |= zt << MEMOPIDX_SHIFT;
4603     desc = simd_desc(vsz, vsz, desc);
4604     t_desc = tcg_const_i32(desc);
4605     t_pg = tcg_temp_new_ptr();
4606
4607     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4608     fn(cpu_env, t_pg, addr, t_desc);
4609
4610     tcg_temp_free_ptr(t_pg);
4611     tcg_temp_free_i32(t_desc);
4612 }
4613
4614 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4615                       TCGv_i64 addr, int dtype, int nreg)
4616 {
4617     static gen_helper_gvec_mem * const fns[2][16][4] = {
4618         /* Little-endian */
4619         { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4620             gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4621           { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4622           { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4623           { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4624
4625           { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4626           { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4627             gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4628           { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4629           { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4630
4631           { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4632           { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4633           { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4634             gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4635           { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4636
4637           { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4638           { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4639           { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4640           { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4641             gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4642
4643         /* Big-endian */
4644         { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4645             gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4646           { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4647           { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4648           { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4649
4650           { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4651           { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4652             gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4653           { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4654           { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4655
4656           { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4657           { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4658           { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4659             gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4660           { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4661
4662           { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4663           { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4664           { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4665           { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4666             gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
4667     };
4668     gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];
4669
4670     /* While there are holes in the table, they are not
4671      * accessible via the instruction encoding.
4672      */
4673     assert(fn != NULL);
4674     do_mem_zpa(s, zt, pg, addr, dtype, fn);
4675 }
4676
4677 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4678 {
4679     if (a->rm == 31) {
4680         return false;
4681     }
4682     if (sve_access_check(s)) {
4683         TCGv_i64 addr = new_tmp_a64(s);
4684         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4685         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4686         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4687     }
4688     return true;
4689 }
4690
4691 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4692 {
4693     if (sve_access_check(s)) {
4694         int vsz = vec_full_reg_size(s);
4695         int elements = vsz >> dtype_esz[a->dtype];
4696         TCGv_i64 addr = new_tmp_a64(s);
4697
4698         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4699                          (a->imm * elements * (a->nreg + 1))
4700                          << dtype_msz(a->dtype));
4701         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4702     }
4703     return true;
4704 }
4705
4706 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
4707 {
4708     static gen_helper_gvec_mem * const fns[2][16] = {
4709         /* Little-endian */
4710         { gen_helper_sve_ldff1bb_r,
4711           gen_helper_sve_ldff1bhu_r,
4712           gen_helper_sve_ldff1bsu_r,
4713           gen_helper_sve_ldff1bdu_r,
4714
4715           gen_helper_sve_ldff1sds_le_r,
4716           gen_helper_sve_ldff1hh_le_r,
4717           gen_helper_sve_ldff1hsu_le_r,
4718           gen_helper_sve_ldff1hdu_le_r,
4719
4720           gen_helper_sve_ldff1hds_le_r,
4721           gen_helper_sve_ldff1hss_le_r,
4722           gen_helper_sve_ldff1ss_le_r,
4723           gen_helper_sve_ldff1sdu_le_r,
4724
4725           gen_helper_sve_ldff1bds_r,
4726           gen_helper_sve_ldff1bss_r,
4727           gen_helper_sve_ldff1bhs_r,
4728           gen_helper_sve_ldff1dd_le_r },
4729
4730         /* Big-endian */
4731         { gen_helper_sve_ldff1bb_r,
4732           gen_helper_sve_ldff1bhu_r,
4733           gen_helper_sve_ldff1bsu_r,
4734           gen_helper_sve_ldff1bdu_r,
4735
4736           gen_helper_sve_ldff1sds_be_r,
4737           gen_helper_sve_ldff1hh_be_r,
4738           gen_helper_sve_ldff1hsu_be_r,
4739           gen_helper_sve_ldff1hdu_be_r,
4740
4741           gen_helper_sve_ldff1hds_be_r,
4742           gen_helper_sve_ldff1hss_be_r,
4743           gen_helper_sve_ldff1ss_be_r,
4744           gen_helper_sve_ldff1sdu_be_r,
4745
4746           gen_helper_sve_ldff1bds_r,
4747           gen_helper_sve_ldff1bss_r,
4748           gen_helper_sve_ldff1bhs_r,
4749           gen_helper_sve_ldff1dd_be_r },
4750     };
4751
4752     if (sve_access_check(s)) {
4753         TCGv_i64 addr = new_tmp_a64(s);
4754         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4755         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4756         do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4757                    fns[s->be_data == MO_BE][a->dtype]);
4758     }
4759     return true;
4760 }
4761
4762 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
4763 {
4764     static gen_helper_gvec_mem * const fns[2][16] = {
4765         /* Little-endian */
4766         { gen_helper_sve_ldnf1bb_r,
4767           gen_helper_sve_ldnf1bhu_r,
4768           gen_helper_sve_ldnf1bsu_r,
4769           gen_helper_sve_ldnf1bdu_r,
4770
4771           gen_helper_sve_ldnf1sds_le_r,
4772           gen_helper_sve_ldnf1hh_le_r,
4773           gen_helper_sve_ldnf1hsu_le_r,
4774           gen_helper_sve_ldnf1hdu_le_r,
4775
4776           gen_helper_sve_ldnf1hds_le_r,
4777           gen_helper_sve_ldnf1hss_le_r,
4778           gen_helper_sve_ldnf1ss_le_r,
4779           gen_helper_sve_ldnf1sdu_le_r,
4780
4781           gen_helper_sve_ldnf1bds_r,
4782           gen_helper_sve_ldnf1bss_r,
4783           gen_helper_sve_ldnf1bhs_r,
4784           gen_helper_sve_ldnf1dd_le_r },
4785
4786         /* Big-endian */
4787         { gen_helper_sve_ldnf1bb_r,
4788           gen_helper_sve_ldnf1bhu_r,
4789           gen_helper_sve_ldnf1bsu_r,
4790           gen_helper_sve_ldnf1bdu_r,
4791
4792           gen_helper_sve_ldnf1sds_be_r,
4793           gen_helper_sve_ldnf1hh_be_r,
4794           gen_helper_sve_ldnf1hsu_be_r,
4795           gen_helper_sve_ldnf1hdu_be_r,
4796
4797           gen_helper_sve_ldnf1hds_be_r,
4798           gen_helper_sve_ldnf1hss_be_r,
4799           gen_helper_sve_ldnf1ss_be_r,
4800           gen_helper_sve_ldnf1sdu_be_r,
4801
4802           gen_helper_sve_ldnf1bds_r,
4803           gen_helper_sve_ldnf1bss_r,
4804           gen_helper_sve_ldnf1bhs_r,
4805           gen_helper_sve_ldnf1dd_be_r },
4806     };
4807
4808     if (sve_access_check(s)) {
4809         int vsz = vec_full_reg_size(s);
4810         int elements = vsz >> dtype_esz[a->dtype];
4811         int off = (a->imm * elements) << dtype_msz(a->dtype);
4812         TCGv_i64 addr = new_tmp_a64(s);
4813
4814         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4815         do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4816                    fns[s->be_data == MO_BE][a->dtype]);
4817     }
4818     return true;
4819 }
4820
4821 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4822 {
4823     static gen_helper_gvec_mem * const fns[2][4] = {
4824         { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_le_r,
4825           gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
4826         { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_be_r,
4827           gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
4828     };
4829     unsigned vsz = vec_full_reg_size(s);
4830     TCGv_ptr t_pg;
4831     TCGv_i32 t_desc;
4832     int desc, poff;
4833
4834     /* Load the first quadword using the normal predicated load helpers.  */
4835     desc = sve_memopidx(s, msz_dtype(msz));
4836     desc |= zt << MEMOPIDX_SHIFT;
4837     desc = simd_desc(16, 16, desc);
4838     t_desc = tcg_const_i32(desc);
4839
4840     poff = pred_full_reg_offset(s, pg);
4841     if (vsz > 16) {
4842         /*
4843          * Zero-extend the first 16 bits of the predicate into a temporary.
4844          * This avoids triggering an assert making sure we don't have bits
4845          * set within a predicate beyond VQ, but we have lowered VQ to 1
4846          * for this load operation.
4847          */
4848         TCGv_i64 tmp = tcg_temp_new_i64();
4849 #ifdef HOST_WORDS_BIGENDIAN
4850         poff += 6;
4851 #endif
4852         tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4853
4854         poff = offsetof(CPUARMState, vfp.preg_tmp);
4855         tcg_gen_st_i64(tmp, cpu_env, poff);
4856         tcg_temp_free_i64(tmp);
4857     }
4858
4859     t_pg = tcg_temp_new_ptr();
4860     tcg_gen_addi_ptr(t_pg, cpu_env, poff);
4861
4862     fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
4863
4864     tcg_temp_free_ptr(t_pg);
4865     tcg_temp_free_i32(t_desc);
4866
4867     /* Replicate that first quadword.  */
4868     if (vsz > 16) {
4869         unsigned dofs = vec_full_reg_offset(s, zt);
4870         tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4871     }
4872 }
4873
4874 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
4875 {
4876     if (a->rm == 31) {
4877         return false;
4878     }
4879     if (sve_access_check(s)) {
4880         int msz = dtype_msz(a->dtype);
4881         TCGv_i64 addr = new_tmp_a64(s);
4882         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4883         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4884         do_ldrq(s, a->rd, a->pg, addr, msz);
4885     }
4886     return true;
4887 }
4888
4889 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
4890 {
4891     if (sve_access_check(s)) {
4892         TCGv_i64 addr = new_tmp_a64(s);
4893         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4894         do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4895     }
4896     return true;
4897 }
4898
4899 /* Load and broadcast element.  */
4900 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
4901 {
4902     if (!sve_access_check(s)) {
4903         return true;
4904     }
4905
4906     unsigned vsz = vec_full_reg_size(s);
4907     unsigned psz = pred_full_reg_size(s);
4908     unsigned esz = dtype_esz[a->dtype];
4909     unsigned msz = dtype_msz(a->dtype);
4910     TCGLabel *over = gen_new_label();
4911     TCGv_i64 temp;
4912
4913     /* If the guarding predicate has no bits set, no load occurs.  */
4914     if (psz <= 8) {
4915         /* Reduce the pred_esz_masks value simply to reduce the
4916          * size of the code generated here.
4917          */
4918         uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4919         temp = tcg_temp_new_i64();
4920         tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4921         tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4922         tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4923         tcg_temp_free_i64(temp);
4924     } else {
4925         TCGv_i32 t32 = tcg_temp_new_i32();
4926         find_last_active(s, t32, esz, a->pg);
4927         tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4928         tcg_temp_free_i32(t32);
4929     }
4930
4931     /* Load the data.  */
4932     temp = tcg_temp_new_i64();
4933     tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4934     tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4935                         s->be_data | dtype_mop[a->dtype]);
4936
4937     /* Broadcast to *all* elements.  */
4938     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4939                          vsz, vsz, temp);
4940     tcg_temp_free_i64(temp);
4941
4942     /* Zero the inactive elements.  */
4943     gen_set_label(over);
4944     do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4945     return true;
4946 }
4947
4948 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4949                       int msz, int esz, int nreg)
4950 {
4951     static gen_helper_gvec_mem * const fn_single[2][4][4] = {
4952         { { gen_helper_sve_st1bb_r,
4953             gen_helper_sve_st1bh_r,
4954             gen_helper_sve_st1bs_r,
4955             gen_helper_sve_st1bd_r },
4956           { NULL,
4957             gen_helper_sve_st1hh_le_r,
4958             gen_helper_sve_st1hs_le_r,
4959             gen_helper_sve_st1hd_le_r },
4960           { NULL, NULL,
4961             gen_helper_sve_st1ss_le_r,
4962             gen_helper_sve_st1sd_le_r },
4963           { NULL, NULL, NULL,
4964             gen_helper_sve_st1dd_le_r } },
4965         { { gen_helper_sve_st1bb_r,
4966             gen_helper_sve_st1bh_r,
4967             gen_helper_sve_st1bs_r,
4968             gen_helper_sve_st1bd_r },
4969           { NULL,
4970             gen_helper_sve_st1hh_be_r,
4971             gen_helper_sve_st1hs_be_r,
4972             gen_helper_sve_st1hd_be_r },
4973           { NULL, NULL,
4974             gen_helper_sve_st1ss_be_r,
4975             gen_helper_sve_st1sd_be_r },
4976           { NULL, NULL, NULL,
4977             gen_helper_sve_st1dd_be_r } },
4978     };
4979     static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
4980         { { gen_helper_sve_st2bb_r,
4981             gen_helper_sve_st2hh_le_r,
4982             gen_helper_sve_st2ss_le_r,
4983             gen_helper_sve_st2dd_le_r },
4984           { gen_helper_sve_st3bb_r,
4985             gen_helper_sve_st3hh_le_r,
4986             gen_helper_sve_st3ss_le_r,
4987             gen_helper_sve_st3dd_le_r },
4988           { gen_helper_sve_st4bb_r,
4989             gen_helper_sve_st4hh_le_r,
4990             gen_helper_sve_st4ss_le_r,
4991             gen_helper_sve_st4dd_le_r } },
4992         { { gen_helper_sve_st2bb_r,
4993             gen_helper_sve_st2hh_be_r,
4994             gen_helper_sve_st2ss_be_r,
4995             gen_helper_sve_st2dd_be_r },
4996           { gen_helper_sve_st3bb_r,
4997             gen_helper_sve_st3hh_be_r,
4998             gen_helper_sve_st3ss_be_r,
4999             gen_helper_sve_st3dd_be_r },
5000           { gen_helper_sve_st4bb_r,
5001             gen_helper_sve_st4hh_be_r,
5002             gen_helper_sve_st4ss_be_r,
5003             gen_helper_sve_st4dd_be_r } },
5004     };
5005     gen_helper_gvec_mem *fn;
5006     int be = s->be_data == MO_BE;
5007
5008     if (nreg == 0) {
5009         /* ST1 */
5010         fn = fn_single[be][msz][esz];
5011     } else {
5012         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5013         assert(msz == esz);
5014         fn = fn_multiple[be][nreg - 1][msz];
5015     }
5016     assert(fn != NULL);
5017     do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn);
5018 }
5019
5020 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
5021 {
5022     if (a->rm == 31 || a->msz > a->esz) {
5023         return false;
5024     }
5025     if (sve_access_check(s)) {
5026         TCGv_i64 addr = new_tmp_a64(s);
5027         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5028         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5029         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5030     }
5031     return true;
5032 }
5033
5034 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5035 {
5036     if (a->msz > a->esz) {
5037         return false;
5038     }
5039     if (sve_access_check(s)) {
5040         int vsz = vec_full_reg_size(s);
5041         int elements = vsz >> a->esz;
5042         TCGv_i64 addr = new_tmp_a64(s);
5043
5044         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5045                          (a->imm * elements * (a->nreg + 1)) << a->msz);
5046         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5047     }
5048     return true;
5049 }
5050
5051 /*
5052  *** SVE gather loads / scatter stores
5053  */
5054
5055 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
5056                        int scale, TCGv_i64 scalar, int msz,
5057                        gen_helper_gvec_mem_scatter *fn)
5058 {
5059     unsigned vsz = vec_full_reg_size(s);
5060     TCGv_ptr t_zm = tcg_temp_new_ptr();
5061     TCGv_ptr t_pg = tcg_temp_new_ptr();
5062     TCGv_ptr t_zt = tcg_temp_new_ptr();
5063     TCGv_i32 t_desc;
5064     int desc;
5065
5066     desc = sve_memopidx(s, msz_dtype(msz));
5067     desc |= scale << MEMOPIDX_SHIFT;
5068     desc = simd_desc(vsz, vsz, desc);
5069     t_desc = tcg_const_i32(desc);
5070
5071     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5072     tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5073     tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
5074     fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
5075
5076     tcg_temp_free_ptr(t_zt);
5077     tcg_temp_free_ptr(t_zm);
5078     tcg_temp_free_ptr(t_pg);
5079     tcg_temp_free_i32(t_desc);
5080 }
5081
5082 /* Indexed by [be][ff][xs][u][msz].  */
5083 static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
5084     /* Little-endian */
5085     { { { { gen_helper_sve_ldbss_zsu,
5086             gen_helper_sve_ldhss_le_zsu,
5087             NULL, },
5088           { gen_helper_sve_ldbsu_zsu,
5089             gen_helper_sve_ldhsu_le_zsu,
5090             gen_helper_sve_ldss_le_zsu, } },
5091         { { gen_helper_sve_ldbss_zss,
5092             gen_helper_sve_ldhss_le_zss,
5093             NULL, },
5094           { gen_helper_sve_ldbsu_zss,
5095             gen_helper_sve_ldhsu_le_zss,
5096             gen_helper_sve_ldss_le_zss, } } },
5097
5098       /* First-fault */
5099       { { { gen_helper_sve_ldffbss_zsu,
5100             gen_helper_sve_ldffhss_le_zsu,
5101             NULL, },
5102           { gen_helper_sve_ldffbsu_zsu,
5103             gen_helper_sve_ldffhsu_le_zsu,
5104             gen_helper_sve_ldffss_le_zsu, } },
5105         { { gen_helper_sve_ldffbss_zss,
5106             gen_helper_sve_ldffhss_le_zss,
5107             NULL, },
5108           { gen_helper_sve_ldffbsu_zss,
5109             gen_helper_sve_ldffhsu_le_zss,
5110             gen_helper_sve_ldffss_le_zss, } } } },
5111
5112     /* Big-endian */
5113     { { { { gen_helper_sve_ldbss_zsu,
5114             gen_helper_sve_ldhss_be_zsu,
5115             NULL, },
5116           { gen_helper_sve_ldbsu_zsu,
5117             gen_helper_sve_ldhsu_be_zsu,
5118             gen_helper_sve_ldss_be_zsu, } },
5119         { { gen_helper_sve_ldbss_zss,
5120             gen_helper_sve_ldhss_be_zss,
5121             NULL, },
5122           { gen_helper_sve_ldbsu_zss,
5123             gen_helper_sve_ldhsu_be_zss,
5124             gen_helper_sve_ldss_be_zss, } } },
5125
5126       /* First-fault */
5127       { { { gen_helper_sve_ldffbss_zsu,
5128             gen_helper_sve_ldffhss_be_zsu,
5129             NULL, },
5130           { gen_helper_sve_ldffbsu_zsu,
5131             gen_helper_sve_ldffhsu_be_zsu,
5132             gen_helper_sve_ldffss_be_zsu, } },
5133         { { gen_helper_sve_ldffbss_zss,
5134             gen_helper_sve_ldffhss_be_zss,
5135             NULL, },
5136           { gen_helper_sve_ldffbsu_zss,
5137             gen_helper_sve_ldffhsu_be_zss,
5138             gen_helper_sve_ldffss_be_zss, } } } },
5139 };
5140
5141 /* Note that we overload xs=2 to indicate 64-bit offset.  */
5142 static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
5143     /* Little-endian */
5144     { { { { gen_helper_sve_ldbds_zsu,
5145             gen_helper_sve_ldhds_le_zsu,
5146             gen_helper_sve_ldsds_le_zsu,
5147             NULL, },
5148           { gen_helper_sve_ldbdu_zsu,
5149             gen_helper_sve_ldhdu_le_zsu,
5150             gen_helper_sve_ldsdu_le_zsu,
5151             gen_helper_sve_lddd_le_zsu, } },
5152         { { gen_helper_sve_ldbds_zss,
5153             gen_helper_sve_ldhds_le_zss,
5154             gen_helper_sve_ldsds_le_zss,
5155             NULL, },
5156           { gen_helper_sve_ldbdu_zss,
5157             gen_helper_sve_ldhdu_le_zss,
5158             gen_helper_sve_ldsdu_le_zss,
5159             gen_helper_sve_lddd_le_zss, } },
5160         { { gen_helper_sve_ldbds_zd,
5161             gen_helper_sve_ldhds_le_zd,
5162             gen_helper_sve_ldsds_le_zd,
5163             NULL, },
5164           { gen_helper_sve_ldbdu_zd,
5165             gen_helper_sve_ldhdu_le_zd,
5166             gen_helper_sve_ldsdu_le_zd,
5167             gen_helper_sve_lddd_le_zd, } } },
5168
5169       /* First-fault */
5170       { { { gen_helper_sve_ldffbds_zsu,
5171             gen_helper_sve_ldffhds_le_zsu,
5172             gen_helper_sve_ldffsds_le_zsu,
5173             NULL, },
5174           { gen_helper_sve_ldffbdu_zsu,
5175             gen_helper_sve_ldffhdu_le_zsu,
5176             gen_helper_sve_ldffsdu_le_zsu,
5177             gen_helper_sve_ldffdd_le_zsu, } },
5178         { { gen_helper_sve_ldffbds_zss,
5179             gen_helper_sve_ldffhds_le_zss,
5180             gen_helper_sve_ldffsds_le_zss,
5181             NULL, },
5182           { gen_helper_sve_ldffbdu_zss,
5183             gen_helper_sve_ldffhdu_le_zss,
5184             gen_helper_sve_ldffsdu_le_zss,
5185             gen_helper_sve_ldffdd_le_zss, } },
5186         { { gen_helper_sve_ldffbds_zd,
5187             gen_helper_sve_ldffhds_le_zd,
5188             gen_helper_sve_ldffsds_le_zd,
5189             NULL, },
5190           { gen_helper_sve_ldffbdu_zd,
5191             gen_helper_sve_ldffhdu_le_zd,
5192             gen_helper_sve_ldffsdu_le_zd,
5193             gen_helper_sve_ldffdd_le_zd, } } } },
5194
5195     /* Big-endian */
5196     { { { { gen_helper_sve_ldbds_zsu,
5197             gen_helper_sve_ldhds_be_zsu,
5198             gen_helper_sve_ldsds_be_zsu,
5199             NULL, },
5200           { gen_helper_sve_ldbdu_zsu,
5201             gen_helper_sve_ldhdu_be_zsu,
5202             gen_helper_sve_ldsdu_be_zsu,
5203             gen_helper_sve_lddd_be_zsu, } },
5204         { { gen_helper_sve_ldbds_zss,
5205             gen_helper_sve_ldhds_be_zss,
5206             gen_helper_sve_ldsds_be_zss,
5207             NULL, },
5208           { gen_helper_sve_ldbdu_zss,
5209             gen_helper_sve_ldhdu_be_zss,
5210             gen_helper_sve_ldsdu_be_zss,
5211             gen_helper_sve_lddd_be_zss, } },
5212         { { gen_helper_sve_ldbds_zd,
5213             gen_helper_sve_ldhds_be_zd,
5214             gen_helper_sve_ldsds_be_zd,
5215             NULL, },
5216           { gen_helper_sve_ldbdu_zd,
5217             gen_helper_sve_ldhdu_be_zd,
5218             gen_helper_sve_ldsdu_be_zd,
5219             gen_helper_sve_lddd_be_zd, } } },
5220
5221       /* First-fault */
5222       { { { gen_helper_sve_ldffbds_zsu,
5223             gen_helper_sve_ldffhds_be_zsu,
5224             gen_helper_sve_ldffsds_be_zsu,
5225             NULL, },
5226           { gen_helper_sve_ldffbdu_zsu,
5227             gen_helper_sve_ldffhdu_be_zsu,
5228             gen_helper_sve_ldffsdu_be_zsu,
5229             gen_helper_sve_ldffdd_be_zsu, } },
5230         { { gen_helper_sve_ldffbds_zss,
5231             gen_helper_sve_ldffhds_be_zss,
5232             gen_helper_sve_ldffsds_be_zss,
5233             NULL, },
5234           { gen_helper_sve_ldffbdu_zss,
5235             gen_helper_sve_ldffhdu_be_zss,
5236             gen_helper_sve_ldffsdu_be_zss,
5237             gen_helper_sve_ldffdd_be_zss, } },
5238         { { gen_helper_sve_ldffbds_zd,
5239             gen_helper_sve_ldffhds_be_zd,
5240             gen_helper_sve_ldffsds_be_zd,
5241             NULL, },
5242           { gen_helper_sve_ldffbdu_zd,
5243             gen_helper_sve_ldffhdu_be_zd,
5244             gen_helper_sve_ldffsdu_be_zd,
5245             gen_helper_sve_ldffdd_be_zd, } } } },
5246 };
5247
5248 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
5249 {
5250     gen_helper_gvec_mem_scatter *fn = NULL;
5251     int be = s->be_data == MO_BE;
5252
5253     if (!sve_access_check(s)) {
5254         return true;
5255     }
5256
5257     switch (a->esz) {
5258     case MO_32:
5259         fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
5260         break;
5261     case MO_64:
5262         fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
5263         break;
5264     }
5265     assert(fn != NULL);
5266
5267     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5268                cpu_reg_sp(s, a->rn), a->msz, fn);
5269     return true;
5270 }
5271
5272 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
5273 {
5274     gen_helper_gvec_mem_scatter *fn = NULL;
5275     int be = s->be_data == MO_BE;
5276     TCGv_i64 imm;
5277
5278     if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5279         return false;
5280     }
5281     if (!sve_access_check(s)) {
5282         return true;
5283     }
5284
5285     switch (a->esz) {
5286     case MO_32:
5287         fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
5288         break;
5289     case MO_64:
5290         fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
5291         break;
5292     }
5293     assert(fn != NULL);
5294
5295     /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5296      * by loading the immediate into the scalar parameter.
5297      */
5298     imm = tcg_const_i64(a->imm << a->msz);
5299     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5300     tcg_temp_free_i64(imm);
5301     return true;
5302 }
5303
5304 /* Indexed by [be][xs][msz].  */
5305 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
5306     /* Little-endian */
5307     { { gen_helper_sve_stbs_zsu,
5308         gen_helper_sve_sths_le_zsu,
5309         gen_helper_sve_stss_le_zsu, },
5310       { gen_helper_sve_stbs_zss,
5311         gen_helper_sve_sths_le_zss,
5312         gen_helper_sve_stss_le_zss, } },
5313     /* Big-endian */
5314     { { gen_helper_sve_stbs_zsu,
5315         gen_helper_sve_sths_be_zsu,
5316         gen_helper_sve_stss_be_zsu, },
5317       { gen_helper_sve_stbs_zss,
5318         gen_helper_sve_sths_be_zss,
5319         gen_helper_sve_stss_be_zss, } },
5320 };
5321
5322 /* Note that we overload xs=2 to indicate 64-bit offset.  */
5323 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
5324     /* Little-endian */
5325     { { gen_helper_sve_stbd_zsu,
5326         gen_helper_sve_sthd_le_zsu,
5327         gen_helper_sve_stsd_le_zsu,
5328         gen_helper_sve_stdd_le_zsu, },
5329       { gen_helper_sve_stbd_zss,
5330         gen_helper_sve_sthd_le_zss,
5331         gen_helper_sve_stsd_le_zss,
5332         gen_helper_sve_stdd_le_zss, },
5333       { gen_helper_sve_stbd_zd,
5334         gen_helper_sve_sthd_le_zd,
5335         gen_helper_sve_stsd_le_zd,
5336         gen_helper_sve_stdd_le_zd, } },
5337     /* Big-endian */
5338     { { gen_helper_sve_stbd_zsu,
5339         gen_helper_sve_sthd_be_zsu,
5340         gen_helper_sve_stsd_be_zsu,
5341         gen_helper_sve_stdd_be_zsu, },
5342       { gen_helper_sve_stbd_zss,
5343         gen_helper_sve_sthd_be_zss,
5344         gen_helper_sve_stsd_be_zss,
5345         gen_helper_sve_stdd_be_zss, },
5346       { gen_helper_sve_stbd_zd,
5347         gen_helper_sve_sthd_be_zd,
5348         gen_helper_sve_stsd_be_zd,
5349         gen_helper_sve_stdd_be_zd, } },
5350 };
5351
5352 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
5353 {
5354     gen_helper_gvec_mem_scatter *fn;
5355     int be = s->be_data == MO_BE;
5356
5357     if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5358         return false;
5359     }
5360     if (!sve_access_check(s)) {
5361         return true;
5362     }
5363     switch (a->esz) {
5364     case MO_32:
5365         fn = scatter_store_fn32[be][a->xs][a->msz];
5366         break;
5367     case MO_64:
5368         fn = scatter_store_fn64[be][a->xs][a->msz];
5369         break;
5370     default:
5371         g_assert_not_reached();
5372     }
5373     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5374                cpu_reg_sp(s, a->rn), a->msz, fn);
5375     return true;
5376 }
5377
5378 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
5379 {
5380     gen_helper_gvec_mem_scatter *fn = NULL;
5381     int be = s->be_data == MO_BE;
5382     TCGv_i64 imm;
5383
5384     if (a->esz < a->msz) {
5385         return false;
5386     }
5387     if (!sve_access_check(s)) {
5388         return true;
5389     }
5390
5391     switch (a->esz) {
5392     case MO_32:
5393         fn = scatter_store_fn32[be][0][a->msz];
5394         break;
5395     case MO_64:
5396         fn = scatter_store_fn64[be][2][a->msz];
5397         break;
5398     }
5399     assert(fn != NULL);
5400
5401     /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5402      * by loading the immediate into the scalar parameter.
5403      */
5404     imm = tcg_const_i64(a->imm << a->msz);
5405     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5406     tcg_temp_free_i64(imm);
5407     return true;
5408 }
5409
5410 /*
5411  * Prefetches
5412  */
5413
5414 static bool trans_PRF(DisasContext *s, arg_PRF *a)
5415 {
5416     /* Prefetch is a nop within QEMU.  */
5417     (void)sve_access_check(s);
5418     return true;
5419 }
5420
5421 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
5422 {
5423     if (a->rm == 31) {
5424         return false;
5425     }
5426     /* Prefetch is a nop within QEMU.  */
5427     (void)sve_access_check(s);
5428     return true;
5429 }
5430
5431 /*
5432  * Move Prefix
5433  *
5434  * TODO: The implementation so far could handle predicated merging movprfx.
5435  * The helper functions as written take an extra source register to
5436  * use in the operation, but the result is only written when predication
5437  * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
5438  * to allow the final write back to the destination to be unconditional.
5439  * For predicated zeroing movprfx, we need to rearrange the helpers to
5440  * allow the final write back to zero inactives.
5441  *
5442  * In the meantime, just emit the moves.
5443  */
5444
5445 static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
5446 {
5447     return do_mov_z(s, a->rd, a->rn);
5448 }
5449
5450 static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
5451 {
5452     if (sve_access_check(s)) {
5453         do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5454     }
5455     return true;
5456 }
5457
5458 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
5459 {
5460     if (sve_access_check(s)) {
5461         do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5462     }
5463     return true;
5464 }