target/arm/translate-sve.c

   1 /*
   2  * AArch64 SVE translation
   3  *
   4  * Copyright (c) 2018 Linaro, Ltd
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "tcg-op-gvec.h"
  25 #include "tcg-gvec-desc.h"
  26 #include "qemu/log.h"
  27 #include "arm_ldst.h"
  28 #include "translate.h"
  29 #include "internals.h"
  30 #include "exec/helper-proto.h"
  31 #include "exec/helper-gen.h"
  32 #include "exec/log.h"
  33 #include "trace-tcg.h"
  34 #include "translate-a64.h"
  35 #include "fpu/softfloat.h"
  36
  37
  38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
  39                          TCGv_i64, uint32_t, uint32_t);
  40
  41 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
  42                                      TCGv_ptr, TCGv_i32);
  43 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
  44                                      TCGv_ptr, TCGv_ptr, TCGv_i32);
  45
  46 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
  47 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
  48                                          TCGv_ptr, TCGv_i64, TCGv_i32);
  49
  50 /*
  51  * Helpers for extracting complex instruction fields.
  52  */
  53
  54 /* See e.g. ASR (immediate, predicated).
  55  * Returns -1 for unallocated encoding; diagnose later.
  56  */
  57 static int tszimm_esz(int x)
  58 {
  59     x >>= 3;  /* discard imm3 */
  60     return 31 - clz32(x);
  61 }
  62
  63 static int tszimm_shr(int x)
  64 {
  65     return (16 << tszimm_esz(x)) - x;
  66 }
  67
  68 /* See e.g. LSL (immediate, predicated).  */
  69 static int tszimm_shl(int x)
  70 {
  71     return x - (8 << tszimm_esz(x));
  72 }
  73
  74 static inline int plus1(int x)
  75 {
  76     return x + 1;
  77 }
  78
  79 /* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  80 static inline int expand_imm_sh8s(int x)
  81 {
  82     return (int8_t)x << (x & 0x100 ? 8 : 0);
  83 }
  84
  85 static inline int expand_imm_sh8u(int x)
  86 {
  87     return (uint8_t)x << (x & 0x100 ? 8 : 0);
  88 }
  89
  90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  91  * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  92  */
  93 static inline int msz_dtype(int msz)
  94 {
  95     static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  96     return dtype[msz];
  97 }
  98
  99 /*
 100  * Include the generated decoder.
 101  */
 102
 103 #include "decode-sve.inc.c"
 104
 105 /*
 106  * Implement all of the translator functions referenced by the decoder.
 107  */
 108
 109 /* Return the offset info CPUARMState of the predicate vector register Pn.
 110  * Note for this purpose, FFR is P16.
 111  */
 112 static inline int pred_full_reg_offset(DisasContext *s, int regno)
 113 {
 114     return offsetof(CPUARMState, vfp.pregs[regno]);
 115 }
 116
 117 /* Return the byte size of the whole predicate register, VL / 64.  */
 118 static inline int pred_full_reg_size(DisasContext *s)
 119 {
 120     return s->sve_len >> 3;
 121 }
 122
 123 /* Round up the size of a register to a size allowed by
 124  * the tcg vector infrastructure.  Any operation which uses this
 125  * size may assume that the bits above pred_full_reg_size are zero,
 126  * and must leave them the same way.
 127  *
 128  * Note that this is not needed for the vector registers as they
 129  * are always properly sized for tcg vectors.
 130  */
 131 static int size_for_gvec(int size)
 132 {
 133     if (size <= 8) {
 134         return 8;
 135     } else {
 136         return QEMU_ALIGN_UP(size, 16);
 137     }
 138 }
 139
 140 static int pred_gvec_reg_size(DisasContext *s)
 141 {
 142     return size_for_gvec(pred_full_reg_size(s));
 143 }
 144
 145 /* Invoke a vector expander on two Zregs.  */
 146 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
 147                          int esz, int rd, int rn)
 148 {
 149     if (sve_access_check(s)) {
 150         unsigned vsz = vec_full_reg_size(s);
 151         gvec_fn(esz, vec_full_reg_offset(s, rd),
 152                 vec_full_reg_offset(s, rn), vsz, vsz);
 153     }
 154     return true;
 155 }
 156
 157 /* Invoke a vector expander on three Zregs.  */
 158 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
 159                          int esz, int rd, int rn, int rm)
 160 {
 161     if (sve_access_check(s)) {
 162         unsigned vsz = vec_full_reg_size(s);
 163         gvec_fn(esz, vec_full_reg_offset(s, rd),
 164                 vec_full_reg_offset(s, rn),
 165                 vec_full_reg_offset(s, rm), vsz, vsz);
 166     }
 167     return true;
 168 }
 169
 170 /* Invoke a vector move on two Zregs.  */
 171 static bool do_mov_z(DisasContext *s, int rd, int rn)
 172 {
 173     return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
 174 }
 175
 176 /* Initialize a Zreg with replications of a 64-bit immediate.  */
 177 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 178 {
 179     unsigned vsz = vec_full_reg_size(s);
 180     tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
 181 }
 182
 183 /* Invoke a vector expander on two Pregs.  */
 184 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
 185                          int esz, int rd, int rn)
 186 {
 187     if (sve_access_check(s)) {
 188         unsigned psz = pred_gvec_reg_size(s);
 189         gvec_fn(esz, pred_full_reg_offset(s, rd),
 190                 pred_full_reg_offset(s, rn), psz, psz);
 191     }
 192     return true;
 193 }
 194
 195 /* Invoke a vector expander on three Pregs.  */
 196 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
 197                          int esz, int rd, int rn, int rm)
 198 {
 199     if (sve_access_check(s)) {
 200         unsigned psz = pred_gvec_reg_size(s);
 201         gvec_fn(esz, pred_full_reg_offset(s, rd),
 202                 pred_full_reg_offset(s, rn),
 203                 pred_full_reg_offset(s, rm), psz, psz);
 204     }
 205     return true;
 206 }
 207
 208 /* Invoke a vector operation on four Pregs.  */
 209 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
 210                         int rd, int rn, int rm, int rg)
 211 {
 212     if (sve_access_check(s)) {
 213         unsigned psz = pred_gvec_reg_size(s);
 214         tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
 215                        pred_full_reg_offset(s, rn),
 216                        pred_full_reg_offset(s, rm),
 217                        pred_full_reg_offset(s, rg),
 218                        psz, psz, gvec_op);
 219     }
 220     return true;
 221 }
 222
 223 /* Invoke a vector move on two Pregs.  */
 224 static bool do_mov_p(DisasContext *s, int rd, int rn)
 225 {
 226     return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
 227 }
 228
 229 /* Set the cpu flags as per a return from an SVE helper.  */
 230 static void do_pred_flags(TCGv_i32 t)
 231 {
 232     tcg_gen_mov_i32(cpu_NF, t);
 233     tcg_gen_andi_i32(cpu_ZF, t, 2);
 234     tcg_gen_andi_i32(cpu_CF, t, 1);
 235     tcg_gen_movi_i32(cpu_VF, 0);
 236 }
 237
 238 /* Subroutines computing the ARM PredTest psuedofunction.  */
 239 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
 240 {
 241     TCGv_i32 t = tcg_temp_new_i32();
 242
 243     gen_helper_sve_predtest1(t, d, g);
 244     do_pred_flags(t);
 245     tcg_temp_free_i32(t);
 246 }
 247
 248 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
 249 {
 250     TCGv_ptr dptr = tcg_temp_new_ptr();
 251     TCGv_ptr gptr = tcg_temp_new_ptr();
 252     TCGv_i32 t;
 253
 254     tcg_gen_addi_ptr(dptr, cpu_env, dofs);
 255     tcg_gen_addi_ptr(gptr, cpu_env, gofs);
 256     t = tcg_const_i32(words);
 257
 258     gen_helper_sve_predtest(t, dptr, gptr, t);
 259     tcg_temp_free_ptr(dptr);
 260     tcg_temp_free_ptr(gptr);
 261
 262     do_pred_flags(t);
 263     tcg_temp_free_i32(t);
 264 }
 265
 266 /* For each element size, the bits within a predicate word that are active.  */
 267 const uint64_t pred_esz_masks[4] = {
 268     0xffffffffffffffffull, 0x5555555555555555ull,
 269     0x1111111111111111ull, 0x0101010101010101ull
 270 };
 271
 272 /*
 273  *** SVE Logical - Unpredicated Group
 274  */
 275
 276 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 277 {
 278     return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
 279 }
 280
 281 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 282 {
 283     if (a->rn == a->rm) { /* MOV */
 284         return do_mov_z(s, a->rd, a->rn);
 285     } else {
 286         return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
 287     }
 288 }
 289
 290 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 291 {
 292     return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
 293 }
 294
 295 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 296 {
 297     return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
 298 }
 299
 300 /*
 301  *** SVE Integer Arithmetic - Unpredicated Group
 302  */
 303
 304 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 305 {
 306     return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
 307 }
 308
 309 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 310 {
 311     return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
 312 }
 313
 314 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 315 {
 316     return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
 317 }
 318
 319 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 320 {
 321     return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
 322 }
 323
 324 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 325 {
 326     return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
 327 }
 328
 329 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 330 {
 331     return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
 332 }
 333
 334 /*
 335  *** SVE Integer Arithmetic - Binary Predicated Group
 336  */
 337
 338 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 339 {
 340     unsigned vsz = vec_full_reg_size(s);
 341     if (fn == NULL) {
 342         return false;
 343     }
 344     if (sve_access_check(s)) {
 345         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 346                            vec_full_reg_offset(s, a->rn),
 347                            vec_full_reg_offset(s, a->rm),
 348                            pred_full_reg_offset(s, a->pg),
 349                            vsz, vsz, 0, fn);
 350     }
 351     return true;
 352 }
 353
 354 /* Select active elememnts from Zn and inactive elements from Zm,
 355  * storing the result in Zd.
 356  */
 357 static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
 358 {
 359     static gen_helper_gvec_4 * const fns[4] = {
 360         gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
 361         gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
 362     };
 363     unsigned vsz = vec_full_reg_size(s);
 364     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 365                        vec_full_reg_offset(s, rn),
 366                        vec_full_reg_offset(s, rm),
 367                        pred_full_reg_offset(s, pg),
 368                        vsz, vsz, 0, fns[esz]);
 369 }
 370
 371 #define DO_ZPZZ(NAME, name) \
 372 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
 373                                 uint32_t insn)                            \
 374 {                                                                         \
 375     static gen_helper_gvec_4 * const fns[4] = {                           \
 376         gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
 377         gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
 378     };                                                                    \
 379     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 380 }
 381
 382 DO_ZPZZ(AND, and)
 383 DO_ZPZZ(EOR, eor)
 384 DO_ZPZZ(ORR, orr)
 385 DO_ZPZZ(BIC, bic)
 386
 387 DO_ZPZZ(ADD, add)
 388 DO_ZPZZ(SUB, sub)
 389
 390 DO_ZPZZ(SMAX, smax)
 391 DO_ZPZZ(UMAX, umax)
 392 DO_ZPZZ(SMIN, smin)
 393 DO_ZPZZ(UMIN, umin)
 394 DO_ZPZZ(SABD, sabd)
 395 DO_ZPZZ(UABD, uabd)
 396
 397 DO_ZPZZ(MUL, mul)
 398 DO_ZPZZ(SMULH, smulh)
 399 DO_ZPZZ(UMULH, umulh)
 400
 401 DO_ZPZZ(ASR, asr)
 402 DO_ZPZZ(LSR, lsr)
 403 DO_ZPZZ(LSL, lsl)
 404
 405 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 406 {
 407     static gen_helper_gvec_4 * const fns[4] = {
 408         NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
 409     };
 410     return do_zpzz_ool(s, a, fns[a->esz]);
 411 }
 412
 413 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 414 {
 415     static gen_helper_gvec_4 * const fns[4] = {
 416         NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
 417     };
 418     return do_zpzz_ool(s, a, fns[a->esz]);
 419 }
 420
 421 static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 422 {
 423     if (sve_access_check(s)) {
 424         do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
 425     }
 426     return true;
 427 }
 428
 429 #undef DO_ZPZZ
 430
 431 /*
 432  *** SVE Integer Arithmetic - Unary Predicated Group
 433  */
 434
 435 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 436 {
 437     if (fn == NULL) {
 438         return false;
 439     }
 440     if (sve_access_check(s)) {
 441         unsigned vsz = vec_full_reg_size(s);
 442         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 443                            vec_full_reg_offset(s, a->rn),
 444                            pred_full_reg_offset(s, a->pg),
 445                            vsz, vsz, 0, fn);
 446     }
 447     return true;
 448 }
 449
 450 #define DO_ZPZ(NAME, name) \
 451 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
 452 {                                                                   \
 453     static gen_helper_gvec_3 * const fns[4] = {                     \
 454         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
 455         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
 456     };                                                              \
 457     return do_zpz_ool(s, a, fns[a->esz]);                           \
 458 }
 459
 460 DO_ZPZ(CLS, cls)
 461 DO_ZPZ(CLZ, clz)
 462 DO_ZPZ(CNT_zpz, cnt_zpz)
 463 DO_ZPZ(CNOT, cnot)
 464 DO_ZPZ(NOT_zpz, not_zpz)
 465 DO_ZPZ(ABS, abs)
 466 DO_ZPZ(NEG, neg)
 467
 468 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 469 {
 470     static gen_helper_gvec_3 * const fns[4] = {
 471         NULL,
 472         gen_helper_sve_fabs_h,
 473         gen_helper_sve_fabs_s,
 474         gen_helper_sve_fabs_d
 475     };
 476     return do_zpz_ool(s, a, fns[a->esz]);
 477 }
 478
 479 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 480 {
 481     static gen_helper_gvec_3 * const fns[4] = {
 482         NULL,
 483         gen_helper_sve_fneg_h,
 484         gen_helper_sve_fneg_s,
 485         gen_helper_sve_fneg_d
 486     };
 487     return do_zpz_ool(s, a, fns[a->esz]);
 488 }
 489
 490 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 491 {
 492     static gen_helper_gvec_3 * const fns[4] = {
 493         NULL,
 494         gen_helper_sve_sxtb_h,
 495         gen_helper_sve_sxtb_s,
 496         gen_helper_sve_sxtb_d
 497     };
 498     return do_zpz_ool(s, a, fns[a->esz]);
 499 }
 500
 501 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 502 {
 503     static gen_helper_gvec_3 * const fns[4] = {
 504         NULL,
 505         gen_helper_sve_uxtb_h,
 506         gen_helper_sve_uxtb_s,
 507         gen_helper_sve_uxtb_d
 508     };
 509     return do_zpz_ool(s, a, fns[a->esz]);
 510 }
 511
 512 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 513 {
 514     static gen_helper_gvec_3 * const fns[4] = {
 515         NULL, NULL,
 516         gen_helper_sve_sxth_s,
 517         gen_helper_sve_sxth_d
 518     };
 519     return do_zpz_ool(s, a, fns[a->esz]);
 520 }
 521
 522 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 523 {
 524     static gen_helper_gvec_3 * const fns[4] = {
 525         NULL, NULL,
 526         gen_helper_sve_uxth_s,
 527         gen_helper_sve_uxth_d
 528     };
 529     return do_zpz_ool(s, a, fns[a->esz]);
 530 }
 531
 532 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 533 {
 534     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
 535 }
 536
 537 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 538 {
 539     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
 540 }
 541
 542 #undef DO_ZPZ
 543
 544 /*
 545  *** SVE Integer Reduction Group
 546  */
 547
 548 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
 549 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
 550                        gen_helper_gvec_reduc *fn)
 551 {
 552     unsigned vsz = vec_full_reg_size(s);
 553     TCGv_ptr t_zn, t_pg;
 554     TCGv_i32 desc;
 555     TCGv_i64 temp;
 556
 557     if (fn == NULL) {
 558         return false;
 559     }
 560     if (!sve_access_check(s)) {
 561         return true;
 562     }
 563
 564     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 565     temp = tcg_temp_new_i64();
 566     t_zn = tcg_temp_new_ptr();
 567     t_pg = tcg_temp_new_ptr();
 568
 569     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
 570     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
 571     fn(temp, t_zn, t_pg, desc);
 572     tcg_temp_free_ptr(t_zn);
 573     tcg_temp_free_ptr(t_pg);
 574     tcg_temp_free_i32(desc);
 575
 576     write_fp_dreg(s, a->rd, temp);
 577     tcg_temp_free_i64(temp);
 578     return true;
 579 }
 580
 581 #define DO_VPZ(NAME, name) \
 582 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
 583 {                                                                        \
 584     static gen_helper_gvec_reduc * const fns[4] = {                      \
 585         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
 586         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
 587     };                                                                   \
 588     return do_vpz_ool(s, a, fns[a->esz]);                                \
 589 }
 590
 591 DO_VPZ(ORV, orv)
 592 DO_VPZ(ANDV, andv)
 593 DO_VPZ(EORV, eorv)
 594
 595 DO_VPZ(UADDV, uaddv)
 596 DO_VPZ(SMAXV, smaxv)
 597 DO_VPZ(UMAXV, umaxv)
 598 DO_VPZ(SMINV, sminv)
 599 DO_VPZ(UMINV, uminv)
 600
 601 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 602 {
 603     static gen_helper_gvec_reduc * const fns[4] = {
 604         gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
 605         gen_helper_sve_saddv_s, NULL
 606     };
 607     return do_vpz_ool(s, a, fns[a->esz]);
 608 }
 609
 610 #undef DO_VPZ
 611
 612 /*
 613  *** SVE Shift by Immediate - Predicated Group
 614  */
 615
 616 /* Store zero into every active element of Zd.  We will use this for two
 617  * and three-operand predicated instructions for which logic dictates a
 618  * zero result.
 619  */
 620 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
 621 {
 622     static gen_helper_gvec_2 * const fns[4] = {
 623         gen_helper_sve_clr_b, gen_helper_sve_clr_h,
 624         gen_helper_sve_clr_s, gen_helper_sve_clr_d,
 625     };
 626     if (sve_access_check(s)) {
 627         unsigned vsz = vec_full_reg_size(s);
 628         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 629                            pred_full_reg_offset(s, pg),
 630                            vsz, vsz, 0, fns[esz]);
 631     }
 632     return true;
 633 }
 634
 635 /* Copy Zn into Zd, storing zeros into inactive elements.  */
 636 static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
 637 {
 638     static gen_helper_gvec_3 * const fns[4] = {
 639         gen_helper_sve_movz_b, gen_helper_sve_movz_h,
 640         gen_helper_sve_movz_s, gen_helper_sve_movz_d,
 641     };
 642     unsigned vsz = vec_full_reg_size(s);
 643     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 644                        vec_full_reg_offset(s, rn),
 645                        pred_full_reg_offset(s, pg),
 646                        vsz, vsz, 0, fns[esz]);
 647 }
 648
 649 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 650                         gen_helper_gvec_3 *fn)
 651 {
 652     if (sve_access_check(s)) {
 653         unsigned vsz = vec_full_reg_size(s);
 654         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 655                            vec_full_reg_offset(s, a->rn),
 656                            pred_full_reg_offset(s, a->pg),
 657                            vsz, vsz, a->imm, fn);
 658     }
 659     return true;
 660 }
 661
 662 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 663 {
 664     static gen_helper_gvec_3 * const fns[4] = {
 665         gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
 666         gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
 667     };
 668     if (a->esz < 0) {
 669         /* Invalid tsz encoding -- see tszimm_esz. */
 670         return false;
 671     }
 672     /* Shift by element size is architecturally valid.  For
 673        arithmetic right-shift, it's the same as by one less. */
 674     a->imm = MIN(a->imm, (8 << a->esz) - 1);
 675     return do_zpzi_ool(s, a, fns[a->esz]);
 676 }
 677
 678 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 679 {
 680     static gen_helper_gvec_3 * const fns[4] = {
 681         gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
 682         gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
 683     };
 684     if (a->esz < 0) {
 685         return false;
 686     }
 687     /* Shift by element size is architecturally valid.
 688        For logical shifts, it is a zeroing operation.  */
 689     if (a->imm >= (8 << a->esz)) {
 690         return do_clr_zp(s, a->rd, a->pg, a->esz);
 691     } else {
 692         return do_zpzi_ool(s, a, fns[a->esz]);
 693     }
 694 }
 695
 696 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 697 {
 698     static gen_helper_gvec_3 * const fns[4] = {
 699         gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
 700         gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
 701     };
 702     if (a->esz < 0) {
 703         return false;
 704     }
 705     /* Shift by element size is architecturally valid.
 706        For logical shifts, it is a zeroing operation.  */
 707     if (a->imm >= (8 << a->esz)) {
 708         return do_clr_zp(s, a->rd, a->pg, a->esz);
 709     } else {
 710         return do_zpzi_ool(s, a, fns[a->esz]);
 711     }
 712 }
 713
 714 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 715 {
 716     static gen_helper_gvec_3 * const fns[4] = {
 717         gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
 718         gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
 719     };
 720     if (a->esz < 0) {
 721         return false;
 722     }
 723     /* Shift by element size is architecturally valid.  For arithmetic
 724        right shift for division, it is a zeroing operation.  */
 725     if (a->imm >= (8 << a->esz)) {
 726         return do_clr_zp(s, a->rd, a->pg, a->esz);
 727     } else {
 728         return do_zpzi_ool(s, a, fns[a->esz]);
 729     }
 730 }
 731
 732 /*
 733  *** SVE Bitwise Shift - Predicated Group
 734  */
 735
 736 #define DO_ZPZW(NAME, name) \
 737 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
 738                                 uint32_t insn)                            \
 739 {                                                                         \
 740     static gen_helper_gvec_4 * const fns[3] = {                           \
 741         gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
 742         gen_helper_sve_##name##_zpzw_s,                                   \
 743     };                                                                    \
 744     if (a->esz < 0 || a->esz >= 3) {                                      \
 745         return false;                                                     \
 746     }                                                                     \
 747     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 748 }
 749
 750 DO_ZPZW(ASR, asr)
 751 DO_ZPZW(LSR, lsr)
 752 DO_ZPZW(LSL, lsl)
 753
 754 #undef DO_ZPZW
 755
 756 /*
 757  *** SVE Bitwise Shift - Unpredicated Group
 758  */
 759
 760 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
 761                          void (*gvec_fn)(unsigned, uint32_t, uint32_t,
 762                                          int64_t, uint32_t, uint32_t))
 763 {
 764     if (a->esz < 0) {
 765         /* Invalid tsz encoding -- see tszimm_esz. */
 766         return false;
 767     }
 768     if (sve_access_check(s)) {
 769         unsigned vsz = vec_full_reg_size(s);
 770         /* Shift by element size is architecturally valid.  For
 771            arithmetic right-shift, it's the same as by one less.
 772            Otherwise it is a zeroing operation.  */
 773         if (a->imm >= 8 << a->esz) {
 774             if (asr) {
 775                 a->imm = (8 << a->esz) - 1;
 776             } else {
 777                 do_dupi_z(s, a->rd, 0);
 778                 return true;
 779             }
 780         }
 781         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
 782                 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
 783     }
 784     return true;
 785 }
 786
 787 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 788 {
 789     return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
 790 }
 791
 792 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 793 {
 794     return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
 795 }
 796
 797 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 798 {
 799     return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
 800 }
 801
 802 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
 803 {
 804     if (fn == NULL) {
 805         return false;
 806     }
 807     if (sve_access_check(s)) {
 808         unsigned vsz = vec_full_reg_size(s);
 809         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 810                            vec_full_reg_offset(s, a->rn),
 811                            vec_full_reg_offset(s, a->rm),
 812                            vsz, vsz, 0, fn);
 813     }
 814     return true;
 815 }
 816
 817 #define DO_ZZW(NAME, name) \
 818 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
 819                                uint32_t insn)                             \
 820 {                                                                         \
 821     static gen_helper_gvec_3 * const fns[4] = {                           \
 822         gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
 823         gen_helper_sve_##name##_zzw_s, NULL                               \
 824     };                                                                    \
 825     return do_zzw_ool(s, a, fns[a->esz]);                                 \
 826 }
 827
 828 DO_ZZW(ASR, asr)
 829 DO_ZZW(LSR, lsr)
 830 DO_ZZW(LSL, lsl)
 831
 832 #undef DO_ZZW
 833
 834 /*
 835  *** SVE Integer Multiply-Add Group
 836  */
 837
 838 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
 839                          gen_helper_gvec_5 *fn)
 840 {
 841     if (sve_access_check(s)) {
 842         unsigned vsz = vec_full_reg_size(s);
 843         tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
 844                            vec_full_reg_offset(s, a->ra),
 845                            vec_full_reg_offset(s, a->rn),
 846                            vec_full_reg_offset(s, a->rm),
 847                            pred_full_reg_offset(s, a->pg),
 848                            vsz, vsz, 0, fn);
 849     }
 850     return true;
 851 }
 852
 853 #define DO_ZPZZZ(NAME, name) \
 854 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
 855 {                                                                    \
 856     static gen_helper_gvec_5 * const fns[4] = {                      \
 857         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
 858         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
 859     };                                                               \
 860     return do_zpzzz_ool(s, a, fns[a->esz]);                          \
 861 }
 862
 863 DO_ZPZZZ(MLA, mla)
 864 DO_ZPZZZ(MLS, mls)
 865
 866 #undef DO_ZPZZZ
 867
 868 /*
 869  *** SVE Index Generation Group
 870  */
 871
 872 static void do_index(DisasContext *s, int esz, int rd,
 873                      TCGv_i64 start, TCGv_i64 incr)
 874 {
 875     unsigned vsz = vec_full_reg_size(s);
 876     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 877     TCGv_ptr t_zd = tcg_temp_new_ptr();
 878
 879     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
 880     if (esz == 3) {
 881         gen_helper_sve_index_d(t_zd, start, incr, desc);
 882     } else {
 883         typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 884         static index_fn * const fns[3] = {
 885             gen_helper_sve_index_b,
 886             gen_helper_sve_index_h,
 887             gen_helper_sve_index_s,
 888         };
 889         TCGv_i32 s32 = tcg_temp_new_i32();
 890         TCGv_i32 i32 = tcg_temp_new_i32();
 891
 892         tcg_gen_extrl_i64_i32(s32, start);
 893         tcg_gen_extrl_i64_i32(i32, incr);
 894         fns[esz](t_zd, s32, i32, desc);
 895
 896         tcg_temp_free_i32(s32);
 897         tcg_temp_free_i32(i32);
 898     }
 899     tcg_temp_free_ptr(t_zd);
 900     tcg_temp_free_i32(desc);
 901 }
 902
 903 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
 904 {
 905     if (sve_access_check(s)) {
 906         TCGv_i64 start = tcg_const_i64(a->imm1);
 907         TCGv_i64 incr = tcg_const_i64(a->imm2);
 908         do_index(s, a->esz, a->rd, start, incr);
 909         tcg_temp_free_i64(start);
 910         tcg_temp_free_i64(incr);
 911     }
 912     return true;
 913 }
 914
 915 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
 916 {
 917     if (sve_access_check(s)) {
 918         TCGv_i64 start = tcg_const_i64(a->imm);
 919         TCGv_i64 incr = cpu_reg(s, a->rm);
 920         do_index(s, a->esz, a->rd, start, incr);
 921         tcg_temp_free_i64(start);
 922     }
 923     return true;
 924 }
 925
 926 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
 927 {
 928     if (sve_access_check(s)) {
 929         TCGv_i64 start = cpu_reg(s, a->rn);
 930         TCGv_i64 incr = tcg_const_i64(a->imm);
 931         do_index(s, a->esz, a->rd, start, incr);
 932         tcg_temp_free_i64(incr);
 933     }
 934     return true;
 935 }
 936
 937 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
 938 {
 939     if (sve_access_check(s)) {
 940         TCGv_i64 start = cpu_reg(s, a->rn);
 941         TCGv_i64 incr = cpu_reg(s, a->rm);
 942         do_index(s, a->esz, a->rd, start, incr);
 943     }
 944     return true;
 945 }
 946
 947 /*
 948  *** SVE Stack Allocation Group
 949  */
 950
 951 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
 952 {
 953     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 954     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 955     tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
 956     return true;
 957 }
 958
 959 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
 960 {
 961     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 962     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 963     tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
 964     return true;
 965 }
 966
 967 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
 968 {
 969     TCGv_i64 reg = cpu_reg(s, a->rd);
 970     tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
 971     return true;
 972 }
 973
 974 /*
 975  *** SVE Compute Vector Address Group
 976  */
 977
 978 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 979 {
 980     if (sve_access_check(s)) {
 981         unsigned vsz = vec_full_reg_size(s);
 982         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 983                            vec_full_reg_offset(s, a->rn),
 984                            vec_full_reg_offset(s, a->rm),
 985                            vsz, vsz, a->imm, fn);
 986     }
 987     return true;
 988 }
 989
 990 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
 991 {
 992     return do_adr(s, a, gen_helper_sve_adr_p32);
 993 }
 994
 995 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
 996 {
 997     return do_adr(s, a, gen_helper_sve_adr_p64);
 998 }
 999
1000 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
1001 {
1002     return do_adr(s, a, gen_helper_sve_adr_s32);
1003 }
1004
1005 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
1006 {
1007     return do_adr(s, a, gen_helper_sve_adr_u32);
1008 }
1009
1010 /*
1011  *** SVE Integer Misc - Unpredicated Group
1012  */
1013
1014 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1015 {
1016     static gen_helper_gvec_2 * const fns[4] = {
1017         NULL,
1018         gen_helper_sve_fexpa_h,
1019         gen_helper_sve_fexpa_s,
1020         gen_helper_sve_fexpa_d,
1021     };
1022     if (a->esz == 0) {
1023         return false;
1024     }
1025     if (sve_access_check(s)) {
1026         unsigned vsz = vec_full_reg_size(s);
1027         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1028                            vec_full_reg_offset(s, a->rn),
1029                            vsz, vsz, 0, fns[a->esz]);
1030     }
1031     return true;
1032 }
1033
1034 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
1035 {
1036     static gen_helper_gvec_3 * const fns[4] = {
1037         NULL,
1038         gen_helper_sve_ftssel_h,
1039         gen_helper_sve_ftssel_s,
1040         gen_helper_sve_ftssel_d,
1041     };
1042     if (a->esz == 0) {
1043         return false;
1044     }
1045     if (sve_access_check(s)) {
1046         unsigned vsz = vec_full_reg_size(s);
1047         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1048                            vec_full_reg_offset(s, a->rn),
1049                            vec_full_reg_offset(s, a->rm),
1050                            vsz, vsz, 0, fns[a->esz]);
1051     }
1052     return true;
1053 }
1054
1055 /*
1056  *** SVE Predicate Logical Operations Group
1057  */
1058
1059 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1060                           const GVecGen4 *gvec_op)
1061 {
1062     if (!sve_access_check(s)) {
1063         return true;
1064     }
1065
1066     unsigned psz = pred_gvec_reg_size(s);
1067     int dofs = pred_full_reg_offset(s, a->rd);
1068     int nofs = pred_full_reg_offset(s, a->rn);
1069     int mofs = pred_full_reg_offset(s, a->rm);
1070     int gofs = pred_full_reg_offset(s, a->pg);
1071
1072     if (psz == 8) {
1073         /* Do the operation and the flags generation in temps.  */
1074         TCGv_i64 pd = tcg_temp_new_i64();
1075         TCGv_i64 pn = tcg_temp_new_i64();
1076         TCGv_i64 pm = tcg_temp_new_i64();
1077         TCGv_i64 pg = tcg_temp_new_i64();
1078
1079         tcg_gen_ld_i64(pn, cpu_env, nofs);
1080         tcg_gen_ld_i64(pm, cpu_env, mofs);
1081         tcg_gen_ld_i64(pg, cpu_env, gofs);
1082
1083         gvec_op->fni8(pd, pn, pm, pg);
1084         tcg_gen_st_i64(pd, cpu_env, dofs);
1085
1086         do_predtest1(pd, pg);
1087
1088         tcg_temp_free_i64(pd);
1089         tcg_temp_free_i64(pn);
1090         tcg_temp_free_i64(pm);
1091         tcg_temp_free_i64(pg);
1092     } else {
1093         /* The operation and flags generation is large.  The computation
1094          * of the flags depends on the original contents of the guarding
1095          * predicate.  If the destination overwrites the guarding predicate,
1096          * then the easiest way to get this right is to save a copy.
1097           */
1098         int tofs = gofs;
1099         if (a->rd == a->pg) {
1100             tofs = offsetof(CPUARMState, vfp.preg_tmp);
1101             tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1102         }
1103
1104         tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1105         do_predtest(s, dofs, tofs, psz / 8);
1106     }
1107     return true;
1108 }
1109
1110 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1111 {
1112     tcg_gen_and_i64(pd, pn, pm);
1113     tcg_gen_and_i64(pd, pd, pg);
1114 }
1115
1116 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1117                            TCGv_vec pm, TCGv_vec pg)
1118 {
1119     tcg_gen_and_vec(vece, pd, pn, pm);
1120     tcg_gen_and_vec(vece, pd, pd, pg);
1121 }
1122
1123 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1124 {
1125     static const GVecGen4 op = {
1126         .fni8 = gen_and_pg_i64,
1127         .fniv = gen_and_pg_vec,
1128         .fno = gen_helper_sve_and_pppp,
1129         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1130     };
1131     if (a->s) {
1132         return do_pppp_flags(s, a, &op);
1133     } else if (a->rn == a->rm) {
1134         if (a->pg == a->rn) {
1135             return do_mov_p(s, a->rd, a->rn);
1136         } else {
1137             return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1138         }
1139     } else if (a->pg == a->rn || a->pg == a->rm) {
1140         return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1141     } else {
1142         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1143     }
1144 }
1145
1146 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1147 {
1148     tcg_gen_andc_i64(pd, pn, pm);
1149     tcg_gen_and_i64(pd, pd, pg);
1150 }
1151
1152 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1153                            TCGv_vec pm, TCGv_vec pg)
1154 {
1155     tcg_gen_andc_vec(vece, pd, pn, pm);
1156     tcg_gen_and_vec(vece, pd, pd, pg);
1157 }
1158
1159 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1160 {
1161     static const GVecGen4 op = {
1162         .fni8 = gen_bic_pg_i64,
1163         .fniv = gen_bic_pg_vec,
1164         .fno = gen_helper_sve_bic_pppp,
1165         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1166     };
1167     if (a->s) {
1168         return do_pppp_flags(s, a, &op);
1169     } else if (a->pg == a->rn) {
1170         return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1171     } else {
1172         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1173     }
1174 }
1175
1176 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1177 {
1178     tcg_gen_xor_i64(pd, pn, pm);
1179     tcg_gen_and_i64(pd, pd, pg);
1180 }
1181
1182 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1183                            TCGv_vec pm, TCGv_vec pg)
1184 {
1185     tcg_gen_xor_vec(vece, pd, pn, pm);
1186     tcg_gen_and_vec(vece, pd, pd, pg);
1187 }
1188
1189 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1190 {
1191     static const GVecGen4 op = {
1192         .fni8 = gen_eor_pg_i64,
1193         .fniv = gen_eor_pg_vec,
1194         .fno = gen_helper_sve_eor_pppp,
1195         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1196     };
1197     if (a->s) {
1198         return do_pppp_flags(s, a, &op);
1199     } else {
1200         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1201     }
1202 }
1203
1204 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1205 {
1206     tcg_gen_and_i64(pn, pn, pg);
1207     tcg_gen_andc_i64(pm, pm, pg);
1208     tcg_gen_or_i64(pd, pn, pm);
1209 }
1210
1211 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1212                            TCGv_vec pm, TCGv_vec pg)
1213 {
1214     tcg_gen_and_vec(vece, pn, pn, pg);
1215     tcg_gen_andc_vec(vece, pm, pm, pg);
1216     tcg_gen_or_vec(vece, pd, pn, pm);
1217 }
1218
1219 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1220 {
1221     static const GVecGen4 op = {
1222         .fni8 = gen_sel_pg_i64,
1223         .fniv = gen_sel_pg_vec,
1224         .fno = gen_helper_sve_sel_pppp,
1225         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1226     };
1227     if (a->s) {
1228         return false;
1229     } else {
1230         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1231     }
1232 }
1233
1234 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1235 {
1236     tcg_gen_or_i64(pd, pn, pm);
1237     tcg_gen_and_i64(pd, pd, pg);
1238 }
1239
1240 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1241                            TCGv_vec pm, TCGv_vec pg)
1242 {
1243     tcg_gen_or_vec(vece, pd, pn, pm);
1244     tcg_gen_and_vec(vece, pd, pd, pg);
1245 }
1246
1247 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1248 {
1249     static const GVecGen4 op = {
1250         .fni8 = gen_orr_pg_i64,
1251         .fniv = gen_orr_pg_vec,
1252         .fno = gen_helper_sve_orr_pppp,
1253         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1254     };
1255     if (a->s) {
1256         return do_pppp_flags(s, a, &op);
1257     } else if (a->pg == a->rn && a->rn == a->rm) {
1258         return do_mov_p(s, a->rd, a->rn);
1259     } else {
1260         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1261     }
1262 }
1263
1264 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1265 {
1266     tcg_gen_orc_i64(pd, pn, pm);
1267     tcg_gen_and_i64(pd, pd, pg);
1268 }
1269
1270 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1271                            TCGv_vec pm, TCGv_vec pg)
1272 {
1273     tcg_gen_orc_vec(vece, pd, pn, pm);
1274     tcg_gen_and_vec(vece, pd, pd, pg);
1275 }
1276
1277 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1278 {
1279     static const GVecGen4 op = {
1280         .fni8 = gen_orn_pg_i64,
1281         .fniv = gen_orn_pg_vec,
1282         .fno = gen_helper_sve_orn_pppp,
1283         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1284     };
1285     if (a->s) {
1286         return do_pppp_flags(s, a, &op);
1287     } else {
1288         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1289     }
1290 }
1291
1292 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1293 {
1294     tcg_gen_or_i64(pd, pn, pm);
1295     tcg_gen_andc_i64(pd, pg, pd);
1296 }
1297
1298 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1299                            TCGv_vec pm, TCGv_vec pg)
1300 {
1301     tcg_gen_or_vec(vece, pd, pn, pm);
1302     tcg_gen_andc_vec(vece, pd, pg, pd);
1303 }
1304
1305 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1306 {
1307     static const GVecGen4 op = {
1308         .fni8 = gen_nor_pg_i64,
1309         .fniv = gen_nor_pg_vec,
1310         .fno = gen_helper_sve_nor_pppp,
1311         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1312     };
1313     if (a->s) {
1314         return do_pppp_flags(s, a, &op);
1315     } else {
1316         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1317     }
1318 }
1319
1320 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1321 {
1322     tcg_gen_and_i64(pd, pn, pm);
1323     tcg_gen_andc_i64(pd, pg, pd);
1324 }
1325
1326 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1327                            TCGv_vec pm, TCGv_vec pg)
1328 {
1329     tcg_gen_and_vec(vece, pd, pn, pm);
1330     tcg_gen_andc_vec(vece, pd, pg, pd);
1331 }
1332
1333 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1334 {
1335     static const GVecGen4 op = {
1336         .fni8 = gen_nand_pg_i64,
1337         .fniv = gen_nand_pg_vec,
1338         .fno = gen_helper_sve_nand_pppp,
1339         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1340     };
1341     if (a->s) {
1342         return do_pppp_flags(s, a, &op);
1343     } else {
1344         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1345     }
1346 }
1347
1348 /*
1349  *** SVE Predicate Misc Group
1350  */
1351
1352 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1353 {
1354     if (sve_access_check(s)) {
1355         int nofs = pred_full_reg_offset(s, a->rn);
1356         int gofs = pred_full_reg_offset(s, a->pg);
1357         int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1358
1359         if (words == 1) {
1360             TCGv_i64 pn = tcg_temp_new_i64();
1361             TCGv_i64 pg = tcg_temp_new_i64();
1362
1363             tcg_gen_ld_i64(pn, cpu_env, nofs);
1364             tcg_gen_ld_i64(pg, cpu_env, gofs);
1365             do_predtest1(pn, pg);
1366
1367             tcg_temp_free_i64(pn);
1368             tcg_temp_free_i64(pg);
1369         } else {
1370             do_predtest(s, nofs, gofs, words);
1371         }
1372     }
1373     return true;
1374 }
1375
1376 /* See the ARM pseudocode DecodePredCount.  */
1377 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1378 {
1379     unsigned elements = fullsz >> esz;
1380     unsigned bound;
1381
1382     switch (pattern) {
1383     case 0x0: /* POW2 */
1384         return pow2floor(elements);
1385     case 0x1: /* VL1 */
1386     case 0x2: /* VL2 */
1387     case 0x3: /* VL3 */
1388     case 0x4: /* VL4 */
1389     case 0x5: /* VL5 */
1390     case 0x6: /* VL6 */
1391     case 0x7: /* VL7 */
1392     case 0x8: /* VL8 */
1393         bound = pattern;
1394         break;
1395     case 0x9: /* VL16 */
1396     case 0xa: /* VL32 */
1397     case 0xb: /* VL64 */
1398     case 0xc: /* VL128 */
1399     case 0xd: /* VL256 */
1400         bound = 16 << (pattern - 9);
1401         break;
1402     case 0x1d: /* MUL4 */
1403         return elements - elements % 4;
1404     case 0x1e: /* MUL3 */
1405         return elements - elements % 3;
1406     case 0x1f: /* ALL */
1407         return elements;
1408     default:   /* #uimm5 */
1409         return 0;
1410     }
1411     return elements >= bound ? bound : 0;
1412 }
1413
1414 /* This handles all of the predicate initialization instructions,
1415  * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
1416  * so that decode_pred_count returns 0.  For SETFFR, we will have
1417  * set RD == 16 == FFR.
1418  */
1419 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1420 {
1421     if (!sve_access_check(s)) {
1422         return true;
1423     }
1424
1425     unsigned fullsz = vec_full_reg_size(s);
1426     unsigned ofs = pred_full_reg_offset(s, rd);
1427     unsigned numelem, setsz, i;
1428     uint64_t word, lastword;
1429     TCGv_i64 t;
1430
1431     numelem = decode_pred_count(fullsz, pat, esz);
1432
1433     /* Determine what we must store into each bit, and how many.  */
1434     if (numelem == 0) {
1435         lastword = word = 0;
1436         setsz = fullsz;
1437     } else {
1438         setsz = numelem << esz;
1439         lastword = word = pred_esz_masks[esz];
1440         if (setsz % 64) {
1441             lastword &= ~(-1ull << (setsz % 64));
1442         }
1443     }
1444
1445     t = tcg_temp_new_i64();
1446     if (fullsz <= 64) {
1447         tcg_gen_movi_i64(t, lastword);
1448         tcg_gen_st_i64(t, cpu_env, ofs);
1449         goto done;
1450     }
1451
1452     if (word == lastword) {
1453         unsigned maxsz = size_for_gvec(fullsz / 8);
1454         unsigned oprsz = size_for_gvec(setsz / 8);
1455
1456         if (oprsz * 8 == setsz) {
1457             tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1458             goto done;
1459         }
1460         if (oprsz * 8 == setsz + 8) {
1461             tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1462             tcg_gen_movi_i64(t, 0);
1463             tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
1464             goto done;
1465         }
1466     }
1467
1468     setsz /= 8;
1469     fullsz /= 8;
1470
1471     tcg_gen_movi_i64(t, word);
1472     for (i = 0; i < setsz; i += 8) {
1473         tcg_gen_st_i64(t, cpu_env, ofs + i);
1474     }
1475     if (lastword != word) {
1476         tcg_gen_movi_i64(t, lastword);
1477         tcg_gen_st_i64(t, cpu_env, ofs + i);
1478         i += 8;
1479     }
1480     if (i < fullsz) {
1481         tcg_gen_movi_i64(t, 0);
1482         for (; i < fullsz; i += 8) {
1483             tcg_gen_st_i64(t, cpu_env, ofs + i);
1484         }
1485     }
1486
1487  done:
1488     tcg_temp_free_i64(t);
1489
1490     /* PTRUES */
1491     if (setflag) {
1492         tcg_gen_movi_i32(cpu_NF, -(word != 0));
1493         tcg_gen_movi_i32(cpu_CF, word == 0);
1494         tcg_gen_movi_i32(cpu_VF, 0);
1495         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1496     }
1497     return true;
1498 }
1499
1500 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1501 {
1502     return do_predset(s, a->esz, a->rd, a->pat, a->s);
1503 }
1504
1505 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1506 {
1507     /* Note pat == 31 is #all, to set all elements.  */
1508     return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1509 }
1510
1511 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1512 {
1513     /* Note pat == 32 is #unimp, to set no elements.  */
1514     return do_predset(s, 0, a->rd, 32, false);
1515 }
1516
1517 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1518 {
1519     /* The path through do_pppp_flags is complicated enough to want to avoid
1520      * duplication.  Frob the arguments into the form of a predicated AND.
1521      */
1522     arg_rprr_s alt_a = {
1523         .rd = a->rd, .pg = a->pg, .s = a->s,
1524         .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1525     };
1526     return trans_AND_pppp(s, &alt_a, insn);
1527 }
1528
1529 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1530 {
1531     return do_mov_p(s, a->rd, FFR_PRED_NUM);
1532 }
1533
1534 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1535 {
1536     return do_mov_p(s, FFR_PRED_NUM, a->rn);
1537 }
1538
1539 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1540                             void (*gen_fn)(TCGv_i32, TCGv_ptr,
1541                                            TCGv_ptr, TCGv_i32))
1542 {
1543     if (!sve_access_check(s)) {
1544         return true;
1545     }
1546
1547     TCGv_ptr t_pd = tcg_temp_new_ptr();
1548     TCGv_ptr t_pg = tcg_temp_new_ptr();
1549     TCGv_i32 t;
1550     unsigned desc;
1551
1552     desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1553     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1554
1555     tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1556     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1557     t = tcg_const_i32(desc);
1558
1559     gen_fn(t, t_pd, t_pg, t);
1560     tcg_temp_free_ptr(t_pd);
1561     tcg_temp_free_ptr(t_pg);
1562
1563     do_pred_flags(t);
1564     tcg_temp_free_i32(t);
1565     return true;
1566 }
1567
1568 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1569 {
1570     return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1571 }
1572
1573 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1574 {
1575     return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1576 }
1577
1578 /*
1579  *** SVE Element Count Group
1580  */
1581
1582 /* Perform an inline saturating addition of a 32-bit value within
1583  * a 64-bit register.  The second operand is known to be positive,
1584  * which halves the comparisions we must perform to bound the result.
1585  */
1586 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1587 {
1588     int64_t ibound;
1589     TCGv_i64 bound;
1590     TCGCond cond;
1591
1592     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1593     if (u) {
1594         tcg_gen_ext32u_i64(reg, reg);
1595     } else {
1596         tcg_gen_ext32s_i64(reg, reg);
1597     }
1598     if (d) {
1599         tcg_gen_sub_i64(reg, reg, val);
1600         ibound = (u ? 0 : INT32_MIN);
1601         cond = TCG_COND_LT;
1602     } else {
1603         tcg_gen_add_i64(reg, reg, val);
1604         ibound = (u ? UINT32_MAX : INT32_MAX);
1605         cond = TCG_COND_GT;
1606     }
1607     bound = tcg_const_i64(ibound);
1608     tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1609     tcg_temp_free_i64(bound);
1610 }
1611
1612 /* Similarly with 64-bit values.  */
1613 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1614 {
1615     TCGv_i64 t0 = tcg_temp_new_i64();
1616     TCGv_i64 t1 = tcg_temp_new_i64();
1617     TCGv_i64 t2;
1618
1619     if (u) {
1620         if (d) {
1621             tcg_gen_sub_i64(t0, reg, val);
1622             tcg_gen_movi_i64(t1, 0);
1623             tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1624         } else {
1625             tcg_gen_add_i64(t0, reg, val);
1626             tcg_gen_movi_i64(t1, -1);
1627             tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1628         }
1629     } else {
1630         if (d) {
1631             /* Detect signed overflow for subtraction.  */
1632             tcg_gen_xor_i64(t0, reg, val);
1633             tcg_gen_sub_i64(t1, reg, val);
1634             tcg_gen_xor_i64(reg, reg, t0);
1635             tcg_gen_and_i64(t0, t0, reg);
1636
1637             /* Bound the result.  */
1638             tcg_gen_movi_i64(reg, INT64_MIN);
1639             t2 = tcg_const_i64(0);
1640             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1641         } else {
1642             /* Detect signed overflow for addition.  */
1643             tcg_gen_xor_i64(t0, reg, val);
1644             tcg_gen_add_i64(reg, reg, val);
1645             tcg_gen_xor_i64(t1, reg, val);
1646             tcg_gen_andc_i64(t0, t1, t0);
1647
1648             /* Bound the result.  */
1649             tcg_gen_movi_i64(t1, INT64_MAX);
1650             t2 = tcg_const_i64(0);
1651             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1652         }
1653         tcg_temp_free_i64(t2);
1654     }
1655     tcg_temp_free_i64(t0);
1656     tcg_temp_free_i64(t1);
1657 }
1658
1659 /* Similarly with a vector and a scalar operand.  */
1660 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1661                               TCGv_i64 val, bool u, bool d)
1662 {
1663     unsigned vsz = vec_full_reg_size(s);
1664     TCGv_ptr dptr, nptr;
1665     TCGv_i32 t32, desc;
1666     TCGv_i64 t64;
1667
1668     dptr = tcg_temp_new_ptr();
1669     nptr = tcg_temp_new_ptr();
1670     tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1671     tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1672     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1673
1674     switch (esz) {
1675     case MO_8:
1676         t32 = tcg_temp_new_i32();
1677         tcg_gen_extrl_i64_i32(t32, val);
1678         if (d) {
1679             tcg_gen_neg_i32(t32, t32);
1680         }
1681         if (u) {
1682             gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1683         } else {
1684             gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1685         }
1686         tcg_temp_free_i32(t32);
1687         break;
1688
1689     case MO_16:
1690         t32 = tcg_temp_new_i32();
1691         tcg_gen_extrl_i64_i32(t32, val);
1692         if (d) {
1693             tcg_gen_neg_i32(t32, t32);
1694         }
1695         if (u) {
1696             gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1697         } else {
1698             gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1699         }
1700         tcg_temp_free_i32(t32);
1701         break;
1702
1703     case MO_32:
1704         t64 = tcg_temp_new_i64();
1705         if (d) {
1706             tcg_gen_neg_i64(t64, val);
1707         } else {
1708             tcg_gen_mov_i64(t64, val);
1709         }
1710         if (u) {
1711             gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1712         } else {
1713             gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1714         }
1715         tcg_temp_free_i64(t64);
1716         break;
1717
1718     case MO_64:
1719         if (u) {
1720             if (d) {
1721                 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1722             } else {
1723                 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1724             }
1725         } else if (d) {
1726             t64 = tcg_temp_new_i64();
1727             tcg_gen_neg_i64(t64, val);
1728             gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1729             tcg_temp_free_i64(t64);
1730         } else {
1731             gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1732         }
1733         break;
1734
1735     default:
1736         g_assert_not_reached();
1737     }
1738
1739     tcg_temp_free_ptr(dptr);
1740     tcg_temp_free_ptr(nptr);
1741     tcg_temp_free_i32(desc);
1742 }
1743
1744 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1745 {
1746     if (sve_access_check(s)) {
1747         unsigned fullsz = vec_full_reg_size(s);
1748         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1749         tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1750     }
1751     return true;
1752 }
1753
1754 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1755 {
1756     if (sve_access_check(s)) {
1757         unsigned fullsz = vec_full_reg_size(s);
1758         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1759         int inc = numelem * a->imm * (a->d ? -1 : 1);
1760         TCGv_i64 reg = cpu_reg(s, a->rd);
1761
1762         tcg_gen_addi_i64(reg, reg, inc);
1763     }
1764     return true;
1765 }
1766
1767 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1768                                uint32_t insn)
1769 {
1770     if (!sve_access_check(s)) {
1771         return true;
1772     }
1773
1774     unsigned fullsz = vec_full_reg_size(s);
1775     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1776     int inc = numelem * a->imm;
1777     TCGv_i64 reg = cpu_reg(s, a->rd);
1778
1779     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1780     if (inc == 0) {
1781         if (a->u) {
1782             tcg_gen_ext32u_i64(reg, reg);
1783         } else {
1784             tcg_gen_ext32s_i64(reg, reg);
1785         }
1786     } else {
1787         TCGv_i64 t = tcg_const_i64(inc);
1788         do_sat_addsub_32(reg, t, a->u, a->d);
1789         tcg_temp_free_i64(t);
1790     }
1791     return true;
1792 }
1793
1794 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1795                                uint32_t insn)
1796 {
1797     if (!sve_access_check(s)) {
1798         return true;
1799     }
1800
1801     unsigned fullsz = vec_full_reg_size(s);
1802     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1803     int inc = numelem * a->imm;
1804     TCGv_i64 reg = cpu_reg(s, a->rd);
1805
1806     if (inc != 0) {
1807         TCGv_i64 t = tcg_const_i64(inc);
1808         do_sat_addsub_64(reg, t, a->u, a->d);
1809         tcg_temp_free_i64(t);
1810     }
1811     return true;
1812 }
1813
1814 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1815 {
1816     if (a->esz == 0) {
1817         return false;
1818     }
1819
1820     unsigned fullsz = vec_full_reg_size(s);
1821     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1822     int inc = numelem * a->imm;
1823
1824     if (inc != 0) {
1825         if (sve_access_check(s)) {
1826             TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1827             tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1828                               vec_full_reg_offset(s, a->rn),
1829                               t, fullsz, fullsz);
1830             tcg_temp_free_i64(t);
1831         }
1832     } else {
1833         do_mov_z(s, a->rd, a->rn);
1834     }
1835     return true;
1836 }
1837
1838 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1839                             uint32_t insn)
1840 {
1841     if (a->esz == 0) {
1842         return false;
1843     }
1844
1845     unsigned fullsz = vec_full_reg_size(s);
1846     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1847     int inc = numelem * a->imm;
1848
1849     if (inc != 0) {
1850         if (sve_access_check(s)) {
1851             TCGv_i64 t = tcg_const_i64(inc);
1852             do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1853             tcg_temp_free_i64(t);
1854         }
1855     } else {
1856         do_mov_z(s, a->rd, a->rn);
1857     }
1858     return true;
1859 }
1860
1861 /*
1862  *** SVE Bitwise Immediate Group
1863  */
1864
1865 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1866 {
1867     uint64_t imm;
1868     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1869                                 extract32(a->dbm, 0, 6),
1870                                 extract32(a->dbm, 6, 6))) {
1871         return false;
1872     }
1873     if (sve_access_check(s)) {
1874         unsigned vsz = vec_full_reg_size(s);
1875         gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1876                 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1877     }
1878     return true;
1879 }
1880
1881 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1882 {
1883     return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1884 }
1885
1886 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1887 {
1888     return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1889 }
1890
1891 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1892 {
1893     return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1894 }
1895
1896 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1897 {
1898     uint64_t imm;
1899     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1900                                 extract32(a->dbm, 0, 6),
1901                                 extract32(a->dbm, 6, 6))) {
1902         return false;
1903     }
1904     if (sve_access_check(s)) {
1905         do_dupi_z(s, a->rd, imm);
1906     }
1907     return true;
1908 }
1909
1910 /*
1911  *** SVE Integer Wide Immediate - Predicated Group
1912  */
1913
1914 /* Implement all merging copies.  This is used for CPY (immediate),
1915  * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1916  */
1917 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1918                      TCGv_i64 val)
1919 {
1920     typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1921     static gen_cpy * const fns[4] = {
1922         gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1923         gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1924     };
1925     unsigned vsz = vec_full_reg_size(s);
1926     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1927     TCGv_ptr t_zd = tcg_temp_new_ptr();
1928     TCGv_ptr t_zn = tcg_temp_new_ptr();
1929     TCGv_ptr t_pg = tcg_temp_new_ptr();
1930
1931     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1932     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1933     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1934
1935     fns[esz](t_zd, t_zn, t_pg, val, desc);
1936
1937     tcg_temp_free_ptr(t_zd);
1938     tcg_temp_free_ptr(t_zn);
1939     tcg_temp_free_ptr(t_pg);
1940     tcg_temp_free_i32(desc);
1941 }
1942
1943 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1944 {
1945     if (a->esz == 0) {
1946         return false;
1947     }
1948     if (sve_access_check(s)) {
1949         /* Decode the VFP immediate.  */
1950         uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1951         TCGv_i64 t_imm = tcg_const_i64(imm);
1952         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1953         tcg_temp_free_i64(t_imm);
1954     }
1955     return true;
1956 }
1957
1958 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1959 {
1960     if (a->esz == 0 && extract32(insn, 13, 1)) {
1961         return false;
1962     }
1963     if (sve_access_check(s)) {
1964         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1965         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1966         tcg_temp_free_i64(t_imm);
1967     }
1968     return true;
1969 }
1970
1971 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1972 {
1973     static gen_helper_gvec_2i * const fns[4] = {
1974         gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1975         gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1976     };
1977
1978     if (a->esz == 0 && extract32(insn, 13, 1)) {
1979         return false;
1980     }
1981     if (sve_access_check(s)) {
1982         unsigned vsz = vec_full_reg_size(s);
1983         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1984         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1985                             pred_full_reg_offset(s, a->pg),
1986                             t_imm, vsz, vsz, 0, fns[a->esz]);
1987         tcg_temp_free_i64(t_imm);
1988     }
1989     return true;
1990 }
1991
1992 /*
1993  *** SVE Permute Extract Group
1994  */
1995
1996 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1997 {
1998     if (!sve_access_check(s)) {
1999         return true;
2000     }
2001
2002     unsigned vsz = vec_full_reg_size(s);
2003     unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
2004     unsigned n_siz = vsz - n_ofs;
2005     unsigned d = vec_full_reg_offset(s, a->rd);
2006     unsigned n = vec_full_reg_offset(s, a->rn);
2007     unsigned m = vec_full_reg_offset(s, a->rm);
2008
2009     /* Use host vector move insns if we have appropriate sizes
2010      * and no unfortunate overlap.
2011      */
2012     if (m != d
2013         && n_ofs == size_for_gvec(n_ofs)
2014         && n_siz == size_for_gvec(n_siz)
2015         && (d != n || n_siz <= n_ofs)) {
2016         tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2017         if (n_ofs != 0) {
2018             tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2019         }
2020     } else {
2021         tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2022     }
2023     return true;
2024 }
2025
2026 /*
2027  *** SVE Permute - Unpredicated Group
2028  */
2029
2030 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
2031 {
2032     if (sve_access_check(s)) {
2033         unsigned vsz = vec_full_reg_size(s);
2034         tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2035                              vsz, vsz, cpu_reg_sp(s, a->rn));
2036     }
2037     return true;
2038 }
2039
2040 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2041 {
2042     if ((a->imm & 0x1f) == 0) {
2043         return false;
2044     }
2045     if (sve_access_check(s)) {
2046         unsigned vsz = vec_full_reg_size(s);
2047         unsigned dofs = vec_full_reg_offset(s, a->rd);
2048         unsigned esz, index;
2049
2050         esz = ctz32(a->imm);
2051         index = a->imm >> (esz + 1);
2052
2053         if ((index << esz) < vsz) {
2054             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2055             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2056         } else {
2057             tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2058         }
2059     }
2060     return true;
2061 }
2062
2063 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2064 {
2065     typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2066     static gen_insr * const fns[4] = {
2067         gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2068         gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2069     };
2070     unsigned vsz = vec_full_reg_size(s);
2071     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2072     TCGv_ptr t_zd = tcg_temp_new_ptr();
2073     TCGv_ptr t_zn = tcg_temp_new_ptr();
2074
2075     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2076     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2077
2078     fns[a->esz](t_zd, t_zn, val, desc);
2079
2080     tcg_temp_free_ptr(t_zd);
2081     tcg_temp_free_ptr(t_zn);
2082     tcg_temp_free_i32(desc);
2083 }
2084
2085 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2086 {
2087     if (sve_access_check(s)) {
2088         TCGv_i64 t = tcg_temp_new_i64();
2089         tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2090         do_insr_i64(s, a, t);
2091         tcg_temp_free_i64(t);
2092     }
2093     return true;
2094 }
2095
2096 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2097 {
2098     if (sve_access_check(s)) {
2099         do_insr_i64(s, a, cpu_reg(s, a->rm));
2100     }
2101     return true;
2102 }
2103
2104 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2105 {
2106     static gen_helper_gvec_2 * const fns[4] = {
2107         gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2108         gen_helper_sve_rev_s, gen_helper_sve_rev_d
2109     };
2110
2111     if (sve_access_check(s)) {
2112         unsigned vsz = vec_full_reg_size(s);
2113         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2114                            vec_full_reg_offset(s, a->rn),
2115                            vsz, vsz, 0, fns[a->esz]);
2116     }
2117     return true;
2118 }
2119
2120 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2121 {
2122     static gen_helper_gvec_3 * const fns[4] = {
2123         gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2124         gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2125     };
2126
2127     if (sve_access_check(s)) {
2128         unsigned vsz = vec_full_reg_size(s);
2129         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2130                            vec_full_reg_offset(s, a->rn),
2131                            vec_full_reg_offset(s, a->rm),
2132                            vsz, vsz, 0, fns[a->esz]);
2133     }
2134     return true;
2135 }
2136
2137 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2138 {
2139     static gen_helper_gvec_2 * const fns[4][2] = {
2140         { NULL, NULL },
2141         { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2142         { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2143         { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2144     };
2145
2146     if (a->esz == 0) {
2147         return false;
2148     }
2149     if (sve_access_check(s)) {
2150         unsigned vsz = vec_full_reg_size(s);
2151         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2152                            vec_full_reg_offset(s, a->rn)
2153                            + (a->h ? vsz / 2 : 0),
2154                            vsz, vsz, 0, fns[a->esz][a->u]);
2155     }
2156     return true;
2157 }
2158
2159 /*
2160  *** SVE Permute - Predicates Group
2161  */
2162
2163 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2164                           gen_helper_gvec_3 *fn)
2165 {
2166     if (!sve_access_check(s)) {
2167         return true;
2168     }
2169
2170     unsigned vsz = pred_full_reg_size(s);
2171
2172     /* Predicate sizes may be smaller and cannot use simd_desc.
2173        We cannot round up, as we do elsewhere, because we need
2174        the exact size for ZIP2 and REV.  We retain the style for
2175        the other helpers for consistency.  */
2176     TCGv_ptr t_d = tcg_temp_new_ptr();
2177     TCGv_ptr t_n = tcg_temp_new_ptr();
2178     TCGv_ptr t_m = tcg_temp_new_ptr();
2179     TCGv_i32 t_desc;
2180     int desc;
2181
2182     desc = vsz - 2;
2183     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2184     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2185
2186     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2187     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2188     tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2189     t_desc = tcg_const_i32(desc);
2190
2191     fn(t_d, t_n, t_m, t_desc);
2192
2193     tcg_temp_free_ptr(t_d);
2194     tcg_temp_free_ptr(t_n);
2195     tcg_temp_free_ptr(t_m);
2196     tcg_temp_free_i32(t_desc);
2197     return true;
2198 }
2199
2200 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2201                           gen_helper_gvec_2 *fn)
2202 {
2203     if (!sve_access_check(s)) {
2204         return true;
2205     }
2206
2207     unsigned vsz = pred_full_reg_size(s);
2208     TCGv_ptr t_d = tcg_temp_new_ptr();
2209     TCGv_ptr t_n = tcg_temp_new_ptr();
2210     TCGv_i32 t_desc;
2211     int desc;
2212
2213     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2214     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2215
2216     /* Predicate sizes may be smaller and cannot use simd_desc.
2217        We cannot round up, as we do elsewhere, because we need
2218        the exact size for ZIP2 and REV.  We retain the style for
2219        the other helpers for consistency.  */
2220
2221     desc = vsz - 2;
2222     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2223     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2224     t_desc = tcg_const_i32(desc);
2225
2226     fn(t_d, t_n, t_desc);
2227
2228     tcg_temp_free_i32(t_desc);
2229     tcg_temp_free_ptr(t_d);
2230     tcg_temp_free_ptr(t_n);
2231     return true;
2232 }
2233
2234 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2235 {
2236     return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2237 }
2238
2239 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2240 {
2241     return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2242 }
2243
2244 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2245 {
2246     return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2247 }
2248
2249 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2250 {
2251     return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2252 }
2253
2254 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2255 {
2256     return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2257 }
2258
2259 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2260 {
2261     return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2262 }
2263
2264 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2265 {
2266     return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2267 }
2268
2269 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2270 {
2271     return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2272 }
2273
2274 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2275 {
2276     return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2277 }
2278
2279 /*
2280  *** SVE Permute - Interleaving Group
2281  */
2282
2283 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2284 {
2285     static gen_helper_gvec_3 * const fns[4] = {
2286         gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2287         gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2288     };
2289
2290     if (sve_access_check(s)) {
2291         unsigned vsz = vec_full_reg_size(s);
2292         unsigned high_ofs = high ? vsz / 2 : 0;
2293         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2294                            vec_full_reg_offset(s, a->rn) + high_ofs,
2295                            vec_full_reg_offset(s, a->rm) + high_ofs,
2296                            vsz, vsz, 0, fns[a->esz]);
2297     }
2298     return true;
2299 }
2300
2301 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2302                             gen_helper_gvec_3 *fn)
2303 {
2304     if (sve_access_check(s)) {
2305         unsigned vsz = vec_full_reg_size(s);
2306         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2307                            vec_full_reg_offset(s, a->rn),
2308                            vec_full_reg_offset(s, a->rm),
2309                            vsz, vsz, data, fn);
2310     }
2311     return true;
2312 }
2313
2314 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2315 {
2316     return do_zip(s, a, false);
2317 }
2318
2319 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2320 {
2321     return do_zip(s, a, true);
2322 }
2323
2324 static gen_helper_gvec_3 * const uzp_fns[4] = {
2325     gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2326     gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2327 };
2328
2329 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2330 {
2331     return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2332 }
2333
2334 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2335 {
2336     return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2337 }
2338
2339 static gen_helper_gvec_3 * const trn_fns[4] = {
2340     gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2341     gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2342 };
2343
2344 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2345 {
2346     return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2347 }
2348
2349 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2350 {
2351     return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2352 }
2353
2354 /*
2355  *** SVE Permute Vector - Predicated Group
2356  */
2357
2358 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2359 {
2360     static gen_helper_gvec_3 * const fns[4] = {
2361         NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2362     };
2363     return do_zpz_ool(s, a, fns[a->esz]);
2364 }
2365
2366 /* Call the helper that computes the ARM LastActiveElement pseudocode
2367  * function, scaled by the element size.  This includes the not found
2368  * indication; e.g. not found for esz=3 is -8.
2369  */
2370 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2371 {
2372     /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2373      * round up, as we do elsewhere, because we need the exact size.
2374      */
2375     TCGv_ptr t_p = tcg_temp_new_ptr();
2376     TCGv_i32 t_desc;
2377     unsigned vsz = pred_full_reg_size(s);
2378     unsigned desc;
2379
2380     desc = vsz - 2;
2381     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2382
2383     tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2384     t_desc = tcg_const_i32(desc);
2385
2386     gen_helper_sve_last_active_element(ret, t_p, t_desc);
2387
2388     tcg_temp_free_i32(t_desc);
2389     tcg_temp_free_ptr(t_p);
2390 }
2391
2392 /* Increment LAST to the offset of the next element in the vector,
2393  * wrapping around to 0.
2394  */
2395 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2396 {
2397     unsigned vsz = vec_full_reg_size(s);
2398
2399     tcg_gen_addi_i32(last, last, 1 << esz);
2400     if (is_power_of_2(vsz)) {
2401         tcg_gen_andi_i32(last, last, vsz - 1);
2402     } else {
2403         TCGv_i32 max = tcg_const_i32(vsz);
2404         TCGv_i32 zero = tcg_const_i32(0);
2405         tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2406         tcg_temp_free_i32(max);
2407         tcg_temp_free_i32(zero);
2408     }
2409 }
2410
2411 /* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2412 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2413 {
2414     unsigned vsz = vec_full_reg_size(s);
2415
2416     if (is_power_of_2(vsz)) {
2417         tcg_gen_andi_i32(last, last, vsz - 1);
2418     } else {
2419         TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2420         TCGv_i32 zero = tcg_const_i32(0);
2421         tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2422         tcg_temp_free_i32(max);
2423         tcg_temp_free_i32(zero);
2424     }
2425 }
2426
2427 /* Load an unsigned element of ESZ from BASE+OFS.  */
2428 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2429 {
2430     TCGv_i64 r = tcg_temp_new_i64();
2431
2432     switch (esz) {
2433     case 0:
2434         tcg_gen_ld8u_i64(r, base, ofs);
2435         break;
2436     case 1:
2437         tcg_gen_ld16u_i64(r, base, ofs);
2438         break;
2439     case 2:
2440         tcg_gen_ld32u_i64(r, base, ofs);
2441         break;
2442     case 3:
2443         tcg_gen_ld_i64(r, base, ofs);
2444         break;
2445     default:
2446         g_assert_not_reached();
2447     }
2448     return r;
2449 }
2450
2451 /* Load an unsigned element of ESZ from RM[LAST].  */
2452 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2453                                  int rm, int esz)
2454 {
2455     TCGv_ptr p = tcg_temp_new_ptr();
2456     TCGv_i64 r;
2457
2458     /* Convert offset into vector into offset into ENV.
2459      * The final adjustment for the vector register base
2460      * is added via constant offset to the load.
2461      */
2462 #ifdef HOST_WORDS_BIGENDIAN
2463     /* Adjust for element ordering.  See vec_reg_offset.  */
2464     if (esz < 3) {
2465         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2466     }
2467 #endif
2468     tcg_gen_ext_i32_ptr(p, last);
2469     tcg_gen_add_ptr(p, p, cpu_env);
2470
2471     r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2472     tcg_temp_free_ptr(p);
2473
2474     return r;
2475 }
2476
2477 /* Compute CLAST for a Zreg.  */
2478 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2479 {
2480     TCGv_i32 last;
2481     TCGLabel *over;
2482     TCGv_i64 ele;
2483     unsigned vsz, esz = a->esz;
2484
2485     if (!sve_access_check(s)) {
2486         return true;
2487     }
2488
2489     last = tcg_temp_local_new_i32();
2490     over = gen_new_label();
2491
2492     find_last_active(s, last, esz, a->pg);
2493
2494     /* There is of course no movcond for a 2048-bit vector,
2495      * so we must branch over the actual store.
2496      */
2497     tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2498
2499     if (!before) {
2500         incr_last_active(s, last, esz);
2501     }
2502
2503     ele = load_last_active(s, last, a->rm, esz);
2504     tcg_temp_free_i32(last);
2505
2506     vsz = vec_full_reg_size(s);
2507     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2508     tcg_temp_free_i64(ele);
2509
2510     /* If this insn used MOVPRFX, we may need a second move.  */
2511     if (a->rd != a->rn) {
2512         TCGLabel *done = gen_new_label();
2513         tcg_gen_br(done);
2514
2515         gen_set_label(over);
2516         do_mov_z(s, a->rd, a->rn);
2517
2518         gen_set_label(done);
2519     } else {
2520         gen_set_label(over);
2521     }
2522     return true;
2523 }
2524
2525 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2526 {
2527     return do_clast_vector(s, a, false);
2528 }
2529
2530 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2531 {
2532     return do_clast_vector(s, a, true);
2533 }
2534
2535 /* Compute CLAST for a scalar.  */
2536 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2537                             bool before, TCGv_i64 reg_val)
2538 {
2539     TCGv_i32 last = tcg_temp_new_i32();
2540     TCGv_i64 ele, cmp, zero;
2541
2542     find_last_active(s, last, esz, pg);
2543
2544     /* Extend the original value of last prior to incrementing.  */
2545     cmp = tcg_temp_new_i64();
2546     tcg_gen_ext_i32_i64(cmp, last);
2547
2548     if (!before) {
2549         incr_last_active(s, last, esz);
2550     }
2551
2552     /* The conceit here is that while last < 0 indicates not found, after
2553      * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2554      * from which we can load garbage.  We then discard the garbage with
2555      * a conditional move.
2556      */
2557     ele = load_last_active(s, last, rm, esz);
2558     tcg_temp_free_i32(last);
2559
2560     zero = tcg_const_i64(0);
2561     tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2562
2563     tcg_temp_free_i64(zero);
2564     tcg_temp_free_i64(cmp);
2565     tcg_temp_free_i64(ele);
2566 }
2567
2568 /* Compute CLAST for a Vreg.  */
2569 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2570 {
2571     if (sve_access_check(s)) {
2572         int esz = a->esz;
2573         int ofs = vec_reg_offset(s, a->rd, 0, esz);
2574         TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2575
2576         do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2577         write_fp_dreg(s, a->rd, reg);
2578         tcg_temp_free_i64(reg);
2579     }
2580     return true;
2581 }
2582
2583 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2584 {
2585     return do_clast_fp(s, a, false);
2586 }
2587
2588 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2589 {
2590     return do_clast_fp(s, a, true);
2591 }
2592
2593 /* Compute CLAST for a Xreg.  */
2594 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2595 {
2596     TCGv_i64 reg;
2597
2598     if (!sve_access_check(s)) {
2599         return true;
2600     }
2601
2602     reg = cpu_reg(s, a->rd);
2603     switch (a->esz) {
2604     case 0:
2605         tcg_gen_ext8u_i64(reg, reg);
2606         break;
2607     case 1:
2608         tcg_gen_ext16u_i64(reg, reg);
2609         break;
2610     case 2:
2611         tcg_gen_ext32u_i64(reg, reg);
2612         break;
2613     case 3:
2614         break;
2615     default:
2616         g_assert_not_reached();
2617     }
2618
2619     do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2620     return true;
2621 }
2622
2623 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2624 {
2625     return do_clast_general(s, a, false);
2626 }
2627
2628 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2629 {
2630     return do_clast_general(s, a, true);
2631 }
2632
2633 /* Compute LAST for a scalar.  */
2634 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2635                                int pg, int rm, bool before)
2636 {
2637     TCGv_i32 last = tcg_temp_new_i32();
2638     TCGv_i64 ret;
2639
2640     find_last_active(s, last, esz, pg);
2641     if (before) {
2642         wrap_last_active(s, last, esz);
2643     } else {
2644         incr_last_active(s, last, esz);
2645     }
2646
2647     ret = load_last_active(s, last, rm, esz);
2648     tcg_temp_free_i32(last);
2649     return ret;
2650 }
2651
2652 /* Compute LAST for a Vreg.  */
2653 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2654 {
2655     if (sve_access_check(s)) {
2656         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2657         write_fp_dreg(s, a->rd, val);
2658         tcg_temp_free_i64(val);
2659     }
2660     return true;
2661 }
2662
2663 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2664 {
2665     return do_last_fp(s, a, false);
2666 }
2667
2668 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2669 {
2670     return do_last_fp(s, a, true);
2671 }
2672
2673 /* Compute LAST for a Xreg.  */
2674 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2675 {
2676     if (sve_access_check(s)) {
2677         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2678         tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2679         tcg_temp_free_i64(val);
2680     }
2681     return true;
2682 }
2683
2684 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2685 {
2686     return do_last_general(s, a, false);
2687 }
2688
2689 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2690 {
2691     return do_last_general(s, a, true);
2692 }
2693
2694 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2695 {
2696     if (sve_access_check(s)) {
2697         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2698     }
2699     return true;
2700 }
2701
2702 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2703 {
2704     if (sve_access_check(s)) {
2705         int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2706         TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2707         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2708         tcg_temp_free_i64(t);
2709     }
2710     return true;
2711 }
2712
2713 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2714 {
2715     static gen_helper_gvec_3 * const fns[4] = {
2716         NULL,
2717         gen_helper_sve_revb_h,
2718         gen_helper_sve_revb_s,
2719         gen_helper_sve_revb_d,
2720     };
2721     return do_zpz_ool(s, a, fns[a->esz]);
2722 }
2723
2724 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2725 {
2726     static gen_helper_gvec_3 * const fns[4] = {
2727         NULL,
2728         NULL,
2729         gen_helper_sve_revh_s,
2730         gen_helper_sve_revh_d,
2731     };
2732     return do_zpz_ool(s, a, fns[a->esz]);
2733 }
2734
2735 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2736 {
2737     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2738 }
2739
2740 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2741 {
2742     static gen_helper_gvec_3 * const fns[4] = {
2743         gen_helper_sve_rbit_b,
2744         gen_helper_sve_rbit_h,
2745         gen_helper_sve_rbit_s,
2746         gen_helper_sve_rbit_d,
2747     };
2748     return do_zpz_ool(s, a, fns[a->esz]);
2749 }
2750
2751 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2752 {
2753     if (sve_access_check(s)) {
2754         unsigned vsz = vec_full_reg_size(s);
2755         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2756                            vec_full_reg_offset(s, a->rn),
2757                            vec_full_reg_offset(s, a->rm),
2758                            pred_full_reg_offset(s, a->pg),
2759                            vsz, vsz, a->esz, gen_helper_sve_splice);
2760     }
2761     return true;
2762 }
2763
2764 /*
2765  *** SVE Integer Compare - Vectors Group
2766  */
2767
2768 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2769                           gen_helper_gvec_flags_4 *gen_fn)
2770 {
2771     TCGv_ptr pd, zn, zm, pg;
2772     unsigned vsz;
2773     TCGv_i32 t;
2774
2775     if (gen_fn == NULL) {
2776         return false;
2777     }
2778     if (!sve_access_check(s)) {
2779         return true;
2780     }
2781
2782     vsz = vec_full_reg_size(s);
2783     t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2784     pd = tcg_temp_new_ptr();
2785     zn = tcg_temp_new_ptr();
2786     zm = tcg_temp_new_ptr();
2787     pg = tcg_temp_new_ptr();
2788
2789     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2790     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2791     tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2792     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2793
2794     gen_fn(t, pd, zn, zm, pg, t);
2795
2796     tcg_temp_free_ptr(pd);
2797     tcg_temp_free_ptr(zn);
2798     tcg_temp_free_ptr(zm);
2799     tcg_temp_free_ptr(pg);
2800
2801     do_pred_flags(t);
2802
2803     tcg_temp_free_i32(t);
2804     return true;
2805 }
2806
2807 #define DO_PPZZ(NAME, name) \
2808 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,         \
2809                                 uint32_t insn)                            \
2810 {                                                                         \
2811     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2812         gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
2813         gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
2814     };                                                                    \
2815     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2816 }
2817
2818 DO_PPZZ(CMPEQ, cmpeq)
2819 DO_PPZZ(CMPNE, cmpne)
2820 DO_PPZZ(CMPGT, cmpgt)
2821 DO_PPZZ(CMPGE, cmpge)
2822 DO_PPZZ(CMPHI, cmphi)
2823 DO_PPZZ(CMPHS, cmphs)
2824
2825 #undef DO_PPZZ
2826
2827 #define DO_PPZW(NAME, name) \
2828 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a,         \
2829                                 uint32_t insn)                            \
2830 {                                                                         \
2831     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2832         gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
2833         gen_helper_sve_##name##_ppzw_s, NULL                              \
2834     };                                                                    \
2835     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2836 }
2837
2838 DO_PPZW(CMPEQ, cmpeq)
2839 DO_PPZW(CMPNE, cmpne)
2840 DO_PPZW(CMPGT, cmpgt)
2841 DO_PPZW(CMPGE, cmpge)
2842 DO_PPZW(CMPHI, cmphi)
2843 DO_PPZW(CMPHS, cmphs)
2844 DO_PPZW(CMPLT, cmplt)
2845 DO_PPZW(CMPLE, cmple)
2846 DO_PPZW(CMPLO, cmplo)
2847 DO_PPZW(CMPLS, cmpls)
2848
2849 #undef DO_PPZW
2850
2851 /*
2852  *** SVE Integer Compare - Immediate Groups
2853  */
2854
2855 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2856                           gen_helper_gvec_flags_3 *gen_fn)
2857 {
2858     TCGv_ptr pd, zn, pg;
2859     unsigned vsz;
2860     TCGv_i32 t;
2861
2862     if (gen_fn == NULL) {
2863         return false;
2864     }
2865     if (!sve_access_check(s)) {
2866         return true;
2867     }
2868
2869     vsz = vec_full_reg_size(s);
2870     t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2871     pd = tcg_temp_new_ptr();
2872     zn = tcg_temp_new_ptr();
2873     pg = tcg_temp_new_ptr();
2874
2875     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2876     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2877     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2878
2879     gen_fn(t, pd, zn, pg, t);
2880
2881     tcg_temp_free_ptr(pd);
2882     tcg_temp_free_ptr(zn);
2883     tcg_temp_free_ptr(pg);
2884
2885     do_pred_flags(t);
2886
2887     tcg_temp_free_i32(t);
2888     return true;
2889 }
2890
2891 #define DO_PPZI(NAME, name) \
2892 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a,         \
2893                                 uint32_t insn)                            \
2894 {                                                                         \
2895     static gen_helper_gvec_flags_3 * const fns[4] = {                     \
2896         gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
2897         gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
2898     };                                                                    \
2899     return do_ppzi_flags(s, a, fns[a->esz]);                              \
2900 }
2901
2902 DO_PPZI(CMPEQ, cmpeq)
2903 DO_PPZI(CMPNE, cmpne)
2904 DO_PPZI(CMPGT, cmpgt)
2905 DO_PPZI(CMPGE, cmpge)
2906 DO_PPZI(CMPHI, cmphi)
2907 DO_PPZI(CMPHS, cmphs)
2908 DO_PPZI(CMPLT, cmplt)
2909 DO_PPZI(CMPLE, cmple)
2910 DO_PPZI(CMPLO, cmplo)
2911 DO_PPZI(CMPLS, cmpls)
2912
2913 #undef DO_PPZI
2914
2915 /*
2916  *** SVE Partition Break Group
2917  */
2918
2919 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2920                     gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2921 {
2922     if (!sve_access_check(s)) {
2923         return true;
2924     }
2925
2926     unsigned vsz = pred_full_reg_size(s);
2927
2928     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2929     TCGv_ptr d = tcg_temp_new_ptr();
2930     TCGv_ptr n = tcg_temp_new_ptr();
2931     TCGv_ptr m = tcg_temp_new_ptr();
2932     TCGv_ptr g = tcg_temp_new_ptr();
2933     TCGv_i32 t = tcg_const_i32(vsz - 2);
2934
2935     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2936     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2937     tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2938     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2939
2940     if (a->s) {
2941         fn_s(t, d, n, m, g, t);
2942         do_pred_flags(t);
2943     } else {
2944         fn(d, n, m, g, t);
2945     }
2946     tcg_temp_free_ptr(d);
2947     tcg_temp_free_ptr(n);
2948     tcg_temp_free_ptr(m);
2949     tcg_temp_free_ptr(g);
2950     tcg_temp_free_i32(t);
2951     return true;
2952 }
2953
2954 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2955                     gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2956 {
2957     if (!sve_access_check(s)) {
2958         return true;
2959     }
2960
2961     unsigned vsz = pred_full_reg_size(s);
2962
2963     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2964     TCGv_ptr d = tcg_temp_new_ptr();
2965     TCGv_ptr n = tcg_temp_new_ptr();
2966     TCGv_ptr g = tcg_temp_new_ptr();
2967     TCGv_i32 t = tcg_const_i32(vsz - 2);
2968
2969     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2970     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2971     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2972
2973     if (a->s) {
2974         fn_s(t, d, n, g, t);
2975         do_pred_flags(t);
2976     } else {
2977         fn(d, n, g, t);
2978     }
2979     tcg_temp_free_ptr(d);
2980     tcg_temp_free_ptr(n);
2981     tcg_temp_free_ptr(g);
2982     tcg_temp_free_i32(t);
2983     return true;
2984 }
2985
2986 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2987 {
2988     return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2989 }
2990
2991 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2992 {
2993     return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2994 }
2995
2996 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2997 {
2998     return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2999 }
3000
3001 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3002 {
3003     return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
3004 }
3005
3006 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3007 {
3008     return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
3009 }
3010
3011 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3012 {
3013     return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3014 }
3015
3016 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3017 {
3018     return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3019 }
3020
3021 /*
3022  *** SVE Predicate Count Group
3023  */
3024
3025 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3026 {
3027     unsigned psz = pred_full_reg_size(s);
3028
3029     if (psz <= 8) {
3030         uint64_t psz_mask;
3031
3032         tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3033         if (pn != pg) {
3034             TCGv_i64 g = tcg_temp_new_i64();
3035             tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3036             tcg_gen_and_i64(val, val, g);
3037             tcg_temp_free_i64(g);
3038         }
3039
3040         /* Reduce the pred_esz_masks value simply to reduce the
3041          * size of the code generated here.
3042          */
3043         psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3044         tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3045
3046         tcg_gen_ctpop_i64(val, val);
3047     } else {
3048         TCGv_ptr t_pn = tcg_temp_new_ptr();
3049         TCGv_ptr t_pg = tcg_temp_new_ptr();
3050         unsigned desc;
3051         TCGv_i32 t_desc;
3052
3053         desc = psz - 2;
3054         desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3055
3056         tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3057         tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3058         t_desc = tcg_const_i32(desc);
3059
3060         gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3061         tcg_temp_free_ptr(t_pn);
3062         tcg_temp_free_ptr(t_pg);
3063         tcg_temp_free_i32(t_desc);
3064     }
3065 }
3066
3067 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3068 {
3069     if (sve_access_check(s)) {
3070         do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3071     }
3072     return true;
3073 }
3074
3075 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3076                             uint32_t insn)
3077 {
3078     if (sve_access_check(s)) {
3079         TCGv_i64 reg = cpu_reg(s, a->rd);
3080         TCGv_i64 val = tcg_temp_new_i64();
3081
3082         do_cntp(s, val, a->esz, a->pg, a->pg);
3083         if (a->d) {
3084             tcg_gen_sub_i64(reg, reg, val);
3085         } else {
3086             tcg_gen_add_i64(reg, reg, val);
3087         }
3088         tcg_temp_free_i64(val);
3089     }
3090     return true;
3091 }
3092
3093 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3094                             uint32_t insn)
3095 {
3096     if (a->esz == 0) {
3097         return false;
3098     }
3099     if (sve_access_check(s)) {
3100         unsigned vsz = vec_full_reg_size(s);
3101         TCGv_i64 val = tcg_temp_new_i64();
3102         GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3103
3104         do_cntp(s, val, a->esz, a->pg, a->pg);
3105         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3106                 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3107     }
3108     return true;
3109 }
3110
3111 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3112                                 uint32_t insn)
3113 {
3114     if (sve_access_check(s)) {
3115         TCGv_i64 reg = cpu_reg(s, a->rd);
3116         TCGv_i64 val = tcg_temp_new_i64();
3117
3118         do_cntp(s, val, a->esz, a->pg, a->pg);
3119         do_sat_addsub_32(reg, val, a->u, a->d);
3120     }
3121     return true;
3122 }
3123
3124 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3125                                 uint32_t insn)
3126 {
3127     if (sve_access_check(s)) {
3128         TCGv_i64 reg = cpu_reg(s, a->rd);
3129         TCGv_i64 val = tcg_temp_new_i64();
3130
3131         do_cntp(s, val, a->esz, a->pg, a->pg);
3132         do_sat_addsub_64(reg, val, a->u, a->d);
3133     }
3134     return true;
3135 }
3136
3137 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3138                              uint32_t insn)
3139 {
3140     if (a->esz == 0) {
3141         return false;
3142     }
3143     if (sve_access_check(s)) {
3144         TCGv_i64 val = tcg_temp_new_i64();
3145         do_cntp(s, val, a->esz, a->pg, a->pg);
3146         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3147     }
3148     return true;
3149 }
3150
3151 /*
3152  *** SVE Integer Compare Scalars Group
3153  */
3154
3155 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3156 {
3157     if (!sve_access_check(s)) {
3158         return true;
3159     }
3160
3161     TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3162     TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3163     TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3164     TCGv_i64 cmp = tcg_temp_new_i64();
3165
3166     tcg_gen_setcond_i64(cond, cmp, rn, rm);
3167     tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3168     tcg_temp_free_i64(cmp);
3169
3170     /* VF = !NF & !CF.  */
3171     tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3172     tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3173
3174     /* Both NF and VF actually look at bit 31.  */
3175     tcg_gen_neg_i32(cpu_NF, cpu_NF);
3176     tcg_gen_neg_i32(cpu_VF, cpu_VF);
3177     return true;
3178 }
3179
3180 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3181 {
3182     if (!sve_access_check(s)) {
3183         return true;
3184     }
3185
3186     TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3187     TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3188     TCGv_i64 t0 = tcg_temp_new_i64();
3189     TCGv_i64 t1 = tcg_temp_new_i64();
3190     TCGv_i32 t2, t3;
3191     TCGv_ptr ptr;
3192     unsigned desc, vsz = vec_full_reg_size(s);
3193     TCGCond cond;
3194
3195     if (!a->sf) {
3196         if (a->u) {
3197             tcg_gen_ext32u_i64(op0, op0);
3198             tcg_gen_ext32u_i64(op1, op1);
3199         } else {
3200             tcg_gen_ext32s_i64(op0, op0);
3201             tcg_gen_ext32s_i64(op1, op1);
3202         }
3203     }
3204
3205     /* For the helper, compress the different conditions into a computation
3206      * of how many iterations for which the condition is true.
3207      *
3208      * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3209      * 2**64 iterations, overflowing to 0.  Of course, predicate registers
3210      * aren't that large, so any value >= predicate size is sufficient.
3211      */
3212     tcg_gen_sub_i64(t0, op1, op0);
3213
3214     /* t0 = MIN(op1 - op0, vsz).  */
3215     tcg_gen_movi_i64(t1, vsz);
3216     tcg_gen_umin_i64(t0, t0, t1);
3217     if (a->eq) {
3218         /* Equality means one more iteration.  */
3219         tcg_gen_addi_i64(t0, t0, 1);
3220     }
3221
3222     /* t0 = (condition true ? t0 : 0).  */
3223     cond = (a->u
3224             ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3225             : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3226     tcg_gen_movi_i64(t1, 0);
3227     tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3228
3229     t2 = tcg_temp_new_i32();
3230     tcg_gen_extrl_i64_i32(t2, t0);
3231     tcg_temp_free_i64(t0);
3232     tcg_temp_free_i64(t1);
3233
3234     desc = (vsz / 8) - 2;
3235     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3236     t3 = tcg_const_i32(desc);
3237
3238     ptr = tcg_temp_new_ptr();
3239     tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3240
3241     gen_helper_sve_while(t2, ptr, t2, t3);
3242     do_pred_flags(t2);
3243
3244     tcg_temp_free_ptr(ptr);
3245     tcg_temp_free_i32(t2);
3246     tcg_temp_free_i32(t3);
3247     return true;
3248 }
3249
3250 /*
3251  *** SVE Integer Wide Immediate - Unpredicated Group
3252  */
3253
3254 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3255 {
3256     if (a->esz == 0) {
3257         return false;
3258     }
3259     if (sve_access_check(s)) {
3260         unsigned vsz = vec_full_reg_size(s);
3261         int dofs = vec_full_reg_offset(s, a->rd);
3262         uint64_t imm;
3263
3264         /* Decode the VFP immediate.  */
3265         imm = vfp_expand_imm(a->esz, a->imm);
3266         imm = dup_const(a->esz, imm);
3267
3268         tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3269     }
3270     return true;
3271 }
3272
3273 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3274 {
3275     if (a->esz == 0 && extract32(insn, 13, 1)) {
3276         return false;
3277     }
3278     if (sve_access_check(s)) {
3279         unsigned vsz = vec_full_reg_size(s);
3280         int dofs = vec_full_reg_offset(s, a->rd);
3281
3282         tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3283     }
3284     return true;
3285 }
3286
3287 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3288 {
3289     if (a->esz == 0 && extract32(insn, 13, 1)) {
3290         return false;
3291     }
3292     if (sve_access_check(s)) {
3293         unsigned vsz = vec_full_reg_size(s);
3294         tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3295                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3296     }
3297     return true;
3298 }
3299
3300 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3301 {
3302     a->imm = -a->imm;
3303     return trans_ADD_zzi(s, a, insn);
3304 }
3305
3306 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3307 {
3308     static const GVecGen2s op[4] = {
3309         { .fni8 = tcg_gen_vec_sub8_i64,
3310           .fniv = tcg_gen_sub_vec,
3311           .fno = gen_helper_sve_subri_b,
3312           .opc = INDEX_op_sub_vec,
3313           .vece = MO_8,
3314           .scalar_first = true },
3315         { .fni8 = tcg_gen_vec_sub16_i64,
3316           .fniv = tcg_gen_sub_vec,
3317           .fno = gen_helper_sve_subri_h,
3318           .opc = INDEX_op_sub_vec,
3319           .vece = MO_16,
3320           .scalar_first = true },
3321         { .fni4 = tcg_gen_sub_i32,
3322           .fniv = tcg_gen_sub_vec,
3323           .fno = gen_helper_sve_subri_s,
3324           .opc = INDEX_op_sub_vec,
3325           .vece = MO_32,
3326           .scalar_first = true },
3327         { .fni8 = tcg_gen_sub_i64,
3328           .fniv = tcg_gen_sub_vec,
3329           .fno = gen_helper_sve_subri_d,
3330           .opc = INDEX_op_sub_vec,
3331           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3332           .vece = MO_64,
3333           .scalar_first = true }
3334     };
3335
3336     if (a->esz == 0 && extract32(insn, 13, 1)) {
3337         return false;
3338     }
3339     if (sve_access_check(s)) {
3340         unsigned vsz = vec_full_reg_size(s);
3341         TCGv_i64 c = tcg_const_i64(a->imm);
3342         tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3343                         vec_full_reg_offset(s, a->rn),
3344                         vsz, vsz, c, &op[a->esz]);
3345         tcg_temp_free_i64(c);
3346     }
3347     return true;
3348 }
3349
3350 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3351 {
3352     if (sve_access_check(s)) {
3353         unsigned vsz = vec_full_reg_size(s);
3354         tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3355                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3356     }
3357     return true;
3358 }
3359
3360 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3361                        bool u, bool d)
3362 {
3363     if (a->esz == 0 && extract32(insn, 13, 1)) {
3364         return false;
3365     }
3366     if (sve_access_check(s)) {
3367         TCGv_i64 val = tcg_const_i64(a->imm);
3368         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3369         tcg_temp_free_i64(val);
3370     }
3371     return true;
3372 }
3373
3374 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3375 {
3376     return do_zzi_sat(s, a, insn, false, false);
3377 }
3378
3379 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3380 {
3381     return do_zzi_sat(s, a, insn, true, false);
3382 }
3383
3384 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3385 {
3386     return do_zzi_sat(s, a, insn, false, true);
3387 }
3388
3389 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3390 {
3391     return do_zzi_sat(s, a, insn, true, true);
3392 }
3393
3394 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3395 {
3396     if (sve_access_check(s)) {
3397         unsigned vsz = vec_full_reg_size(s);
3398         TCGv_i64 c = tcg_const_i64(a->imm);
3399
3400         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3401                             vec_full_reg_offset(s, a->rn),
3402                             c, vsz, vsz, 0, fn);
3403         tcg_temp_free_i64(c);
3404     }
3405     return true;
3406 }
3407
3408 #define DO_ZZI(NAME, name) \
3409 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a,         \
3410                                uint32_t insn)                           \
3411 {                                                                       \
3412     static gen_helper_gvec_2i * const fns[4] = {                        \
3413         gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
3414         gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
3415     };                                                                  \
3416     return do_zzi_ool(s, a, fns[a->esz]);                               \
3417 }
3418
3419 DO_ZZI(SMAX, smax)
3420 DO_ZZI(UMAX, umax)
3421 DO_ZZI(SMIN, smin)
3422 DO_ZZI(UMIN, umin)
3423
3424 #undef DO_ZZI
3425
3426 static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a, uint32_t insn)
3427 {
3428     static gen_helper_gvec_3 * const fns[2][2] = {
3429         { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3430         { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3431     };
3432
3433     if (sve_access_check(s)) {
3434         unsigned vsz = vec_full_reg_size(s);
3435         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3436                            vec_full_reg_offset(s, a->rn),
3437                            vec_full_reg_offset(s, a->rm),
3438                            vsz, vsz, 0, fns[a->u][a->sz]);
3439     }
3440     return true;
3441 }
3442
3443 static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a, uint32_t insn)
3444 {
3445     static gen_helper_gvec_3 * const fns[2][2] = {
3446         { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3447         { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3448     };
3449
3450     if (sve_access_check(s)) {
3451         unsigned vsz = vec_full_reg_size(s);
3452         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3453                            vec_full_reg_offset(s, a->rn),
3454                            vec_full_reg_offset(s, a->rm),
3455                            vsz, vsz, a->index, fns[a->u][a->sz]);
3456     }
3457     return true;
3458 }
3459
3460
3461 /*
3462  *** SVE Floating Point Multiply-Add Indexed Group
3463  */
3464
3465 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
3466 {
3467     static gen_helper_gvec_4_ptr * const fns[3] = {
3468         gen_helper_gvec_fmla_idx_h,
3469         gen_helper_gvec_fmla_idx_s,
3470         gen_helper_gvec_fmla_idx_d,
3471     };
3472
3473     if (sve_access_check(s)) {
3474         unsigned vsz = vec_full_reg_size(s);
3475         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3476         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3477                            vec_full_reg_offset(s, a->rn),
3478                            vec_full_reg_offset(s, a->rm),
3479                            vec_full_reg_offset(s, a->ra),
3480                            status, vsz, vsz, (a->index << 1) | a->sub,
3481                            fns[a->esz - 1]);
3482         tcg_temp_free_ptr(status);
3483     }
3484     return true;
3485 }
3486
3487 /*
3488  *** SVE Floating Point Multiply Indexed Group
3489  */
3490
3491 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
3492 {
3493     static gen_helper_gvec_3_ptr * const fns[3] = {
3494         gen_helper_gvec_fmul_idx_h,
3495         gen_helper_gvec_fmul_idx_s,
3496         gen_helper_gvec_fmul_idx_d,
3497     };
3498
3499     if (sve_access_check(s)) {
3500         unsigned vsz = vec_full_reg_size(s);
3501         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3502         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3503                            vec_full_reg_offset(s, a->rn),
3504                            vec_full_reg_offset(s, a->rm),
3505                            status, vsz, vsz, a->index, fns[a->esz - 1]);
3506         tcg_temp_free_ptr(status);
3507     }
3508     return true;
3509 }
3510
3511 /*
3512  *** SVE Floating Point Fast Reduction Group
3513  */
3514
3515 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3516                                   TCGv_ptr, TCGv_i32);
3517
3518 static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3519                       gen_helper_fp_reduce *fn)
3520 {
3521     unsigned vsz = vec_full_reg_size(s);
3522     unsigned p2vsz = pow2ceil(vsz);
3523     TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3524     TCGv_ptr t_zn, t_pg, status;
3525     TCGv_i64 temp;
3526
3527     temp = tcg_temp_new_i64();
3528     t_zn = tcg_temp_new_ptr();
3529     t_pg = tcg_temp_new_ptr();
3530
3531     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3532     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3533     status = get_fpstatus_ptr(a->esz == MO_16);
3534
3535     fn(temp, t_zn, t_pg, status, t_desc);
3536     tcg_temp_free_ptr(t_zn);
3537     tcg_temp_free_ptr(t_pg);
3538     tcg_temp_free_ptr(status);
3539     tcg_temp_free_i32(t_desc);
3540
3541     write_fp_dreg(s, a->rd, temp);
3542     tcg_temp_free_i64(temp);
3543 }
3544
3545 #define DO_VPZ(NAME, name) \
3546 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
3547 {                                                                        \
3548     static gen_helper_fp_reduce * const fns[3] = {                       \
3549         gen_helper_sve_##name##_h,                                       \
3550         gen_helper_sve_##name##_s,                                       \
3551         gen_helper_sve_##name##_d,                                       \
3552     };                                                                   \
3553     if (a->esz == 0) {                                                   \
3554         return false;                                                    \
3555     }                                                                    \
3556     if (sve_access_check(s)) {                                           \
3557         do_reduce(s, a, fns[a->esz - 1]);                                \
3558     }                                                                    \
3559     return true;                                                         \
3560 }
3561
3562 DO_VPZ(FADDV, faddv)
3563 DO_VPZ(FMINNMV, fminnmv)
3564 DO_VPZ(FMAXNMV, fmaxnmv)
3565 DO_VPZ(FMINV, fminv)
3566 DO_VPZ(FMAXV, fmaxv)
3567
3568 /*
3569  *** SVE Floating Point Unary Operations - Unpredicated Group
3570  */
3571
3572 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3573 {
3574     unsigned vsz = vec_full_reg_size(s);
3575     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3576
3577     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3578                        vec_full_reg_offset(s, a->rn),
3579                        status, vsz, vsz, 0, fn);
3580     tcg_temp_free_ptr(status);
3581 }
3582
3583 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3584 {
3585     static gen_helper_gvec_2_ptr * const fns[3] = {
3586         gen_helper_gvec_frecpe_h,
3587         gen_helper_gvec_frecpe_s,
3588         gen_helper_gvec_frecpe_d,
3589     };
3590     if (a->esz == 0) {
3591         return false;
3592     }
3593     if (sve_access_check(s)) {
3594         do_zz_fp(s, a, fns[a->esz - 1]);
3595     }
3596     return true;
3597 }
3598
3599 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3600 {
3601     static gen_helper_gvec_2_ptr * const fns[3] = {
3602         gen_helper_gvec_frsqrte_h,
3603         gen_helper_gvec_frsqrte_s,
3604         gen_helper_gvec_frsqrte_d,
3605     };
3606     if (a->esz == 0) {
3607         return false;
3608     }
3609     if (sve_access_check(s)) {
3610         do_zz_fp(s, a, fns[a->esz - 1]);
3611     }
3612     return true;
3613 }
3614
3615 /*
3616  *** SVE Floating Point Compare with Zero Group
3617  */
3618
3619 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3620                       gen_helper_gvec_3_ptr *fn)
3621 {
3622     unsigned vsz = vec_full_reg_size(s);
3623     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3624
3625     tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3626                        vec_full_reg_offset(s, a->rn),
3627                        pred_full_reg_offset(s, a->pg),
3628                        status, vsz, vsz, 0, fn);
3629     tcg_temp_free_ptr(status);
3630 }
3631
3632 #define DO_PPZ(NAME, name) \
3633 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
3634 {                                                                 \
3635     static gen_helper_gvec_3_ptr * const fns[3] = {               \
3636         gen_helper_sve_##name##_h,                                \
3637         gen_helper_sve_##name##_s,                                \
3638         gen_helper_sve_##name##_d,                                \
3639     };                                                            \
3640     if (a->esz == 0) {                                            \
3641         return false;                                             \
3642     }                                                             \
3643     if (sve_access_check(s)) {                                    \
3644         do_ppz_fp(s, a, fns[a->esz - 1]);                         \
3645     }                                                             \
3646     return true;                                                  \
3647 }
3648
3649 DO_PPZ(FCMGE_ppz0, fcmge0)
3650 DO_PPZ(FCMGT_ppz0, fcmgt0)
3651 DO_PPZ(FCMLE_ppz0, fcmle0)
3652 DO_PPZ(FCMLT_ppz0, fcmlt0)
3653 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3654 DO_PPZ(FCMNE_ppz0, fcmne0)
3655
3656 #undef DO_PPZ
3657
3658 /*
3659  *** SVE floating-point trig multiply-add coefficient
3660  */
3661
3662 static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a, uint32_t insn)
3663 {
3664     static gen_helper_gvec_3_ptr * const fns[3] = {
3665         gen_helper_sve_ftmad_h,
3666         gen_helper_sve_ftmad_s,
3667         gen_helper_sve_ftmad_d,
3668     };
3669
3670     if (a->esz == 0) {
3671         return false;
3672     }
3673     if (sve_access_check(s)) {
3674         unsigned vsz = vec_full_reg_size(s);
3675         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3676         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3677                            vec_full_reg_offset(s, a->rn),
3678                            vec_full_reg_offset(s, a->rm),
3679                            status, vsz, vsz, a->imm, fns[a->esz - 1]);
3680         tcg_temp_free_ptr(status);
3681     }
3682     return true;
3683 }
3684
3685 /*
3686  *** SVE Floating Point Accumulating Reduction Group
3687  */
3688
3689 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
3690 {
3691     typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3692                           TCGv_ptr, TCGv_ptr, TCGv_i32);
3693     static fadda_fn * const fns[3] = {
3694         gen_helper_sve_fadda_h,
3695         gen_helper_sve_fadda_s,
3696         gen_helper_sve_fadda_d,
3697     };
3698     unsigned vsz = vec_full_reg_size(s);
3699     TCGv_ptr t_rm, t_pg, t_fpst;
3700     TCGv_i64 t_val;
3701     TCGv_i32 t_desc;
3702
3703     if (a->esz == 0) {
3704         return false;
3705     }
3706     if (!sve_access_check(s)) {
3707         return true;
3708     }
3709
3710     t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3711     t_rm = tcg_temp_new_ptr();
3712     t_pg = tcg_temp_new_ptr();
3713     tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3714     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3715     t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3716     t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3717
3718     fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3719
3720     tcg_temp_free_i32(t_desc);
3721     tcg_temp_free_ptr(t_fpst);
3722     tcg_temp_free_ptr(t_pg);
3723     tcg_temp_free_ptr(t_rm);
3724
3725     write_fp_dreg(s, a->rd, t_val);
3726     tcg_temp_free_i64(t_val);
3727     return true;
3728 }
3729
3730 /*
3731  *** SVE Floating Point Arithmetic - Unpredicated Group
3732  */
3733
3734 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3735                       gen_helper_gvec_3_ptr *fn)
3736 {
3737     if (fn == NULL) {
3738         return false;
3739     }
3740     if (sve_access_check(s)) {
3741         unsigned vsz = vec_full_reg_size(s);
3742         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3743         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3744                            vec_full_reg_offset(s, a->rn),
3745                            vec_full_reg_offset(s, a->rm),
3746                            status, vsz, vsz, 0, fn);
3747         tcg_temp_free_ptr(status);
3748     }
3749     return true;
3750 }
3751
3752
3753 #define DO_FP3(NAME, name) \
3754 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3755 {                                                                   \
3756     static gen_helper_gvec_3_ptr * const fns[4] = {                 \
3757         NULL, gen_helper_gvec_##name##_h,                           \
3758         gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
3759     };                                                              \
3760     return do_zzz_fp(s, a, fns[a->esz]);                            \
3761 }
3762
3763 DO_FP3(FADD_zzz, fadd)
3764 DO_FP3(FSUB_zzz, fsub)
3765 DO_FP3(FMUL_zzz, fmul)
3766 DO_FP3(FTSMUL, ftsmul)
3767 DO_FP3(FRECPS, recps)
3768 DO_FP3(FRSQRTS, rsqrts)
3769
3770 #undef DO_FP3
3771
3772 /*
3773  *** SVE Floating Point Arithmetic - Predicated Group
3774  */
3775
3776 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3777                        gen_helper_gvec_4_ptr *fn)
3778 {
3779     if (fn == NULL) {
3780         return false;
3781     }
3782     if (sve_access_check(s)) {
3783         unsigned vsz = vec_full_reg_size(s);
3784         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3785         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3786                            vec_full_reg_offset(s, a->rn),
3787                            vec_full_reg_offset(s, a->rm),
3788                            pred_full_reg_offset(s, a->pg),
3789                            status, vsz, vsz, 0, fn);
3790         tcg_temp_free_ptr(status);
3791     }
3792     return true;
3793 }
3794
3795 #define DO_FP3(NAME, name) \
3796 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3797 {                                                                   \
3798     static gen_helper_gvec_4_ptr * const fns[4] = {                 \
3799         NULL, gen_helper_sve_##name##_h,                            \
3800         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
3801     };                                                              \
3802     return do_zpzz_fp(s, a, fns[a->esz]);                           \
3803 }
3804
3805 DO_FP3(FADD_zpzz, fadd)
3806 DO_FP3(FSUB_zpzz, fsub)
3807 DO_FP3(FMUL_zpzz, fmul)
3808 DO_FP3(FMIN_zpzz, fmin)
3809 DO_FP3(FMAX_zpzz, fmax)
3810 DO_FP3(FMINNM_zpzz, fminnum)
3811 DO_FP3(FMAXNM_zpzz, fmaxnum)
3812 DO_FP3(FABD, fabd)
3813 DO_FP3(FSCALE, fscalbn)
3814 DO_FP3(FDIV, fdiv)
3815 DO_FP3(FMULX, fmulx)
3816
3817 #undef DO_FP3
3818
3819 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3820                                       TCGv_i64, TCGv_ptr, TCGv_i32);
3821
3822 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3823                          TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3824 {
3825     unsigned vsz = vec_full_reg_size(s);
3826     TCGv_ptr t_zd, t_zn, t_pg, status;
3827     TCGv_i32 desc;
3828
3829     t_zd = tcg_temp_new_ptr();
3830     t_zn = tcg_temp_new_ptr();
3831     t_pg = tcg_temp_new_ptr();
3832     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3833     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3834     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3835
3836     status = get_fpstatus_ptr(is_fp16);
3837     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3838     fn(t_zd, t_zn, t_pg, scalar, status, desc);
3839
3840     tcg_temp_free_i32(desc);
3841     tcg_temp_free_ptr(status);
3842     tcg_temp_free_ptr(t_pg);
3843     tcg_temp_free_ptr(t_zn);
3844     tcg_temp_free_ptr(t_zd);
3845 }
3846
3847 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3848                       gen_helper_sve_fp2scalar *fn)
3849 {
3850     TCGv_i64 temp = tcg_const_i64(imm);
3851     do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3852     tcg_temp_free_i64(temp);
3853 }
3854
3855 #define DO_FP_IMM(NAME, name, const0, const1) \
3856 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a,         \
3857                                 uint32_t insn)                            \
3858 {                                                                         \
3859     static gen_helper_sve_fp2scalar * const fns[3] = {                    \
3860         gen_helper_sve_##name##_h,                                        \
3861         gen_helper_sve_##name##_s,                                        \
3862         gen_helper_sve_##name##_d                                         \
3863     };                                                                    \
3864     static uint64_t const val[3][2] = {                                   \
3865         { float16_##const0, float16_##const1 },                           \
3866         { float32_##const0, float32_##const1 },                           \
3867         { float64_##const0, float64_##const1 },                           \
3868     };                                                                    \
3869     if (a->esz == 0) {                                                    \
3870         return false;                                                     \
3871     }                                                                     \
3872     if (sve_access_check(s)) {                                            \
3873         do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
3874     }                                                                     \
3875     return true;                                                          \
3876 }
3877
3878 #define float16_two  make_float16(0x4000)
3879 #define float32_two  make_float32(0x40000000)
3880 #define float64_two  make_float64(0x4000000000000000ULL)
3881
3882 DO_FP_IMM(FADD, fadds, half, one)
3883 DO_FP_IMM(FSUB, fsubs, half, one)
3884 DO_FP_IMM(FMUL, fmuls, half, two)
3885 DO_FP_IMM(FSUBR, fsubrs, half, one)
3886 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3887 DO_FP_IMM(FMINNM, fminnms, zero, one)
3888 DO_FP_IMM(FMAX, fmaxs, zero, one)
3889 DO_FP_IMM(FMIN, fmins, zero, one)
3890
3891 #undef DO_FP_IMM
3892
3893 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3894                       gen_helper_gvec_4_ptr *fn)
3895 {
3896     if (fn == NULL) {
3897         return false;
3898     }
3899     if (sve_access_check(s)) {
3900         unsigned vsz = vec_full_reg_size(s);
3901         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3902         tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3903                            vec_full_reg_offset(s, a->rn),
3904                            vec_full_reg_offset(s, a->rm),
3905                            pred_full_reg_offset(s, a->pg),
3906                            status, vsz, vsz, 0, fn);
3907         tcg_temp_free_ptr(status);
3908     }
3909     return true;
3910 }
3911
3912 #define DO_FPCMP(NAME, name) \
3913 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,     \
3914                                 uint32_t insn)                        \
3915 {                                                                     \
3916     static gen_helper_gvec_4_ptr * const fns[4] = {                   \
3917         NULL, gen_helper_sve_##name##_h,                              \
3918         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
3919     };                                                                \
3920     return do_fp_cmp(s, a, fns[a->esz]);                              \
3921 }
3922
3923 DO_FPCMP(FCMGE, fcmge)
3924 DO_FPCMP(FCMGT, fcmgt)
3925 DO_FPCMP(FCMEQ, fcmeq)
3926 DO_FPCMP(FCMNE, fcmne)
3927 DO_FPCMP(FCMUO, fcmuo)
3928 DO_FPCMP(FACGE, facge)
3929 DO_FPCMP(FACGT, facgt)
3930
3931 #undef DO_FPCMP
3932
3933 static bool trans_FCADD(DisasContext *s, arg_FCADD *a, uint32_t insn)
3934 {
3935     static gen_helper_gvec_4_ptr * const fns[3] = {
3936         gen_helper_sve_fcadd_h,
3937         gen_helper_sve_fcadd_s,
3938         gen_helper_sve_fcadd_d
3939     };
3940
3941     if (a->esz == 0) {
3942         return false;
3943     }
3944     if (sve_access_check(s)) {
3945         unsigned vsz = vec_full_reg_size(s);
3946         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3947         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3948                            vec_full_reg_offset(s, a->rn),
3949                            vec_full_reg_offset(s, a->rm),
3950                            pred_full_reg_offset(s, a->pg),
3951                            status, vsz, vsz, a->rot, fns[a->esz - 1]);
3952         tcg_temp_free_ptr(status);
3953     }
3954     return true;
3955 }
3956
3957 typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3958
3959 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3960 {
3961     if (fn == NULL) {
3962         return false;
3963     }
3964     if (!sve_access_check(s)) {
3965         return true;
3966     }
3967
3968     unsigned vsz = vec_full_reg_size(s);
3969     unsigned desc;
3970     TCGv_i32 t_desc;
3971     TCGv_ptr pg = tcg_temp_new_ptr();
3972
3973     /* We would need 7 operands to pass these arguments "properly".
3974      * So we encode all the register numbers into the descriptor.
3975      */
3976     desc = deposit32(a->rd, 5, 5, a->rn);
3977     desc = deposit32(desc, 10, 5, a->rm);
3978     desc = deposit32(desc, 15, 5, a->ra);
3979     desc = simd_desc(vsz, vsz, desc);
3980
3981     t_desc = tcg_const_i32(desc);
3982     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3983     fn(cpu_env, pg, t_desc);
3984     tcg_temp_free_i32(t_desc);
3985     tcg_temp_free_ptr(pg);
3986     return true;
3987 }
3988
3989 #define DO_FMLA(NAME, name) \
3990 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
3991 {                                                                    \
3992     static gen_helper_sve_fmla * const fns[4] = {                    \
3993         NULL, gen_helper_sve_##name##_h,                             \
3994         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
3995     };                                                               \
3996     return do_fmla(s, a, fns[a->esz]);                               \
3997 }
3998
3999 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
4000 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
4001 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
4002 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
4003
4004 #undef DO_FMLA
4005
4006 static bool trans_FCMLA_zpzzz(DisasContext *s,
4007                               arg_FCMLA_zpzzz *a, uint32_t insn)
4008 {
4009     static gen_helper_sve_fmla * const fns[3] = {
4010         gen_helper_sve_fcmla_zpzzz_h,
4011         gen_helper_sve_fcmla_zpzzz_s,
4012         gen_helper_sve_fcmla_zpzzz_d,
4013     };
4014
4015     if (a->esz == 0) {
4016         return false;
4017     }
4018     if (sve_access_check(s)) {
4019         unsigned vsz = vec_full_reg_size(s);
4020         unsigned desc;
4021         TCGv_i32 t_desc;
4022         TCGv_ptr pg = tcg_temp_new_ptr();
4023
4024         /* We would need 7 operands to pass these arguments "properly".
4025          * So we encode all the register numbers into the descriptor.
4026          */
4027         desc = deposit32(a->rd, 5, 5, a->rn);
4028         desc = deposit32(desc, 10, 5, a->rm);
4029         desc = deposit32(desc, 15, 5, a->ra);
4030         desc = deposit32(desc, 20, 2, a->rot);
4031         desc = sextract32(desc, 0, 22);
4032         desc = simd_desc(vsz, vsz, desc);
4033
4034         t_desc = tcg_const_i32(desc);
4035         tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
4036         fns[a->esz - 1](cpu_env, pg, t_desc);
4037         tcg_temp_free_i32(t_desc);
4038         tcg_temp_free_ptr(pg);
4039     }
4040     return true;
4041 }
4042
4043 static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a, uint32_t insn)
4044 {
4045     static gen_helper_gvec_3_ptr * const fns[2] = {
4046         gen_helper_gvec_fcmlah_idx,
4047         gen_helper_gvec_fcmlas_idx,
4048     };
4049
4050     tcg_debug_assert(a->esz == 1 || a->esz == 2);
4051     tcg_debug_assert(a->rd == a->ra);
4052     if (sve_access_check(s)) {
4053         unsigned vsz = vec_full_reg_size(s);
4054         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4055         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4056                            vec_full_reg_offset(s, a->rn),
4057                            vec_full_reg_offset(s, a->rm),
4058                            status, vsz, vsz,
4059                            a->index * 4 + a->rot,
4060                            fns[a->esz - 1]);
4061         tcg_temp_free_ptr(status);
4062     }
4063     return true;
4064 }
4065
4066 /*
4067  *** SVE Floating Point Unary Operations Predicated Group
4068  */
4069
4070 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4071                        bool is_fp16, gen_helper_gvec_3_ptr *fn)
4072 {
4073     if (sve_access_check(s)) {
4074         unsigned vsz = vec_full_reg_size(s);
4075         TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4076         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4077                            vec_full_reg_offset(s, rn),
4078                            pred_full_reg_offset(s, pg),
4079                            status, vsz, vsz, 0, fn);
4080         tcg_temp_free_ptr(status);
4081     }
4082     return true;
4083 }
4084
4085 static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4086 {
4087     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_sh);
4088 }
4089
4090 static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4091 {
4092     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4093 }
4094
4095 static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4096 {
4097     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_dh);
4098 }
4099
4100 static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4101 {
4102     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4103 }
4104
4105 static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4106 {
4107     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4108 }
4109
4110 static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4111 {
4112     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4113 }
4114
4115 static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4116 {
4117     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4118 }
4119
4120 static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4121 {
4122     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4123 }
4124
4125 static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4126 {
4127     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4128 }
4129
4130 static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4131 {
4132     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4133 }
4134
4135 static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4136 {
4137     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4138 }
4139
4140 static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4141 {
4142     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4143 }
4144
4145 static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4146 {
4147     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4148 }
4149
4150 static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4151 {
4152     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4153 }
4154
4155 static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4156 {
4157     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4158 }
4159
4160 static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4161 {
4162     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4163 }
4164
4165 static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4166 {
4167     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4168 }
4169
4170 static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4171 {
4172     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4173 }
4174
4175 static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4176 {
4177     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4178 }
4179
4180 static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4181 {
4182     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4183 }
4184
4185 static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4186     gen_helper_sve_frint_h,
4187     gen_helper_sve_frint_s,
4188     gen_helper_sve_frint_d
4189 };
4190
4191 static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4192 {
4193     if (a->esz == 0) {
4194         return false;
4195     }
4196     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4197                       frint_fns[a->esz - 1]);
4198 }
4199
4200 static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4201 {
4202     static gen_helper_gvec_3_ptr * const fns[3] = {
4203         gen_helper_sve_frintx_h,
4204         gen_helper_sve_frintx_s,
4205         gen_helper_sve_frintx_d
4206     };
4207     if (a->esz == 0) {
4208         return false;
4209     }
4210     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4211 }
4212
4213 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4214 {
4215     if (a->esz == 0) {
4216         return false;
4217     }
4218     if (sve_access_check(s)) {
4219         unsigned vsz = vec_full_reg_size(s);
4220         TCGv_i32 tmode = tcg_const_i32(mode);
4221         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4222
4223         gen_helper_set_rmode(tmode, tmode, status);
4224
4225         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4226                            vec_full_reg_offset(s, a->rn),
4227                            pred_full_reg_offset(s, a->pg),
4228                            status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4229
4230         gen_helper_set_rmode(tmode, tmode, status);
4231         tcg_temp_free_i32(tmode);
4232         tcg_temp_free_ptr(status);
4233     }
4234     return true;
4235 }
4236
4237 static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4238 {
4239     return do_frint_mode(s, a, float_round_nearest_even);
4240 }
4241
4242 static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4243 {
4244     return do_frint_mode(s, a, float_round_up);
4245 }
4246
4247 static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4248 {
4249     return do_frint_mode(s, a, float_round_down);
4250 }
4251
4252 static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4253 {
4254     return do_frint_mode(s, a, float_round_to_zero);
4255 }
4256
4257 static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4258 {
4259     return do_frint_mode(s, a, float_round_ties_away);
4260 }
4261
4262 static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4263 {
4264     static gen_helper_gvec_3_ptr * const fns[3] = {
4265         gen_helper_sve_frecpx_h,
4266         gen_helper_sve_frecpx_s,
4267         gen_helper_sve_frecpx_d
4268     };
4269     if (a->esz == 0) {
4270         return false;
4271     }
4272     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4273 }
4274
4275 static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4276 {
4277     static gen_helper_gvec_3_ptr * const fns[3] = {
4278         gen_helper_sve_fsqrt_h,
4279         gen_helper_sve_fsqrt_s,
4280         gen_helper_sve_fsqrt_d
4281     };
4282     if (a->esz == 0) {
4283         return false;
4284     }
4285     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4286 }
4287
4288 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4289 {
4290     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4291 }
4292
4293 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4294 {
4295     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4296 }
4297
4298 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4299 {
4300     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4301 }
4302
4303 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4304 {
4305     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4306 }
4307
4308 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4309 {
4310     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4311 }
4312
4313 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4314 {
4315     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4316 }
4317
4318 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4319 {
4320     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4321 }
4322
4323 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4324 {
4325     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4326 }
4327
4328 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4329 {
4330     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4331 }
4332
4333 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4334 {
4335     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4336 }
4337
4338 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4339 {
4340     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4341 }
4342
4343 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4344 {
4345     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4346 }
4347
4348 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4349 {
4350     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4351 }
4352
4353 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4354 {
4355     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4356 }
4357
4358 /*
4359  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4360  */
4361
4362 /* Subroutine loading a vector register at VOFS of LEN bytes.
4363  * The load should begin at the address Rn + IMM.
4364  */
4365
4366 static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
4367                    int rn, int imm)
4368 {
4369     uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
4370     uint32_t len_remain = len % 8;
4371     uint32_t nparts = len / 8 + ctpop8(len_remain);
4372     int midx = get_mem_index(s);
4373     TCGv_i64 addr, t0, t1;
4374
4375     addr = tcg_temp_new_i64();
4376     t0 = tcg_temp_new_i64();
4377
4378     /* Note that unpredicated load/store of vector/predicate registers
4379      * are defined as a stream of bytes, which equates to little-endian
4380      * operations on larger quantities.  There is no nice way to force
4381      * a little-endian load for aarch64_be-linux-user out of line.
4382      *
4383      * Attempt to keep code expansion to a minimum by limiting the
4384      * amount of unrolling done.
4385      */
4386     if (nparts <= 4) {
4387         int i;
4388
4389         for (i = 0; i < len_align; i += 8) {
4390             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4391             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4392             tcg_gen_st_i64(t0, cpu_env, vofs + i);
4393         }
4394     } else {
4395         TCGLabel *loop = gen_new_label();
4396         TCGv_ptr tp, i = tcg_const_local_ptr(0);
4397
4398         gen_set_label(loop);
4399
4400         /* Minimize the number of local temps that must be re-read from
4401          * the stack each iteration.  Instead, re-compute values other
4402          * than the loop counter.
4403          */
4404         tp = tcg_temp_new_ptr();
4405         tcg_gen_addi_ptr(tp, i, imm);
4406         tcg_gen_extu_ptr_i64(addr, tp);
4407         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4408
4409         tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4410
4411         tcg_gen_add_ptr(tp, cpu_env, i);
4412         tcg_gen_addi_ptr(i, i, 8);
4413         tcg_gen_st_i64(t0, tp, vofs);
4414         tcg_temp_free_ptr(tp);
4415
4416         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4417         tcg_temp_free_ptr(i);
4418     }
4419
4420     /* Predicate register loads can be any multiple of 2.
4421      * Note that we still store the entire 64-bit unit into cpu_env.
4422      */
4423     if (len_remain) {
4424         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4425
4426         switch (len_remain) {
4427         case 2:
4428         case 4:
4429         case 8:
4430             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4431             break;
4432
4433         case 6:
4434             t1 = tcg_temp_new_i64();
4435             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
4436             tcg_gen_addi_i64(addr, addr, 4);
4437             tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
4438             tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4439             tcg_temp_free_i64(t1);
4440             break;
4441
4442         default:
4443             g_assert_not_reached();
4444         }
4445         tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4446     }
4447     tcg_temp_free_i64(addr);
4448     tcg_temp_free_i64(t0);
4449 }
4450
4451 /* Similarly for stores.  */
4452 static void do_str(DisasContext *s, uint32_t vofs, uint32_t len,
4453                    int rn, int imm)
4454 {
4455     uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
4456     uint32_t len_remain = len % 8;
4457     uint32_t nparts = len / 8 + ctpop8(len_remain);
4458     int midx = get_mem_index(s);
4459     TCGv_i64 addr, t0;
4460
4461     addr = tcg_temp_new_i64();
4462     t0 = tcg_temp_new_i64();
4463
4464     /* Note that unpredicated load/store of vector/predicate registers
4465      * are defined as a stream of bytes, which equates to little-endian
4466      * operations on larger quantities.  There is no nice way to force
4467      * a little-endian store for aarch64_be-linux-user out of line.
4468      *
4469      * Attempt to keep code expansion to a minimum by limiting the
4470      * amount of unrolling done.
4471      */
4472     if (nparts <= 4) {
4473         int i;
4474
4475         for (i = 0; i < len_align; i += 8) {
4476             tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4477             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4478             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4479         }
4480     } else {
4481         TCGLabel *loop = gen_new_label();
4482         TCGv_ptr t2, i = tcg_const_local_ptr(0);
4483
4484         gen_set_label(loop);
4485
4486         t2 = tcg_temp_new_ptr();
4487         tcg_gen_add_ptr(t2, cpu_env, i);
4488         tcg_gen_ld_i64(t0, t2, vofs);
4489
4490         /* Minimize the number of local temps that must be re-read from
4491          * the stack each iteration.  Instead, re-compute values other
4492          * than the loop counter.
4493          */
4494         tcg_gen_addi_ptr(t2, i, imm);
4495         tcg_gen_extu_ptr_i64(addr, t2);
4496         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4497         tcg_temp_free_ptr(t2);
4498
4499         tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4500
4501         tcg_gen_addi_ptr(i, i, 8);
4502
4503         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4504         tcg_temp_free_ptr(i);
4505     }
4506
4507     /* Predicate register stores can be any multiple of 2.  */
4508     if (len_remain) {
4509         tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4510         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4511
4512         switch (len_remain) {
4513         case 2:
4514         case 4:
4515         case 8:
4516             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4517             break;
4518
4519         case 6:
4520             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
4521             tcg_gen_addi_i64(addr, addr, 4);
4522             tcg_gen_shri_i64(t0, t0, 32);
4523             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
4524             break;
4525
4526         default:
4527             g_assert_not_reached();
4528         }
4529     }
4530     tcg_temp_free_i64(addr);
4531     tcg_temp_free_i64(t0);
4532 }
4533
4534 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4535 {
4536     if (sve_access_check(s)) {
4537         int size = vec_full_reg_size(s);
4538         int off = vec_full_reg_offset(s, a->rd);
4539         do_ldr(s, off, size, a->rn, a->imm * size);
4540     }
4541     return true;
4542 }
4543
4544 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4545 {
4546     if (sve_access_check(s)) {
4547         int size = pred_full_reg_size(s);
4548         int off = pred_full_reg_offset(s, a->rd);
4549         do_ldr(s, off, size, a->rn, a->imm * size);
4550     }
4551     return true;
4552 }
4553
4554 static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4555 {
4556     if (sve_access_check(s)) {
4557         int size = vec_full_reg_size(s);
4558         int off = vec_full_reg_offset(s, a->rd);
4559         do_str(s, off, size, a->rn, a->imm * size);
4560     }
4561     return true;
4562 }
4563
4564 static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4565 {
4566     if (sve_access_check(s)) {
4567         int size = pred_full_reg_size(s);
4568         int off = pred_full_reg_offset(s, a->rd);
4569         do_str(s, off, size, a->rn, a->imm * size);
4570     }
4571     return true;
4572 }
4573
4574 /*
4575  *** SVE Memory - Contiguous Load Group
4576  */
4577
4578 /* The memory mode of the dtype.  */
4579 static const TCGMemOp dtype_mop[16] = {
4580     MO_UB, MO_UB, MO_UB, MO_UB,
4581     MO_SL, MO_UW, MO_UW, MO_UW,
4582     MO_SW, MO_SW, MO_UL, MO_UL,
4583     MO_SB, MO_SB, MO_SB, MO_Q
4584 };
4585
4586 #define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
4587
4588 /* The vector element size of dtype.  */
4589 static const uint8_t dtype_esz[16] = {
4590     0, 1, 2, 3,
4591     3, 1, 2, 3,
4592     3, 2, 2, 3,
4593     3, 2, 1, 3
4594 };
4595
4596 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4597                        gen_helper_gvec_mem *fn)
4598 {
4599     unsigned vsz = vec_full_reg_size(s);
4600     TCGv_ptr t_pg;
4601     TCGv_i32 desc;
4602
4603     /* For e.g. LD4, there are not enough arguments to pass all 4
4604      * registers as pointers, so encode the regno into the data field.
4605      * For consistency, do this even for LD1.
4606      */
4607     desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
4608     t_pg = tcg_temp_new_ptr();
4609
4610     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4611     fn(cpu_env, t_pg, addr, desc);
4612
4613     tcg_temp_free_ptr(t_pg);
4614     tcg_temp_free_i32(desc);
4615 }
4616
4617 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4618                       TCGv_i64 addr, int dtype, int nreg)
4619 {
4620     static gen_helper_gvec_mem * const fns[16][4] = {
4621         { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4622           gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4623         { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4624         { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4625         { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4626
4627         { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
4628         { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
4629           gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
4630         { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
4631         { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
4632
4633         { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
4634         { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
4635         { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
4636           gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
4637         { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
4638
4639         { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4640         { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4641         { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4642         { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
4643           gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
4644     };
4645     gen_helper_gvec_mem *fn = fns[dtype][nreg];
4646
4647     /* While there are holes in the table, they are not
4648      * accessible via the instruction encoding.
4649      */
4650     assert(fn != NULL);
4651     do_mem_zpa(s, zt, pg, addr, fn);
4652 }
4653
4654 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4655 {
4656     if (a->rm == 31) {
4657         return false;
4658     }
4659     if (sve_access_check(s)) {
4660         TCGv_i64 addr = new_tmp_a64(s);
4661         tcg_gen_muli_i64(addr, cpu_reg(s, a->rm),
4662                          (a->nreg + 1) << dtype_msz(a->dtype));
4663         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4664         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4665     }
4666     return true;
4667 }
4668
4669 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4670 {
4671     if (sve_access_check(s)) {
4672         int vsz = vec_full_reg_size(s);
4673         int elements = vsz >> dtype_esz[a->dtype];
4674         TCGv_i64 addr = new_tmp_a64(s);
4675
4676         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4677                          (a->imm * elements * (a->nreg + 1))
4678                          << dtype_msz(a->dtype));
4679         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4680     }
4681     return true;
4682 }
4683
4684 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4685 {
4686     static gen_helper_gvec_mem * const fns[16] = {
4687         gen_helper_sve_ldff1bb_r,
4688         gen_helper_sve_ldff1bhu_r,
4689         gen_helper_sve_ldff1bsu_r,
4690         gen_helper_sve_ldff1bdu_r,
4691
4692         gen_helper_sve_ldff1sds_r,
4693         gen_helper_sve_ldff1hh_r,
4694         gen_helper_sve_ldff1hsu_r,
4695         gen_helper_sve_ldff1hdu_r,
4696
4697         gen_helper_sve_ldff1hds_r,
4698         gen_helper_sve_ldff1hss_r,
4699         gen_helper_sve_ldff1ss_r,
4700         gen_helper_sve_ldff1sdu_r,
4701
4702         gen_helper_sve_ldff1bds_r,
4703         gen_helper_sve_ldff1bss_r,
4704         gen_helper_sve_ldff1bhs_r,
4705         gen_helper_sve_ldff1dd_r,
4706     };
4707
4708     if (sve_access_check(s)) {
4709         TCGv_i64 addr = new_tmp_a64(s);
4710         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4711         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4712         do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4713     }
4714     return true;
4715 }
4716
4717 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4718 {
4719     static gen_helper_gvec_mem * const fns[16] = {
4720         gen_helper_sve_ldnf1bb_r,
4721         gen_helper_sve_ldnf1bhu_r,
4722         gen_helper_sve_ldnf1bsu_r,
4723         gen_helper_sve_ldnf1bdu_r,
4724
4725         gen_helper_sve_ldnf1sds_r,
4726         gen_helper_sve_ldnf1hh_r,
4727         gen_helper_sve_ldnf1hsu_r,
4728         gen_helper_sve_ldnf1hdu_r,
4729
4730         gen_helper_sve_ldnf1hds_r,
4731         gen_helper_sve_ldnf1hss_r,
4732         gen_helper_sve_ldnf1ss_r,
4733         gen_helper_sve_ldnf1sdu_r,
4734
4735         gen_helper_sve_ldnf1bds_r,
4736         gen_helper_sve_ldnf1bss_r,
4737         gen_helper_sve_ldnf1bhs_r,
4738         gen_helper_sve_ldnf1dd_r,
4739     };
4740
4741     if (sve_access_check(s)) {
4742         int vsz = vec_full_reg_size(s);
4743         int elements = vsz >> dtype_esz[a->dtype];
4744         int off = (a->imm * elements) << dtype_msz(a->dtype);
4745         TCGv_i64 addr = new_tmp_a64(s);
4746
4747         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4748         do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4749     }
4750     return true;
4751 }
4752
4753 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4754 {
4755     static gen_helper_gvec_mem * const fns[4] = {
4756         gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
4757         gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
4758     };
4759     unsigned vsz = vec_full_reg_size(s);
4760     TCGv_ptr t_pg;
4761     TCGv_i32 desc;
4762
4763     /* Load the first quadword using the normal predicated load helpers.  */
4764     desc = tcg_const_i32(simd_desc(16, 16, zt));
4765     t_pg = tcg_temp_new_ptr();
4766
4767     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4768     fns[msz](cpu_env, t_pg, addr, desc);
4769
4770     tcg_temp_free_ptr(t_pg);
4771     tcg_temp_free_i32(desc);
4772
4773     /* Replicate that first quadword.  */
4774     if (vsz > 16) {
4775         unsigned dofs = vec_full_reg_offset(s, zt);
4776         tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4777     }
4778 }
4779
4780 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4781 {
4782     if (a->rm == 31) {
4783         return false;
4784     }
4785     if (sve_access_check(s)) {
4786         int msz = dtype_msz(a->dtype);
4787         TCGv_i64 addr = new_tmp_a64(s);
4788         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4789         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4790         do_ldrq(s, a->rd, a->pg, addr, msz);
4791     }
4792     return true;
4793 }
4794
4795 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4796 {
4797     if (sve_access_check(s)) {
4798         TCGv_i64 addr = new_tmp_a64(s);
4799         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4800         do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4801     }
4802     return true;
4803 }
4804
4805 /* Load and broadcast element.  */
4806 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4807 {
4808     if (!sve_access_check(s)) {
4809         return true;
4810     }
4811
4812     unsigned vsz = vec_full_reg_size(s);
4813     unsigned psz = pred_full_reg_size(s);
4814     unsigned esz = dtype_esz[a->dtype];
4815     TCGLabel *over = gen_new_label();
4816     TCGv_i64 temp;
4817
4818     /* If the guarding predicate has no bits set, no load occurs.  */
4819     if (psz <= 8) {
4820         /* Reduce the pred_esz_masks value simply to reduce the
4821          * size of the code generated here.
4822          */
4823         uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4824         temp = tcg_temp_new_i64();
4825         tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4826         tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4827         tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4828         tcg_temp_free_i64(temp);
4829     } else {
4830         TCGv_i32 t32 = tcg_temp_new_i32();
4831         find_last_active(s, t32, esz, a->pg);
4832         tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4833         tcg_temp_free_i32(t32);
4834     }
4835
4836     /* Load the data.  */
4837     temp = tcg_temp_new_i64();
4838     tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << esz);
4839     tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4840                         s->be_data | dtype_mop[a->dtype]);
4841
4842     /* Broadcast to *all* elements.  */
4843     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4844                          vsz, vsz, temp);
4845     tcg_temp_free_i64(temp);
4846
4847     /* Zero the inactive elements.  */
4848     gen_set_label(over);
4849     do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4850     return true;
4851 }
4852
4853 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4854                       int msz, int esz, int nreg)
4855 {
4856     static gen_helper_gvec_mem * const fn_single[4][4] = {
4857         { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
4858           gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
4859         { NULL,                   gen_helper_sve_st1hh_r,
4860           gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
4861         { NULL, NULL,
4862           gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
4863         { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
4864     };
4865     static gen_helper_gvec_mem * const fn_multiple[3][4] = {
4866         { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
4867           gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
4868         { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
4869           gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
4870         { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
4871           gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
4872     };
4873     gen_helper_gvec_mem *fn;
4874
4875     if (nreg == 0) {
4876         /* ST1 */
4877         fn = fn_single[msz][esz];
4878     } else {
4879         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
4880         assert(msz == esz);
4881         fn = fn_multiple[nreg - 1][msz];
4882     }
4883     assert(fn != NULL);
4884     do_mem_zpa(s, zt, pg, addr, fn);
4885 }
4886
4887 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
4888 {
4889     if (a->rm == 31 || a->msz > a->esz) {
4890         return false;
4891     }
4892     if (sve_access_check(s)) {
4893         TCGv_i64 addr = new_tmp_a64(s);
4894         tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), (a->nreg + 1) << a->msz);
4895         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4896         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4897     }
4898     return true;
4899 }
4900
4901 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
4902 {
4903     if (a->msz > a->esz) {
4904         return false;
4905     }
4906     if (sve_access_check(s)) {
4907         int vsz = vec_full_reg_size(s);
4908         int elements = vsz >> a->esz;
4909         TCGv_i64 addr = new_tmp_a64(s);
4910
4911         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4912                          (a->imm * elements * (a->nreg + 1)) << a->msz);
4913         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4914     }
4915     return true;
4916 }
4917
4918 /*
4919  *** SVE gather loads / scatter stores
4920  */
4921
4922 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
4923                        TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
4924 {
4925     unsigned vsz = vec_full_reg_size(s);
4926     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
4927     TCGv_ptr t_zm = tcg_temp_new_ptr();
4928     TCGv_ptr t_pg = tcg_temp_new_ptr();
4929     TCGv_ptr t_zt = tcg_temp_new_ptr();
4930
4931     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4932     tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
4933     tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
4934     fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);
4935
4936     tcg_temp_free_ptr(t_zt);
4937     tcg_temp_free_ptr(t_zm);
4938     tcg_temp_free_ptr(t_pg);
4939     tcg_temp_free_i32(desc);
4940 }
4941
4942 /* Indexed by [ff][xs][u][msz].  */
4943 static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
4944     { { { gen_helper_sve_ldbss_zsu,
4945           gen_helper_sve_ldhss_zsu,
4946           NULL, },
4947         { gen_helper_sve_ldbsu_zsu,
4948           gen_helper_sve_ldhsu_zsu,
4949           gen_helper_sve_ldssu_zsu, } },
4950       { { gen_helper_sve_ldbss_zss,
4951           gen_helper_sve_ldhss_zss,
4952           NULL, },
4953         { gen_helper_sve_ldbsu_zss,
4954           gen_helper_sve_ldhsu_zss,
4955           gen_helper_sve_ldssu_zss, } } },
4956
4957     { { { gen_helper_sve_ldffbss_zsu,
4958           gen_helper_sve_ldffhss_zsu,
4959           NULL, },
4960         { gen_helper_sve_ldffbsu_zsu,
4961           gen_helper_sve_ldffhsu_zsu,
4962           gen_helper_sve_ldffssu_zsu, } },
4963       { { gen_helper_sve_ldffbss_zss,
4964           gen_helper_sve_ldffhss_zss,
4965           NULL, },
4966         { gen_helper_sve_ldffbsu_zss,
4967           gen_helper_sve_ldffhsu_zss,
4968           gen_helper_sve_ldffssu_zss, } } }
4969 };
4970
4971 /* Note that we overload xs=2 to indicate 64-bit offset.  */
4972 static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
4973     { { { gen_helper_sve_ldbds_zsu,
4974           gen_helper_sve_ldhds_zsu,
4975           gen_helper_sve_ldsds_zsu,
4976           NULL, },
4977         { gen_helper_sve_ldbdu_zsu,
4978           gen_helper_sve_ldhdu_zsu,
4979           gen_helper_sve_ldsdu_zsu,
4980           gen_helper_sve_ldddu_zsu, } },
4981       { { gen_helper_sve_ldbds_zss,
4982           gen_helper_sve_ldhds_zss,
4983           gen_helper_sve_ldsds_zss,
4984           NULL, },
4985         { gen_helper_sve_ldbdu_zss,
4986           gen_helper_sve_ldhdu_zss,
4987           gen_helper_sve_ldsdu_zss,
4988           gen_helper_sve_ldddu_zss, } },
4989       { { gen_helper_sve_ldbds_zd,
4990           gen_helper_sve_ldhds_zd,
4991           gen_helper_sve_ldsds_zd,
4992           NULL, },
4993         { gen_helper_sve_ldbdu_zd,
4994           gen_helper_sve_ldhdu_zd,
4995           gen_helper_sve_ldsdu_zd,
4996           gen_helper_sve_ldddu_zd, } } },
4997
4998     { { { gen_helper_sve_ldffbds_zsu,
4999           gen_helper_sve_ldffhds_zsu,
5000           gen_helper_sve_ldffsds_zsu,
5001           NULL, },
5002         { gen_helper_sve_ldffbdu_zsu,
5003           gen_helper_sve_ldffhdu_zsu,
5004           gen_helper_sve_ldffsdu_zsu,
5005           gen_helper_sve_ldffddu_zsu, } },
5006       { { gen_helper_sve_ldffbds_zss,
5007           gen_helper_sve_ldffhds_zss,
5008           gen_helper_sve_ldffsds_zss,
5009           NULL, },
5010         { gen_helper_sve_ldffbdu_zss,
5011           gen_helper_sve_ldffhdu_zss,
5012           gen_helper_sve_ldffsdu_zss,
5013           gen_helper_sve_ldffddu_zss, } },
5014       { { gen_helper_sve_ldffbds_zd,
5015           gen_helper_sve_ldffhds_zd,
5016           gen_helper_sve_ldffsds_zd,
5017           NULL, },
5018         { gen_helper_sve_ldffbdu_zd,
5019           gen_helper_sve_ldffhdu_zd,
5020           gen_helper_sve_ldffsdu_zd,
5021           gen_helper_sve_ldffddu_zd, } } }
5022 };
5023
5024 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
5025 {
5026     gen_helper_gvec_mem_scatter *fn = NULL;
5027
5028     if (!sve_access_check(s)) {
5029         return true;
5030     }
5031
5032     switch (a->esz) {
5033     case MO_32:
5034         fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
5035         break;
5036     case MO_64:
5037         fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
5038         break;
5039     }
5040     assert(fn != NULL);
5041
5042     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5043                cpu_reg_sp(s, a->rn), fn);
5044     return true;
5045 }
5046
5047 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
5048 {
5049     gen_helper_gvec_mem_scatter *fn = NULL;
5050     TCGv_i64 imm;
5051
5052     if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5053         return false;
5054     }
5055     if (!sve_access_check(s)) {
5056         return true;
5057     }
5058
5059     switch (a->esz) {
5060     case MO_32:
5061         fn = gather_load_fn32[a->ff][0][a->u][a->msz];
5062         break;
5063     case MO_64:
5064         fn = gather_load_fn64[a->ff][2][a->u][a->msz];
5065         break;
5066     }
5067     assert(fn != NULL);
5068
5069     /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5070      * by loading the immediate into the scalar parameter.
5071      */
5072     imm = tcg_const_i64(a->imm << a->msz);
5073     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
5074     tcg_temp_free_i64(imm);
5075     return true;
5076 }
5077
5078 /* Indexed by [xs][msz].  */
5079 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = {
5080     { gen_helper_sve_stbs_zsu,
5081       gen_helper_sve_sths_zsu,
5082       gen_helper_sve_stss_zsu, },
5083     { gen_helper_sve_stbs_zss,
5084       gen_helper_sve_sths_zss,
5085       gen_helper_sve_stss_zss, },
5086 };
5087
5088 /* Note that we overload xs=2 to indicate 64-bit offset.  */
5089 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = {
5090     { gen_helper_sve_stbd_zsu,
5091       gen_helper_sve_sthd_zsu,
5092       gen_helper_sve_stsd_zsu,
5093       gen_helper_sve_stdd_zsu, },
5094     { gen_helper_sve_stbd_zss,
5095       gen_helper_sve_sthd_zss,
5096       gen_helper_sve_stsd_zss,
5097       gen_helper_sve_stdd_zss, },
5098     { gen_helper_sve_stbd_zd,
5099       gen_helper_sve_sthd_zd,
5100       gen_helper_sve_stsd_zd,
5101       gen_helper_sve_stdd_zd, },
5102 };
5103
5104 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
5105 {
5106     gen_helper_gvec_mem_scatter *fn;
5107
5108     if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5109         return false;
5110     }
5111     if (!sve_access_check(s)) {
5112         return true;
5113     }
5114     switch (a->esz) {
5115     case MO_32:
5116         fn = scatter_store_fn32[a->xs][a->msz];
5117         break;
5118     case MO_64:
5119         fn = scatter_store_fn64[a->xs][a->msz];
5120         break;
5121     default:
5122         g_assert_not_reached();
5123     }
5124     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5125                cpu_reg_sp(s, a->rn), fn);
5126     return true;
5127 }
5128
5129 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
5130 {
5131     gen_helper_gvec_mem_scatter *fn = NULL;
5132     TCGv_i64 imm;
5133
5134     if (a->esz < a->msz) {
5135         return false;
5136     }
5137     if (!sve_access_check(s)) {
5138         return true;
5139     }
5140
5141     switch (a->esz) {
5142     case MO_32:
5143         fn = scatter_store_fn32[0][a->msz];
5144         break;
5145     case MO_64:
5146         fn = scatter_store_fn64[2][a->msz];
5147         break;
5148     }
5149     assert(fn != NULL);
5150
5151     /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5152      * by loading the immediate into the scalar parameter.
5153      */
5154     imm = tcg_const_i64(a->imm << a->msz);
5155     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
5156     tcg_temp_free_i64(imm);
5157     return true;
5158 }
5159
5160 /*
5161  * Prefetches
5162  */
5163
5164 static bool trans_PRF(DisasContext *s, arg_PRF *a, uint32_t insn)
5165 {
5166     /* Prefetch is a nop within QEMU.  */
5167     sve_access_check(s);
5168     return true;
5169 }
5170
5171 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a, uint32_t insn)
5172 {
5173     if (a->rm == 31) {
5174         return false;
5175     }
5176     /* Prefetch is a nop within QEMU.  */
5177     sve_access_check(s);
5178     return true;
5179 }
5180
5181 /*
5182  * Move Prefix
5183  *
5184  * TODO: The implementation so far could handle predicated merging movprfx.
5185  * The helper functions as written take an extra source register to
5186  * use in the operation, but the result is only written when predication
5187  * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
5188  * to allow the final write back to the destination to be unconditional.
5189  * For predicated zeroing movprfx, we need to rearrange the helpers to
5190  * allow the final write back to zero inactives.
5191  *
5192  * In the meantime, just emit the moves.
5193  */
5194
5195 static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a, uint32_t insn)
5196 {
5197     return do_mov_z(s, a->rd, a->rn);
5198 }
5199
5200 static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
5201 {
5202     if (sve_access_check(s)) {
5203         do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5204     }
5205     return true;
5206 }
5207
5208 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
5209 {
5210     if (sve_access_check(s)) {
5211         do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5212     }
5213     return true;
5214 }