target/arm/translate-sve.c

   1 /*
   2  * AArch64 SVE translation
   3  *
   4  * Copyright (c) 2018 Linaro, Ltd
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "tcg-op-gvec.h"
  25 #include "tcg-gvec-desc.h"
  26 #include "qemu/log.h"
  27 #include "arm_ldst.h"
  28 #include "translate.h"
  29 #include "internals.h"
  30 #include "exec/helper-proto.h"
  31 #include "exec/helper-gen.h"
  32 #include "exec/log.h"
  33 #include "trace-tcg.h"
  34 #include "translate-a64.h"
  35 #include "fpu/softfloat.h"
  36
  37
  38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
  39                          TCGv_i64, uint32_t, uint32_t);
  40
  41 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
  42                                      TCGv_ptr, TCGv_i32);
  43 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
  44                                      TCGv_ptr, TCGv_ptr, TCGv_i32);
  45
  46 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
  47 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
  48                                          TCGv_ptr, TCGv_i64, TCGv_i32);
  49
  50 /*
  51  * Helpers for extracting complex instruction fields.
  52  */
  53
  54 /* See e.g. ASR (immediate, predicated).
  55  * Returns -1 for unallocated encoding; diagnose later.
  56  */
  57 static int tszimm_esz(int x)
  58 {
  59     x >>= 3;  /* discard imm3 */
  60     return 31 - clz32(x);
  61 }
  62
  63 static int tszimm_shr(int x)
  64 {
  65     return (16 << tszimm_esz(x)) - x;
  66 }
  67
  68 /* See e.g. LSL (immediate, predicated).  */
  69 static int tszimm_shl(int x)
  70 {
  71     return x - (8 << tszimm_esz(x));
  72 }
  73
  74 static inline int plus1(int x)
  75 {
  76     return x + 1;
  77 }
  78
  79 /* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  80 static inline int expand_imm_sh8s(int x)
  81 {
  82     return (int8_t)x << (x & 0x100 ? 8 : 0);
  83 }
  84
  85 static inline int expand_imm_sh8u(int x)
  86 {
  87     return (uint8_t)x << (x & 0x100 ? 8 : 0);
  88 }
  89
  90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  91  * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  92  */
  93 static inline int msz_dtype(int msz)
  94 {
  95     static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  96     return dtype[msz];
  97 }
  98
  99 /*
 100  * Include the generated decoder.
 101  */
 102
 103 #include "decode-sve.inc.c"
 104
 105 /*
 106  * Implement all of the translator functions referenced by the decoder.
 107  */
 108
 109 /* Return the offset info CPUARMState of the predicate vector register Pn.
 110  * Note for this purpose, FFR is P16.
 111  */
 112 static inline int pred_full_reg_offset(DisasContext *s, int regno)
 113 {
 114     return offsetof(CPUARMState, vfp.pregs[regno]);
 115 }
 116
 117 /* Return the byte size of the whole predicate register, VL / 64.  */
 118 static inline int pred_full_reg_size(DisasContext *s)
 119 {
 120     return s->sve_len >> 3;
 121 }
 122
 123 /* Round up the size of a register to a size allowed by
 124  * the tcg vector infrastructure.  Any operation which uses this
 125  * size may assume that the bits above pred_full_reg_size are zero,
 126  * and must leave them the same way.
 127  *
 128  * Note that this is not needed for the vector registers as they
 129  * are always properly sized for tcg vectors.
 130  */
 131 static int size_for_gvec(int size)
 132 {
 133     if (size <= 8) {
 134         return 8;
 135     } else {
 136         return QEMU_ALIGN_UP(size, 16);
 137     }
 138 }
 139
 140 static int pred_gvec_reg_size(DisasContext *s)
 141 {
 142     return size_for_gvec(pred_full_reg_size(s));
 143 }
 144
 145 /* Invoke a vector expander on two Zregs.  */
 146 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
 147                          int esz, int rd, int rn)
 148 {
 149     if (sve_access_check(s)) {
 150         unsigned vsz = vec_full_reg_size(s);
 151         gvec_fn(esz, vec_full_reg_offset(s, rd),
 152                 vec_full_reg_offset(s, rn), vsz, vsz);
 153     }
 154     return true;
 155 }
 156
 157 /* Invoke a vector expander on three Zregs.  */
 158 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
 159                          int esz, int rd, int rn, int rm)
 160 {
 161     if (sve_access_check(s)) {
 162         unsigned vsz = vec_full_reg_size(s);
 163         gvec_fn(esz, vec_full_reg_offset(s, rd),
 164                 vec_full_reg_offset(s, rn),
 165                 vec_full_reg_offset(s, rm), vsz, vsz);
 166     }
 167     return true;
 168 }
 169
 170 /* Invoke a vector move on two Zregs.  */
 171 static bool do_mov_z(DisasContext *s, int rd, int rn)
 172 {
 173     return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
 174 }
 175
 176 /* Initialize a Zreg with replications of a 64-bit immediate.  */
 177 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 178 {
 179     unsigned vsz = vec_full_reg_size(s);
 180     tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
 181 }
 182
 183 /* Invoke a vector expander on two Pregs.  */
 184 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
 185                          int esz, int rd, int rn)
 186 {
 187     if (sve_access_check(s)) {
 188         unsigned psz = pred_gvec_reg_size(s);
 189         gvec_fn(esz, pred_full_reg_offset(s, rd),
 190                 pred_full_reg_offset(s, rn), psz, psz);
 191     }
 192     return true;
 193 }
 194
 195 /* Invoke a vector expander on three Pregs.  */
 196 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
 197                          int esz, int rd, int rn, int rm)
 198 {
 199     if (sve_access_check(s)) {
 200         unsigned psz = pred_gvec_reg_size(s);
 201         gvec_fn(esz, pred_full_reg_offset(s, rd),
 202                 pred_full_reg_offset(s, rn),
 203                 pred_full_reg_offset(s, rm), psz, psz);
 204     }
 205     return true;
 206 }
 207
 208 /* Invoke a vector operation on four Pregs.  */
 209 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
 210                         int rd, int rn, int rm, int rg)
 211 {
 212     if (sve_access_check(s)) {
 213         unsigned psz = pred_gvec_reg_size(s);
 214         tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
 215                        pred_full_reg_offset(s, rn),
 216                        pred_full_reg_offset(s, rm),
 217                        pred_full_reg_offset(s, rg),
 218                        psz, psz, gvec_op);
 219     }
 220     return true;
 221 }
 222
 223 /* Invoke a vector move on two Pregs.  */
 224 static bool do_mov_p(DisasContext *s, int rd, int rn)
 225 {
 226     return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
 227 }
 228
 229 /* Set the cpu flags as per a return from an SVE helper.  */
 230 static void do_pred_flags(TCGv_i32 t)
 231 {
 232     tcg_gen_mov_i32(cpu_NF, t);
 233     tcg_gen_andi_i32(cpu_ZF, t, 2);
 234     tcg_gen_andi_i32(cpu_CF, t, 1);
 235     tcg_gen_movi_i32(cpu_VF, 0);
 236 }
 237
 238 /* Subroutines computing the ARM PredTest psuedofunction.  */
 239 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
 240 {
 241     TCGv_i32 t = tcg_temp_new_i32();
 242
 243     gen_helper_sve_predtest1(t, d, g);
 244     do_pred_flags(t);
 245     tcg_temp_free_i32(t);
 246 }
 247
 248 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
 249 {
 250     TCGv_ptr dptr = tcg_temp_new_ptr();
 251     TCGv_ptr gptr = tcg_temp_new_ptr();
 252     TCGv_i32 t;
 253
 254     tcg_gen_addi_ptr(dptr, cpu_env, dofs);
 255     tcg_gen_addi_ptr(gptr, cpu_env, gofs);
 256     t = tcg_const_i32(words);
 257
 258     gen_helper_sve_predtest(t, dptr, gptr, t);
 259     tcg_temp_free_ptr(dptr);
 260     tcg_temp_free_ptr(gptr);
 261
 262     do_pred_flags(t);
 263     tcg_temp_free_i32(t);
 264 }
 265
 266 /* For each element size, the bits within a predicate word that are active.  */
 267 const uint64_t pred_esz_masks[4] = {
 268     0xffffffffffffffffull, 0x5555555555555555ull,
 269     0x1111111111111111ull, 0x0101010101010101ull
 270 };
 271
 272 /*
 273  *** SVE Logical - Unpredicated Group
 274  */
 275
 276 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 277 {
 278     return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
 279 }
 280
 281 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 282 {
 283     if (a->rn == a->rm) { /* MOV */
 284         return do_mov_z(s, a->rd, a->rn);
 285     } else {
 286         return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
 287     }
 288 }
 289
 290 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 291 {
 292     return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
 293 }
 294
 295 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 296 {
 297     return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
 298 }
 299
 300 /*
 301  *** SVE Integer Arithmetic - Unpredicated Group
 302  */
 303
 304 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 305 {
 306     return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
 307 }
 308
 309 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 310 {
 311     return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
 312 }
 313
 314 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 315 {
 316     return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
 317 }
 318
 319 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 320 {
 321     return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
 322 }
 323
 324 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 325 {
 326     return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
 327 }
 328
 329 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 330 {
 331     return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
 332 }
 333
 334 /*
 335  *** SVE Integer Arithmetic - Binary Predicated Group
 336  */
 337
 338 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 339 {
 340     unsigned vsz = vec_full_reg_size(s);
 341     if (fn == NULL) {
 342         return false;
 343     }
 344     if (sve_access_check(s)) {
 345         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 346                            vec_full_reg_offset(s, a->rn),
 347                            vec_full_reg_offset(s, a->rm),
 348                            pred_full_reg_offset(s, a->pg),
 349                            vsz, vsz, 0, fn);
 350     }
 351     return true;
 352 }
 353
 354 /* Select active elememnts from Zn and inactive elements from Zm,
 355  * storing the result in Zd.
 356  */
 357 static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
 358 {
 359     static gen_helper_gvec_4 * const fns[4] = {
 360         gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
 361         gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
 362     };
 363     unsigned vsz = vec_full_reg_size(s);
 364     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 365                        vec_full_reg_offset(s, rn),
 366                        vec_full_reg_offset(s, rm),
 367                        pred_full_reg_offset(s, pg),
 368                        vsz, vsz, 0, fns[esz]);
 369 }
 370
 371 #define DO_ZPZZ(NAME, name) \
 372 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
 373                                 uint32_t insn)                            \
 374 {                                                                         \
 375     static gen_helper_gvec_4 * const fns[4] = {                           \
 376         gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
 377         gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
 378     };                                                                    \
 379     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 380 }
 381
 382 DO_ZPZZ(AND, and)
 383 DO_ZPZZ(EOR, eor)
 384 DO_ZPZZ(ORR, orr)
 385 DO_ZPZZ(BIC, bic)
 386
 387 DO_ZPZZ(ADD, add)
 388 DO_ZPZZ(SUB, sub)
 389
 390 DO_ZPZZ(SMAX, smax)
 391 DO_ZPZZ(UMAX, umax)
 392 DO_ZPZZ(SMIN, smin)
 393 DO_ZPZZ(UMIN, umin)
 394 DO_ZPZZ(SABD, sabd)
 395 DO_ZPZZ(UABD, uabd)
 396
 397 DO_ZPZZ(MUL, mul)
 398 DO_ZPZZ(SMULH, smulh)
 399 DO_ZPZZ(UMULH, umulh)
 400
 401 DO_ZPZZ(ASR, asr)
 402 DO_ZPZZ(LSR, lsr)
 403 DO_ZPZZ(LSL, lsl)
 404
 405 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 406 {
 407     static gen_helper_gvec_4 * const fns[4] = {
 408         NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
 409     };
 410     return do_zpzz_ool(s, a, fns[a->esz]);
 411 }
 412
 413 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 414 {
 415     static gen_helper_gvec_4 * const fns[4] = {
 416         NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
 417     };
 418     return do_zpzz_ool(s, a, fns[a->esz]);
 419 }
 420
 421 static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 422 {
 423     if (sve_access_check(s)) {
 424         do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
 425     }
 426     return true;
 427 }
 428
 429 #undef DO_ZPZZ
 430
 431 /*
 432  *** SVE Integer Arithmetic - Unary Predicated Group
 433  */
 434
 435 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 436 {
 437     if (fn == NULL) {
 438         return false;
 439     }
 440     if (sve_access_check(s)) {
 441         unsigned vsz = vec_full_reg_size(s);
 442         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 443                            vec_full_reg_offset(s, a->rn),
 444                            pred_full_reg_offset(s, a->pg),
 445                            vsz, vsz, 0, fn);
 446     }
 447     return true;
 448 }
 449
 450 #define DO_ZPZ(NAME, name) \
 451 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
 452 {                                                                   \
 453     static gen_helper_gvec_3 * const fns[4] = {                     \
 454         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
 455         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
 456     };                                                              \
 457     return do_zpz_ool(s, a, fns[a->esz]);                           \
 458 }
 459
 460 DO_ZPZ(CLS, cls)
 461 DO_ZPZ(CLZ, clz)
 462 DO_ZPZ(CNT_zpz, cnt_zpz)
 463 DO_ZPZ(CNOT, cnot)
 464 DO_ZPZ(NOT_zpz, not_zpz)
 465 DO_ZPZ(ABS, abs)
 466 DO_ZPZ(NEG, neg)
 467
 468 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 469 {
 470     static gen_helper_gvec_3 * const fns[4] = {
 471         NULL,
 472         gen_helper_sve_fabs_h,
 473         gen_helper_sve_fabs_s,
 474         gen_helper_sve_fabs_d
 475     };
 476     return do_zpz_ool(s, a, fns[a->esz]);
 477 }
 478
 479 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 480 {
 481     static gen_helper_gvec_3 * const fns[4] = {
 482         NULL,
 483         gen_helper_sve_fneg_h,
 484         gen_helper_sve_fneg_s,
 485         gen_helper_sve_fneg_d
 486     };
 487     return do_zpz_ool(s, a, fns[a->esz]);
 488 }
 489
 490 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 491 {
 492     static gen_helper_gvec_3 * const fns[4] = {
 493         NULL,
 494         gen_helper_sve_sxtb_h,
 495         gen_helper_sve_sxtb_s,
 496         gen_helper_sve_sxtb_d
 497     };
 498     return do_zpz_ool(s, a, fns[a->esz]);
 499 }
 500
 501 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 502 {
 503     static gen_helper_gvec_3 * const fns[4] = {
 504         NULL,
 505         gen_helper_sve_uxtb_h,
 506         gen_helper_sve_uxtb_s,
 507         gen_helper_sve_uxtb_d
 508     };
 509     return do_zpz_ool(s, a, fns[a->esz]);
 510 }
 511
 512 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 513 {
 514     static gen_helper_gvec_3 * const fns[4] = {
 515         NULL, NULL,
 516         gen_helper_sve_sxth_s,
 517         gen_helper_sve_sxth_d
 518     };
 519     return do_zpz_ool(s, a, fns[a->esz]);
 520 }
 521
 522 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 523 {
 524     static gen_helper_gvec_3 * const fns[4] = {
 525         NULL, NULL,
 526         gen_helper_sve_uxth_s,
 527         gen_helper_sve_uxth_d
 528     };
 529     return do_zpz_ool(s, a, fns[a->esz]);
 530 }
 531
 532 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 533 {
 534     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
 535 }
 536
 537 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 538 {
 539     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
 540 }
 541
 542 #undef DO_ZPZ
 543
 544 /*
 545  *** SVE Integer Reduction Group
 546  */
 547
 548 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
 549 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
 550                        gen_helper_gvec_reduc *fn)
 551 {
 552     unsigned vsz = vec_full_reg_size(s);
 553     TCGv_ptr t_zn, t_pg;
 554     TCGv_i32 desc;
 555     TCGv_i64 temp;
 556
 557     if (fn == NULL) {
 558         return false;
 559     }
 560     if (!sve_access_check(s)) {
 561         return true;
 562     }
 563
 564     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 565     temp = tcg_temp_new_i64();
 566     t_zn = tcg_temp_new_ptr();
 567     t_pg = tcg_temp_new_ptr();
 568
 569     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
 570     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
 571     fn(temp, t_zn, t_pg, desc);
 572     tcg_temp_free_ptr(t_zn);
 573     tcg_temp_free_ptr(t_pg);
 574     tcg_temp_free_i32(desc);
 575
 576     write_fp_dreg(s, a->rd, temp);
 577     tcg_temp_free_i64(temp);
 578     return true;
 579 }
 580
 581 #define DO_VPZ(NAME, name) \
 582 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
 583 {                                                                        \
 584     static gen_helper_gvec_reduc * const fns[4] = {                      \
 585         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
 586         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
 587     };                                                                   \
 588     return do_vpz_ool(s, a, fns[a->esz]);                                \
 589 }
 590
 591 DO_VPZ(ORV, orv)
 592 DO_VPZ(ANDV, andv)
 593 DO_VPZ(EORV, eorv)
 594
 595 DO_VPZ(UADDV, uaddv)
 596 DO_VPZ(SMAXV, smaxv)
 597 DO_VPZ(UMAXV, umaxv)
 598 DO_VPZ(SMINV, sminv)
 599 DO_VPZ(UMINV, uminv)
 600
 601 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 602 {
 603     static gen_helper_gvec_reduc * const fns[4] = {
 604         gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
 605         gen_helper_sve_saddv_s, NULL
 606     };
 607     return do_vpz_ool(s, a, fns[a->esz]);
 608 }
 609
 610 #undef DO_VPZ
 611
 612 /*
 613  *** SVE Shift by Immediate - Predicated Group
 614  */
 615
 616 /* Store zero into every active element of Zd.  We will use this for two
 617  * and three-operand predicated instructions for which logic dictates a
 618  * zero result.
 619  */
 620 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
 621 {
 622     static gen_helper_gvec_2 * const fns[4] = {
 623         gen_helper_sve_clr_b, gen_helper_sve_clr_h,
 624         gen_helper_sve_clr_s, gen_helper_sve_clr_d,
 625     };
 626     if (sve_access_check(s)) {
 627         unsigned vsz = vec_full_reg_size(s);
 628         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 629                            pred_full_reg_offset(s, pg),
 630                            vsz, vsz, 0, fns[esz]);
 631     }
 632     return true;
 633 }
 634
 635 /* Copy Zn into Zd, storing zeros into inactive elements.  */
 636 static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
 637 {
 638     static gen_helper_gvec_3 * const fns[4] = {
 639         gen_helper_sve_movz_b, gen_helper_sve_movz_h,
 640         gen_helper_sve_movz_s, gen_helper_sve_movz_d,
 641     };
 642     unsigned vsz = vec_full_reg_size(s);
 643     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 644                        vec_full_reg_offset(s, rn),
 645                        pred_full_reg_offset(s, pg),
 646                        vsz, vsz, 0, fns[esz]);
 647 }
 648
 649 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 650                         gen_helper_gvec_3 *fn)
 651 {
 652     if (sve_access_check(s)) {
 653         unsigned vsz = vec_full_reg_size(s);
 654         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 655                            vec_full_reg_offset(s, a->rn),
 656                            pred_full_reg_offset(s, a->pg),
 657                            vsz, vsz, a->imm, fn);
 658     }
 659     return true;
 660 }
 661
 662 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 663 {
 664     static gen_helper_gvec_3 * const fns[4] = {
 665         gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
 666         gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
 667     };
 668     if (a->esz < 0) {
 669         /* Invalid tsz encoding -- see tszimm_esz. */
 670         return false;
 671     }
 672     /* Shift by element size is architecturally valid.  For
 673        arithmetic right-shift, it's the same as by one less. */
 674     a->imm = MIN(a->imm, (8 << a->esz) - 1);
 675     return do_zpzi_ool(s, a, fns[a->esz]);
 676 }
 677
 678 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 679 {
 680     static gen_helper_gvec_3 * const fns[4] = {
 681         gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
 682         gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
 683     };
 684     if (a->esz < 0) {
 685         return false;
 686     }
 687     /* Shift by element size is architecturally valid.
 688        For logical shifts, it is a zeroing operation.  */
 689     if (a->imm >= (8 << a->esz)) {
 690         return do_clr_zp(s, a->rd, a->pg, a->esz);
 691     } else {
 692         return do_zpzi_ool(s, a, fns[a->esz]);
 693     }
 694 }
 695
 696 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 697 {
 698     static gen_helper_gvec_3 * const fns[4] = {
 699         gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
 700         gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
 701     };
 702     if (a->esz < 0) {
 703         return false;
 704     }
 705     /* Shift by element size is architecturally valid.
 706        For logical shifts, it is a zeroing operation.  */
 707     if (a->imm >= (8 << a->esz)) {
 708         return do_clr_zp(s, a->rd, a->pg, a->esz);
 709     } else {
 710         return do_zpzi_ool(s, a, fns[a->esz]);
 711     }
 712 }
 713
 714 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 715 {
 716     static gen_helper_gvec_3 * const fns[4] = {
 717         gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
 718         gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
 719     };
 720     if (a->esz < 0) {
 721         return false;
 722     }
 723     /* Shift by element size is architecturally valid.  For arithmetic
 724        right shift for division, it is a zeroing operation.  */
 725     if (a->imm >= (8 << a->esz)) {
 726         return do_clr_zp(s, a->rd, a->pg, a->esz);
 727     } else {
 728         return do_zpzi_ool(s, a, fns[a->esz]);
 729     }
 730 }
 731
 732 /*
 733  *** SVE Bitwise Shift - Predicated Group
 734  */
 735
 736 #define DO_ZPZW(NAME, name) \
 737 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
 738                                 uint32_t insn)                            \
 739 {                                                                         \
 740     static gen_helper_gvec_4 * const fns[3] = {                           \
 741         gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
 742         gen_helper_sve_##name##_zpzw_s,                                   \
 743     };                                                                    \
 744     if (a->esz < 0 || a->esz >= 3) {                                      \
 745         return false;                                                     \
 746     }                                                                     \
 747     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 748 }
 749
 750 DO_ZPZW(ASR, asr)
 751 DO_ZPZW(LSR, lsr)
 752 DO_ZPZW(LSL, lsl)
 753
 754 #undef DO_ZPZW
 755
 756 /*
 757  *** SVE Bitwise Shift - Unpredicated Group
 758  */
 759
 760 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
 761                          void (*gvec_fn)(unsigned, uint32_t, uint32_t,
 762                                          int64_t, uint32_t, uint32_t))
 763 {
 764     if (a->esz < 0) {
 765         /* Invalid tsz encoding -- see tszimm_esz. */
 766         return false;
 767     }
 768     if (sve_access_check(s)) {
 769         unsigned vsz = vec_full_reg_size(s);
 770         /* Shift by element size is architecturally valid.  For
 771            arithmetic right-shift, it's the same as by one less.
 772            Otherwise it is a zeroing operation.  */
 773         if (a->imm >= 8 << a->esz) {
 774             if (asr) {
 775                 a->imm = (8 << a->esz) - 1;
 776             } else {
 777                 do_dupi_z(s, a->rd, 0);
 778                 return true;
 779             }
 780         }
 781         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
 782                 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
 783     }
 784     return true;
 785 }
 786
 787 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 788 {
 789     return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
 790 }
 791
 792 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 793 {
 794     return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
 795 }
 796
 797 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 798 {
 799     return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
 800 }
 801
 802 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
 803 {
 804     if (fn == NULL) {
 805         return false;
 806     }
 807     if (sve_access_check(s)) {
 808         unsigned vsz = vec_full_reg_size(s);
 809         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 810                            vec_full_reg_offset(s, a->rn),
 811                            vec_full_reg_offset(s, a->rm),
 812                            vsz, vsz, 0, fn);
 813     }
 814     return true;
 815 }
 816
 817 #define DO_ZZW(NAME, name) \
 818 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
 819                                uint32_t insn)                             \
 820 {                                                                         \
 821     static gen_helper_gvec_3 * const fns[4] = {                           \
 822         gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
 823         gen_helper_sve_##name##_zzw_s, NULL                               \
 824     };                                                                    \
 825     return do_zzw_ool(s, a, fns[a->esz]);                                 \
 826 }
 827
 828 DO_ZZW(ASR, asr)
 829 DO_ZZW(LSR, lsr)
 830 DO_ZZW(LSL, lsl)
 831
 832 #undef DO_ZZW
 833
 834 /*
 835  *** SVE Integer Multiply-Add Group
 836  */
 837
 838 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
 839                          gen_helper_gvec_5 *fn)
 840 {
 841     if (sve_access_check(s)) {
 842         unsigned vsz = vec_full_reg_size(s);
 843         tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
 844                            vec_full_reg_offset(s, a->ra),
 845                            vec_full_reg_offset(s, a->rn),
 846                            vec_full_reg_offset(s, a->rm),
 847                            pred_full_reg_offset(s, a->pg),
 848                            vsz, vsz, 0, fn);
 849     }
 850     return true;
 851 }
 852
 853 #define DO_ZPZZZ(NAME, name) \
 854 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
 855 {                                                                    \
 856     static gen_helper_gvec_5 * const fns[4] = {                      \
 857         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
 858         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
 859     };                                                               \
 860     return do_zpzzz_ool(s, a, fns[a->esz]);                          \
 861 }
 862
 863 DO_ZPZZZ(MLA, mla)
 864 DO_ZPZZZ(MLS, mls)
 865
 866 #undef DO_ZPZZZ
 867
 868 /*
 869  *** SVE Index Generation Group
 870  */
 871
 872 static void do_index(DisasContext *s, int esz, int rd,
 873                      TCGv_i64 start, TCGv_i64 incr)
 874 {
 875     unsigned vsz = vec_full_reg_size(s);
 876     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 877     TCGv_ptr t_zd = tcg_temp_new_ptr();
 878
 879     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
 880     if (esz == 3) {
 881         gen_helper_sve_index_d(t_zd, start, incr, desc);
 882     } else {
 883         typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 884         static index_fn * const fns[3] = {
 885             gen_helper_sve_index_b,
 886             gen_helper_sve_index_h,
 887             gen_helper_sve_index_s,
 888         };
 889         TCGv_i32 s32 = tcg_temp_new_i32();
 890         TCGv_i32 i32 = tcg_temp_new_i32();
 891
 892         tcg_gen_extrl_i64_i32(s32, start);
 893         tcg_gen_extrl_i64_i32(i32, incr);
 894         fns[esz](t_zd, s32, i32, desc);
 895
 896         tcg_temp_free_i32(s32);
 897         tcg_temp_free_i32(i32);
 898     }
 899     tcg_temp_free_ptr(t_zd);
 900     tcg_temp_free_i32(desc);
 901 }
 902
 903 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
 904 {
 905     if (sve_access_check(s)) {
 906         TCGv_i64 start = tcg_const_i64(a->imm1);
 907         TCGv_i64 incr = tcg_const_i64(a->imm2);
 908         do_index(s, a->esz, a->rd, start, incr);
 909         tcg_temp_free_i64(start);
 910         tcg_temp_free_i64(incr);
 911     }
 912     return true;
 913 }
 914
 915 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
 916 {
 917     if (sve_access_check(s)) {
 918         TCGv_i64 start = tcg_const_i64(a->imm);
 919         TCGv_i64 incr = cpu_reg(s, a->rm);
 920         do_index(s, a->esz, a->rd, start, incr);
 921         tcg_temp_free_i64(start);
 922     }
 923     return true;
 924 }
 925
 926 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
 927 {
 928     if (sve_access_check(s)) {
 929         TCGv_i64 start = cpu_reg(s, a->rn);
 930         TCGv_i64 incr = tcg_const_i64(a->imm);
 931         do_index(s, a->esz, a->rd, start, incr);
 932         tcg_temp_free_i64(incr);
 933     }
 934     return true;
 935 }
 936
 937 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
 938 {
 939     if (sve_access_check(s)) {
 940         TCGv_i64 start = cpu_reg(s, a->rn);
 941         TCGv_i64 incr = cpu_reg(s, a->rm);
 942         do_index(s, a->esz, a->rd, start, incr);
 943     }
 944     return true;
 945 }
 946
 947 /*
 948  *** SVE Stack Allocation Group
 949  */
 950
 951 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
 952 {
 953     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 954     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 955     tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
 956     return true;
 957 }
 958
 959 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
 960 {
 961     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 962     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 963     tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
 964     return true;
 965 }
 966
 967 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
 968 {
 969     TCGv_i64 reg = cpu_reg(s, a->rd);
 970     tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
 971     return true;
 972 }
 973
 974 /*
 975  *** SVE Compute Vector Address Group
 976  */
 977
 978 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 979 {
 980     if (sve_access_check(s)) {
 981         unsigned vsz = vec_full_reg_size(s);
 982         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 983                            vec_full_reg_offset(s, a->rn),
 984                            vec_full_reg_offset(s, a->rm),
 985                            vsz, vsz, a->imm, fn);
 986     }
 987     return true;
 988 }
 989
 990 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
 991 {
 992     return do_adr(s, a, gen_helper_sve_adr_p32);
 993 }
 994
 995 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
 996 {
 997     return do_adr(s, a, gen_helper_sve_adr_p64);
 998 }
 999
1000 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
1001 {
1002     return do_adr(s, a, gen_helper_sve_adr_s32);
1003 }
1004
1005 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
1006 {
1007     return do_adr(s, a, gen_helper_sve_adr_u32);
1008 }
1009
1010 /*
1011  *** SVE Integer Misc - Unpredicated Group
1012  */
1013
1014 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1015 {
1016     static gen_helper_gvec_2 * const fns[4] = {
1017         NULL,
1018         gen_helper_sve_fexpa_h,
1019         gen_helper_sve_fexpa_s,
1020         gen_helper_sve_fexpa_d,
1021     };
1022     if (a->esz == 0) {
1023         return false;
1024     }
1025     if (sve_access_check(s)) {
1026         unsigned vsz = vec_full_reg_size(s);
1027         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1028                            vec_full_reg_offset(s, a->rn),
1029                            vsz, vsz, 0, fns[a->esz]);
1030     }
1031     return true;
1032 }
1033
1034 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
1035 {
1036     static gen_helper_gvec_3 * const fns[4] = {
1037         NULL,
1038         gen_helper_sve_ftssel_h,
1039         gen_helper_sve_ftssel_s,
1040         gen_helper_sve_ftssel_d,
1041     };
1042     if (a->esz == 0) {
1043         return false;
1044     }
1045     if (sve_access_check(s)) {
1046         unsigned vsz = vec_full_reg_size(s);
1047         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1048                            vec_full_reg_offset(s, a->rn),
1049                            vec_full_reg_offset(s, a->rm),
1050                            vsz, vsz, 0, fns[a->esz]);
1051     }
1052     return true;
1053 }
1054
1055 /*
1056  *** SVE Predicate Logical Operations Group
1057  */
1058
1059 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1060                           const GVecGen4 *gvec_op)
1061 {
1062     if (!sve_access_check(s)) {
1063         return true;
1064     }
1065
1066     unsigned psz = pred_gvec_reg_size(s);
1067     int dofs = pred_full_reg_offset(s, a->rd);
1068     int nofs = pred_full_reg_offset(s, a->rn);
1069     int mofs = pred_full_reg_offset(s, a->rm);
1070     int gofs = pred_full_reg_offset(s, a->pg);
1071
1072     if (psz == 8) {
1073         /* Do the operation and the flags generation in temps.  */
1074         TCGv_i64 pd = tcg_temp_new_i64();
1075         TCGv_i64 pn = tcg_temp_new_i64();
1076         TCGv_i64 pm = tcg_temp_new_i64();
1077         TCGv_i64 pg = tcg_temp_new_i64();
1078
1079         tcg_gen_ld_i64(pn, cpu_env, nofs);
1080         tcg_gen_ld_i64(pm, cpu_env, mofs);
1081         tcg_gen_ld_i64(pg, cpu_env, gofs);
1082
1083         gvec_op->fni8(pd, pn, pm, pg);
1084         tcg_gen_st_i64(pd, cpu_env, dofs);
1085
1086         do_predtest1(pd, pg);
1087
1088         tcg_temp_free_i64(pd);
1089         tcg_temp_free_i64(pn);
1090         tcg_temp_free_i64(pm);
1091         tcg_temp_free_i64(pg);
1092     } else {
1093         /* The operation and flags generation is large.  The computation
1094          * of the flags depends on the original contents of the guarding
1095          * predicate.  If the destination overwrites the guarding predicate,
1096          * then the easiest way to get this right is to save a copy.
1097           */
1098         int tofs = gofs;
1099         if (a->rd == a->pg) {
1100             tofs = offsetof(CPUARMState, vfp.preg_tmp);
1101             tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1102         }
1103
1104         tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1105         do_predtest(s, dofs, tofs, psz / 8);
1106     }
1107     return true;
1108 }
1109
1110 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1111 {
1112     tcg_gen_and_i64(pd, pn, pm);
1113     tcg_gen_and_i64(pd, pd, pg);
1114 }
1115
1116 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1117                            TCGv_vec pm, TCGv_vec pg)
1118 {
1119     tcg_gen_and_vec(vece, pd, pn, pm);
1120     tcg_gen_and_vec(vece, pd, pd, pg);
1121 }
1122
1123 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1124 {
1125     static const GVecGen4 op = {
1126         .fni8 = gen_and_pg_i64,
1127         .fniv = gen_and_pg_vec,
1128         .fno = gen_helper_sve_and_pppp,
1129         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1130     };
1131     if (a->s) {
1132         return do_pppp_flags(s, a, &op);
1133     } else if (a->rn == a->rm) {
1134         if (a->pg == a->rn) {
1135             return do_mov_p(s, a->rd, a->rn);
1136         } else {
1137             return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1138         }
1139     } else if (a->pg == a->rn || a->pg == a->rm) {
1140         return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1141     } else {
1142         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1143     }
1144 }
1145
1146 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1147 {
1148     tcg_gen_andc_i64(pd, pn, pm);
1149     tcg_gen_and_i64(pd, pd, pg);
1150 }
1151
1152 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1153                            TCGv_vec pm, TCGv_vec pg)
1154 {
1155     tcg_gen_andc_vec(vece, pd, pn, pm);
1156     tcg_gen_and_vec(vece, pd, pd, pg);
1157 }
1158
1159 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1160 {
1161     static const GVecGen4 op = {
1162         .fni8 = gen_bic_pg_i64,
1163         .fniv = gen_bic_pg_vec,
1164         .fno = gen_helper_sve_bic_pppp,
1165         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1166     };
1167     if (a->s) {
1168         return do_pppp_flags(s, a, &op);
1169     } else if (a->pg == a->rn) {
1170         return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1171     } else {
1172         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1173     }
1174 }
1175
1176 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1177 {
1178     tcg_gen_xor_i64(pd, pn, pm);
1179     tcg_gen_and_i64(pd, pd, pg);
1180 }
1181
1182 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1183                            TCGv_vec pm, TCGv_vec pg)
1184 {
1185     tcg_gen_xor_vec(vece, pd, pn, pm);
1186     tcg_gen_and_vec(vece, pd, pd, pg);
1187 }
1188
1189 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1190 {
1191     static const GVecGen4 op = {
1192         .fni8 = gen_eor_pg_i64,
1193         .fniv = gen_eor_pg_vec,
1194         .fno = gen_helper_sve_eor_pppp,
1195         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1196     };
1197     if (a->s) {
1198         return do_pppp_flags(s, a, &op);
1199     } else {
1200         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1201     }
1202 }
1203
1204 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1205 {
1206     tcg_gen_and_i64(pn, pn, pg);
1207     tcg_gen_andc_i64(pm, pm, pg);
1208     tcg_gen_or_i64(pd, pn, pm);
1209 }
1210
1211 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1212                            TCGv_vec pm, TCGv_vec pg)
1213 {
1214     tcg_gen_and_vec(vece, pn, pn, pg);
1215     tcg_gen_andc_vec(vece, pm, pm, pg);
1216     tcg_gen_or_vec(vece, pd, pn, pm);
1217 }
1218
1219 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1220 {
1221     static const GVecGen4 op = {
1222         .fni8 = gen_sel_pg_i64,
1223         .fniv = gen_sel_pg_vec,
1224         .fno = gen_helper_sve_sel_pppp,
1225         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1226     };
1227     if (a->s) {
1228         return false;
1229     } else {
1230         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1231     }
1232 }
1233
1234 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1235 {
1236     tcg_gen_or_i64(pd, pn, pm);
1237     tcg_gen_and_i64(pd, pd, pg);
1238 }
1239
1240 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1241                            TCGv_vec pm, TCGv_vec pg)
1242 {
1243     tcg_gen_or_vec(vece, pd, pn, pm);
1244     tcg_gen_and_vec(vece, pd, pd, pg);
1245 }
1246
1247 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1248 {
1249     static const GVecGen4 op = {
1250         .fni8 = gen_orr_pg_i64,
1251         .fniv = gen_orr_pg_vec,
1252         .fno = gen_helper_sve_orr_pppp,
1253         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1254     };
1255     if (a->s) {
1256         return do_pppp_flags(s, a, &op);
1257     } else if (a->pg == a->rn && a->rn == a->rm) {
1258         return do_mov_p(s, a->rd, a->rn);
1259     } else {
1260         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1261     }
1262 }
1263
1264 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1265 {
1266     tcg_gen_orc_i64(pd, pn, pm);
1267     tcg_gen_and_i64(pd, pd, pg);
1268 }
1269
1270 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1271                            TCGv_vec pm, TCGv_vec pg)
1272 {
1273     tcg_gen_orc_vec(vece, pd, pn, pm);
1274     tcg_gen_and_vec(vece, pd, pd, pg);
1275 }
1276
1277 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1278 {
1279     static const GVecGen4 op = {
1280         .fni8 = gen_orn_pg_i64,
1281         .fniv = gen_orn_pg_vec,
1282         .fno = gen_helper_sve_orn_pppp,
1283         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1284     };
1285     if (a->s) {
1286         return do_pppp_flags(s, a, &op);
1287     } else {
1288         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1289     }
1290 }
1291
1292 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1293 {
1294     tcg_gen_or_i64(pd, pn, pm);
1295     tcg_gen_andc_i64(pd, pg, pd);
1296 }
1297
1298 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1299                            TCGv_vec pm, TCGv_vec pg)
1300 {
1301     tcg_gen_or_vec(vece, pd, pn, pm);
1302     tcg_gen_andc_vec(vece, pd, pg, pd);
1303 }
1304
1305 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1306 {
1307     static const GVecGen4 op = {
1308         .fni8 = gen_nor_pg_i64,
1309         .fniv = gen_nor_pg_vec,
1310         .fno = gen_helper_sve_nor_pppp,
1311         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1312     };
1313     if (a->s) {
1314         return do_pppp_flags(s, a, &op);
1315     } else {
1316         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1317     }
1318 }
1319
1320 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1321 {
1322     tcg_gen_and_i64(pd, pn, pm);
1323     tcg_gen_andc_i64(pd, pg, pd);
1324 }
1325
1326 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1327                            TCGv_vec pm, TCGv_vec pg)
1328 {
1329     tcg_gen_and_vec(vece, pd, pn, pm);
1330     tcg_gen_andc_vec(vece, pd, pg, pd);
1331 }
1332
1333 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1334 {
1335     static const GVecGen4 op = {
1336         .fni8 = gen_nand_pg_i64,
1337         .fniv = gen_nand_pg_vec,
1338         .fno = gen_helper_sve_nand_pppp,
1339         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1340     };
1341     if (a->s) {
1342         return do_pppp_flags(s, a, &op);
1343     } else {
1344         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1345     }
1346 }
1347
1348 /*
1349  *** SVE Predicate Misc Group
1350  */
1351
1352 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1353 {
1354     if (sve_access_check(s)) {
1355         int nofs = pred_full_reg_offset(s, a->rn);
1356         int gofs = pred_full_reg_offset(s, a->pg);
1357         int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1358
1359         if (words == 1) {
1360             TCGv_i64 pn = tcg_temp_new_i64();
1361             TCGv_i64 pg = tcg_temp_new_i64();
1362
1363             tcg_gen_ld_i64(pn, cpu_env, nofs);
1364             tcg_gen_ld_i64(pg, cpu_env, gofs);
1365             do_predtest1(pn, pg);
1366
1367             tcg_temp_free_i64(pn);
1368             tcg_temp_free_i64(pg);
1369         } else {
1370             do_predtest(s, nofs, gofs, words);
1371         }
1372     }
1373     return true;
1374 }
1375
/*
 * See the ARM pseudocode DecodePredCount.
 *
 * Return the number of elements selected by predicate-constraint PATTERN
 * for a vector of FULLSZ bytes holding elements of (1 << ESZ) bytes.
 * Fixed-count patterns (VL1..VL256) yield 0 when the vector has fewer
 * elements than requested; unrecognised patterns also yield 0.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern == 0x0) {
        /* POW2 */
        return pow2floor(elements);
    }
    if (pattern == 0x1f) {
        /* ALL */
        return elements;
    }
    if (pattern == 0x1e) {
        /* MUL3 */
        return elements - elements % 3;
    }
    if (pattern == 0x1d) {
        /* MUL4 */
        return elements - elements % 4;
    }

    if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1 .. VL8 */
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256 */
        bound = 16 << (pattern - 9);
    } else {
        /* #uimm5 */
        return 0;
    }

    return elements >= bound ? bound : 0;
}
1413
1414 /* This handles all of the predicate initialization instructions,
1415  * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
1416  * so that decode_pred_count returns 0.  For SETFFR, we will have
1417  * set RD == 16 == FFR.
1418  */
1419 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1420 {
1421     if (!sve_access_check(s)) {
1422         return true;
1423     }
1424
1425     unsigned fullsz = vec_full_reg_size(s);
1426     unsigned ofs = pred_full_reg_offset(s, rd);
1427     unsigned numelem, setsz, i;
1428     uint64_t word, lastword;
1429     TCGv_i64 t;
1430
1431     numelem = decode_pred_count(fullsz, pat, esz);
1432
1433     /* Determine what we must store into each bit, and how many.  */
1434     if (numelem == 0) {
1435         lastword = word = 0;
1436         setsz = fullsz;
1437     } else {
1438         setsz = numelem << esz;
1439         lastword = word = pred_esz_masks[esz];
1440         if (setsz % 64) {
1441             lastword &= MAKE_64BIT_MASK(0, setsz % 64);
1442         }
1443     }
1444
1445     t = tcg_temp_new_i64();
1446     if (fullsz <= 64) {
1447         tcg_gen_movi_i64(t, lastword);
1448         tcg_gen_st_i64(t, cpu_env, ofs);
1449         goto done;
1450     }
1451
1452     if (word == lastword) {
1453         unsigned maxsz = size_for_gvec(fullsz / 8);
1454         unsigned oprsz = size_for_gvec(setsz / 8);
1455
1456         if (oprsz * 8 == setsz) {
1457             tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1458             goto done;
1459         }
1460     }
1461
1462     setsz /= 8;
1463     fullsz /= 8;
1464
1465     tcg_gen_movi_i64(t, word);
1466     for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
1467         tcg_gen_st_i64(t, cpu_env, ofs + i);
1468     }
1469     if (lastword != word) {
1470         tcg_gen_movi_i64(t, lastword);
1471         tcg_gen_st_i64(t, cpu_env, ofs + i);
1472         i += 8;
1473     }
1474     if (i < fullsz) {
1475         tcg_gen_movi_i64(t, 0);
1476         for (; i < fullsz; i += 8) {
1477             tcg_gen_st_i64(t, cpu_env, ofs + i);
1478         }
1479     }
1480
1481  done:
1482     tcg_temp_free_i64(t);
1483
1484     /* PTRUES */
1485     if (setflag) {
1486         tcg_gen_movi_i32(cpu_NF, -(word != 0));
1487         tcg_gen_movi_i32(cpu_CF, word == 0);
1488         tcg_gen_movi_i32(cpu_VF, 0);
1489         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1490     }
1491     return true;
1492 }
1493
1494 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1495 {
1496     return do_predset(s, a->esz, a->rd, a->pat, a->s);
1497 }
1498
1499 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1500 {
1501     /* Note pat == 31 is #all, to set all elements.  */
1502     return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1503 }
1504
1505 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1506 {
1507     /* Note pat == 32 is #unimp, to set no elements.  */
1508     return do_predset(s, 0, a->rd, 32, false);
1509 }
1510
1511 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1512 {
1513     /* The path through do_pppp_flags is complicated enough to want to avoid
1514      * duplication.  Frob the arguments into the form of a predicated AND.
1515      */
1516     arg_rprr_s alt_a = {
1517         .rd = a->rd, .pg = a->pg, .s = a->s,
1518         .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1519     };
1520     return trans_AND_pppp(s, &alt_a, insn);
1521 }
1522
1523 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1524 {
1525     return do_mov_p(s, a->rd, FFR_PRED_NUM);
1526 }
1527
1528 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1529 {
1530     return do_mov_p(s, FFR_PRED_NUM, a->rn);
1531 }
1532
1533 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1534                             void (*gen_fn)(TCGv_i32, TCGv_ptr,
1535                                            TCGv_ptr, TCGv_i32))
1536 {
1537     if (!sve_access_check(s)) {
1538         return true;
1539     }
1540
1541     TCGv_ptr t_pd = tcg_temp_new_ptr();
1542     TCGv_ptr t_pg = tcg_temp_new_ptr();
1543     TCGv_i32 t;
1544     unsigned desc;
1545
1546     desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1547     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1548
1549     tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1550     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1551     t = tcg_const_i32(desc);
1552
1553     gen_fn(t, t_pd, t_pg, t);
1554     tcg_temp_free_ptr(t_pd);
1555     tcg_temp_free_ptr(t_pg);
1556
1557     do_pred_flags(t);
1558     tcg_temp_free_i32(t);
1559     return true;
1560 }
1561
1562 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1563 {
1564     return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1565 }
1566
1567 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1568 {
1569     return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1570 }
1571
1572 /*
1573  *** SVE Element Count Group
1574  */
1575
1576 /* Perform an inline saturating addition of a 32-bit value within
1577  * a 64-bit register.  The second operand is known to be positive,
1578  * which halves the comparisions we must perform to bound the result.
1579  */
1580 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1581 {
1582     int64_t ibound;
1583     TCGv_i64 bound;
1584     TCGCond cond;
1585
1586     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1587     if (u) {
1588         tcg_gen_ext32u_i64(reg, reg);
1589     } else {
1590         tcg_gen_ext32s_i64(reg, reg);
1591     }
1592     if (d) {
1593         tcg_gen_sub_i64(reg, reg, val);
1594         ibound = (u ? 0 : INT32_MIN);
1595         cond = TCG_COND_LT;
1596     } else {
1597         tcg_gen_add_i64(reg, reg, val);
1598         ibound = (u ? UINT32_MAX : INT32_MAX);
1599         cond = TCG_COND_GT;
1600     }
1601     bound = tcg_const_i64(ibound);
1602     tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1603     tcg_temp_free_i64(bound);
1604 }
1605
1606 /* Similarly with 64-bit values.  */
1607 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1608 {
1609     TCGv_i64 t0 = tcg_temp_new_i64();
1610     TCGv_i64 t1 = tcg_temp_new_i64();
1611     TCGv_i64 t2;
1612
1613     if (u) {
1614         if (d) {
1615             tcg_gen_sub_i64(t0, reg, val);
1616             tcg_gen_movi_i64(t1, 0);
1617             tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1618         } else {
1619             tcg_gen_add_i64(t0, reg, val);
1620             tcg_gen_movi_i64(t1, -1);
1621             tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1622         }
1623     } else {
1624         if (d) {
1625             /* Detect signed overflow for subtraction.  */
1626             tcg_gen_xor_i64(t0, reg, val);
1627             tcg_gen_sub_i64(t1, reg, val);
1628             tcg_gen_xor_i64(reg, reg, t0);
1629             tcg_gen_and_i64(t0, t0, reg);
1630
1631             /* Bound the result.  */
1632             tcg_gen_movi_i64(reg, INT64_MIN);
1633             t2 = tcg_const_i64(0);
1634             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1635         } else {
1636             /* Detect signed overflow for addition.  */
1637             tcg_gen_xor_i64(t0, reg, val);
1638             tcg_gen_add_i64(reg, reg, val);
1639             tcg_gen_xor_i64(t1, reg, val);
1640             tcg_gen_andc_i64(t0, t1, t0);
1641
1642             /* Bound the result.  */
1643             tcg_gen_movi_i64(t1, INT64_MAX);
1644             t2 = tcg_const_i64(0);
1645             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1646         }
1647         tcg_temp_free_i64(t2);
1648     }
1649     tcg_temp_free_i64(t0);
1650     tcg_temp_free_i64(t1);
1651 }
1652
1653 /* Similarly with a vector and a scalar operand.  */
1654 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1655                               TCGv_i64 val, bool u, bool d)
1656 {
1657     unsigned vsz = vec_full_reg_size(s);
1658     TCGv_ptr dptr, nptr;
1659     TCGv_i32 t32, desc;
1660     TCGv_i64 t64;
1661
1662     dptr = tcg_temp_new_ptr();
1663     nptr = tcg_temp_new_ptr();
1664     tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1665     tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1666     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1667
1668     switch (esz) {
1669     case MO_8:
1670         t32 = tcg_temp_new_i32();
1671         tcg_gen_extrl_i64_i32(t32, val);
1672         if (d) {
1673             tcg_gen_neg_i32(t32, t32);
1674         }
1675         if (u) {
1676             gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1677         } else {
1678             gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1679         }
1680         tcg_temp_free_i32(t32);
1681         break;
1682
1683     case MO_16:
1684         t32 = tcg_temp_new_i32();
1685         tcg_gen_extrl_i64_i32(t32, val);
1686         if (d) {
1687             tcg_gen_neg_i32(t32, t32);
1688         }
1689         if (u) {
1690             gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1691         } else {
1692             gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1693         }
1694         tcg_temp_free_i32(t32);
1695         break;
1696
1697     case MO_32:
1698         t64 = tcg_temp_new_i64();
1699         if (d) {
1700             tcg_gen_neg_i64(t64, val);
1701         } else {
1702             tcg_gen_mov_i64(t64, val);
1703         }
1704         if (u) {
1705             gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1706         } else {
1707             gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1708         }
1709         tcg_temp_free_i64(t64);
1710         break;
1711
1712     case MO_64:
1713         if (u) {
1714             if (d) {
1715                 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1716             } else {
1717                 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1718             }
1719         } else if (d) {
1720             t64 = tcg_temp_new_i64();
1721             tcg_gen_neg_i64(t64, val);
1722             gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1723             tcg_temp_free_i64(t64);
1724         } else {
1725             gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1726         }
1727         break;
1728
1729     default:
1730         g_assert_not_reached();
1731     }
1732
1733     tcg_temp_free_ptr(dptr);
1734     tcg_temp_free_ptr(nptr);
1735     tcg_temp_free_i32(desc);
1736 }
1737
1738 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1739 {
1740     if (sve_access_check(s)) {
1741         unsigned fullsz = vec_full_reg_size(s);
1742         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1743         tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1744     }
1745     return true;
1746 }
1747
1748 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1749 {
1750     if (sve_access_check(s)) {
1751         unsigned fullsz = vec_full_reg_size(s);
1752         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1753         int inc = numelem * a->imm * (a->d ? -1 : 1);
1754         TCGv_i64 reg = cpu_reg(s, a->rd);
1755
1756         tcg_gen_addi_i64(reg, reg, inc);
1757     }
1758     return true;
1759 }
1760
1761 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1762                                uint32_t insn)
1763 {
1764     if (!sve_access_check(s)) {
1765         return true;
1766     }
1767
1768     unsigned fullsz = vec_full_reg_size(s);
1769     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1770     int inc = numelem * a->imm;
1771     TCGv_i64 reg = cpu_reg(s, a->rd);
1772
1773     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1774     if (inc == 0) {
1775         if (a->u) {
1776             tcg_gen_ext32u_i64(reg, reg);
1777         } else {
1778             tcg_gen_ext32s_i64(reg, reg);
1779         }
1780     } else {
1781         TCGv_i64 t = tcg_const_i64(inc);
1782         do_sat_addsub_32(reg, t, a->u, a->d);
1783         tcg_temp_free_i64(t);
1784     }
1785     return true;
1786 }
1787
1788 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1789                                uint32_t insn)
1790 {
1791     if (!sve_access_check(s)) {
1792         return true;
1793     }
1794
1795     unsigned fullsz = vec_full_reg_size(s);
1796     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1797     int inc = numelem * a->imm;
1798     TCGv_i64 reg = cpu_reg(s, a->rd);
1799
1800     if (inc != 0) {
1801         TCGv_i64 t = tcg_const_i64(inc);
1802         do_sat_addsub_64(reg, t, a->u, a->d);
1803         tcg_temp_free_i64(t);
1804     }
1805     return true;
1806 }
1807
1808 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1809 {
1810     if (a->esz == 0) {
1811         return false;
1812     }
1813
1814     unsigned fullsz = vec_full_reg_size(s);
1815     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1816     int inc = numelem * a->imm;
1817
1818     if (inc != 0) {
1819         if (sve_access_check(s)) {
1820             TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1821             tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1822                               vec_full_reg_offset(s, a->rn),
1823                               t, fullsz, fullsz);
1824             tcg_temp_free_i64(t);
1825         }
1826     } else {
1827         do_mov_z(s, a->rd, a->rn);
1828     }
1829     return true;
1830 }
1831
1832 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1833                             uint32_t insn)
1834 {
1835     if (a->esz == 0) {
1836         return false;
1837     }
1838
1839     unsigned fullsz = vec_full_reg_size(s);
1840     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1841     int inc = numelem * a->imm;
1842
1843     if (inc != 0) {
1844         if (sve_access_check(s)) {
1845             TCGv_i64 t = tcg_const_i64(inc);
1846             do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1847             tcg_temp_free_i64(t);
1848         }
1849     } else {
1850         do_mov_z(s, a->rd, a->rn);
1851     }
1852     return true;
1853 }
1854
1855 /*
1856  *** SVE Bitwise Immediate Group
1857  */
1858
1859 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1860 {
1861     uint64_t imm;
1862     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1863                                 extract32(a->dbm, 0, 6),
1864                                 extract32(a->dbm, 6, 6))) {
1865         return false;
1866     }
1867     if (sve_access_check(s)) {
1868         unsigned vsz = vec_full_reg_size(s);
1869         gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1870                 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1871     }
1872     return true;
1873 }
1874
1875 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1876 {
1877     return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1878 }
1879
1880 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1881 {
1882     return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1883 }
1884
1885 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1886 {
1887     return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1888 }
1889
1890 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1891 {
1892     uint64_t imm;
1893     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1894                                 extract32(a->dbm, 0, 6),
1895                                 extract32(a->dbm, 6, 6))) {
1896         return false;
1897     }
1898     if (sve_access_check(s)) {
1899         do_dupi_z(s, a->rd, imm);
1900     }
1901     return true;
1902 }
1903
1904 /*
1905  *** SVE Integer Wide Immediate - Predicated Group
1906  */
1907
1908 /* Implement all merging copies.  This is used for CPY (immediate),
1909  * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1910  */
1911 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1912                      TCGv_i64 val)
1913 {
1914     typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1915     static gen_cpy * const fns[4] = {
1916         gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1917         gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1918     };
1919     unsigned vsz = vec_full_reg_size(s);
1920     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1921     TCGv_ptr t_zd = tcg_temp_new_ptr();
1922     TCGv_ptr t_zn = tcg_temp_new_ptr();
1923     TCGv_ptr t_pg = tcg_temp_new_ptr();
1924
1925     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1926     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1927     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1928
1929     fns[esz](t_zd, t_zn, t_pg, val, desc);
1930
1931     tcg_temp_free_ptr(t_zd);
1932     tcg_temp_free_ptr(t_zn);
1933     tcg_temp_free_ptr(t_pg);
1934     tcg_temp_free_i32(desc);
1935 }
1936
1937 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1938 {
1939     if (a->esz == 0) {
1940         return false;
1941     }
1942     if (sve_access_check(s)) {
1943         /* Decode the VFP immediate.  */
1944         uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1945         TCGv_i64 t_imm = tcg_const_i64(imm);
1946         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1947         tcg_temp_free_i64(t_imm);
1948     }
1949     return true;
1950 }
1951
1952 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1953 {
1954     if (a->esz == 0 && extract32(insn, 13, 1)) {
1955         return false;
1956     }
1957     if (sve_access_check(s)) {
1958         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1959         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1960         tcg_temp_free_i64(t_imm);
1961     }
1962     return true;
1963 }
1964
1965 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1966 {
1967     static gen_helper_gvec_2i * const fns[4] = {
1968         gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1969         gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1970     };
1971
1972     if (a->esz == 0 && extract32(insn, 13, 1)) {
1973         return false;
1974     }
1975     if (sve_access_check(s)) {
1976         unsigned vsz = vec_full_reg_size(s);
1977         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1978         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1979                             pred_full_reg_offset(s, a->pg),
1980                             t_imm, vsz, vsz, 0, fns[a->esz]);
1981         tcg_temp_free_i64(t_imm);
1982     }
1983     return true;
1984 }
1985
1986 /*
1987  *** SVE Permute Extract Group
1988  */
1989
1990 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1991 {
1992     if (!sve_access_check(s)) {
1993         return true;
1994     }
1995
1996     unsigned vsz = vec_full_reg_size(s);
1997     unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1998     unsigned n_siz = vsz - n_ofs;
1999     unsigned d = vec_full_reg_offset(s, a->rd);
2000     unsigned n = vec_full_reg_offset(s, a->rn);
2001     unsigned m = vec_full_reg_offset(s, a->rm);
2002
2003     /* Use host vector move insns if we have appropriate sizes
2004      * and no unfortunate overlap.
2005      */
2006     if (m != d
2007         && n_ofs == size_for_gvec(n_ofs)
2008         && n_siz == size_for_gvec(n_siz)
2009         && (d != n || n_siz <= n_ofs)) {
2010         tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2011         if (n_ofs != 0) {
2012             tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2013         }
2014     } else {
2015         tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2016     }
2017     return true;
2018 }
2019
2020 /*
2021  *** SVE Permute - Unpredicated Group
2022  */
2023
2024 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
2025 {
2026     if (sve_access_check(s)) {
2027         unsigned vsz = vec_full_reg_size(s);
2028         tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2029                              vsz, vsz, cpu_reg_sp(s, a->rn));
2030     }
2031     return true;
2032 }
2033
2034 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2035 {
2036     if ((a->imm & 0x1f) == 0) {
2037         return false;
2038     }
2039     if (sve_access_check(s)) {
2040         unsigned vsz = vec_full_reg_size(s);
2041         unsigned dofs = vec_full_reg_offset(s, a->rd);
2042         unsigned esz, index;
2043
2044         esz = ctz32(a->imm);
2045         index = a->imm >> (esz + 1);
2046
2047         if ((index << esz) < vsz) {
2048             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2049             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2050         } else {
2051             tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2052         }
2053     }
2054     return true;
2055 }
2056
2057 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2058 {
2059     typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2060     static gen_insr * const fns[4] = {
2061         gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2062         gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2063     };
2064     unsigned vsz = vec_full_reg_size(s);
2065     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2066     TCGv_ptr t_zd = tcg_temp_new_ptr();
2067     TCGv_ptr t_zn = tcg_temp_new_ptr();
2068
2069     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2070     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2071
2072     fns[a->esz](t_zd, t_zn, val, desc);
2073
2074     tcg_temp_free_ptr(t_zd);
2075     tcg_temp_free_ptr(t_zn);
2076     tcg_temp_free_i32(desc);
2077 }
2078
2079 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2080 {
2081     if (sve_access_check(s)) {
2082         TCGv_i64 t = tcg_temp_new_i64();
2083         tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2084         do_insr_i64(s, a, t);
2085         tcg_temp_free_i64(t);
2086     }
2087     return true;
2088 }
2089
2090 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2091 {
2092     if (sve_access_check(s)) {
2093         do_insr_i64(s, a, cpu_reg(s, a->rm));
2094     }
2095     return true;
2096 }
2097
2098 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2099 {
2100     static gen_helper_gvec_2 * const fns[4] = {
2101         gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2102         gen_helper_sve_rev_s, gen_helper_sve_rev_d
2103     };
2104
2105     if (sve_access_check(s)) {
2106         unsigned vsz = vec_full_reg_size(s);
2107         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2108                            vec_full_reg_offset(s, a->rn),
2109                            vsz, vsz, 0, fns[a->esz]);
2110     }
2111     return true;
2112 }
2113
2114 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2115 {
2116     static gen_helper_gvec_3 * const fns[4] = {
2117         gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2118         gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2119     };
2120
2121     if (sve_access_check(s)) {
2122         unsigned vsz = vec_full_reg_size(s);
2123         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2124                            vec_full_reg_offset(s, a->rn),
2125                            vec_full_reg_offset(s, a->rm),
2126                            vsz, vsz, 0, fns[a->esz]);
2127     }
2128     return true;
2129 }
2130
2131 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2132 {
2133     static gen_helper_gvec_2 * const fns[4][2] = {
2134         { NULL, NULL },
2135         { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2136         { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2137         { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2138     };
2139
2140     if (a->esz == 0) {
2141         return false;
2142     }
2143     if (sve_access_check(s)) {
2144         unsigned vsz = vec_full_reg_size(s);
2145         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2146                            vec_full_reg_offset(s, a->rn)
2147                            + (a->h ? vsz / 2 : 0),
2148                            vsz, vsz, 0, fns[a->esz][a->u]);
2149     }
2150     return true;
2151 }
2152
2153 /*
2154  *** SVE Permute - Predicates Group
2155  */
2156
2157 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2158                           gen_helper_gvec_3 *fn)
2159 {
2160     if (!sve_access_check(s)) {
2161         return true;
2162     }
2163
2164     unsigned vsz = pred_full_reg_size(s);
2165
2166     /* Predicate sizes may be smaller and cannot use simd_desc.
2167        We cannot round up, as we do elsewhere, because we need
2168        the exact size for ZIP2 and REV.  We retain the style for
2169        the other helpers for consistency.  */
2170     TCGv_ptr t_d = tcg_temp_new_ptr();
2171     TCGv_ptr t_n = tcg_temp_new_ptr();
2172     TCGv_ptr t_m = tcg_temp_new_ptr();
2173     TCGv_i32 t_desc;
2174     int desc;
2175
2176     desc = vsz - 2;
2177     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2178     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2179
2180     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2181     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2182     tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2183     t_desc = tcg_const_i32(desc);
2184
2185     fn(t_d, t_n, t_m, t_desc);
2186
2187     tcg_temp_free_ptr(t_d);
2188     tcg_temp_free_ptr(t_n);
2189     tcg_temp_free_ptr(t_m);
2190     tcg_temp_free_i32(t_desc);
2191     return true;
2192 }
2193
2194 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2195                           gen_helper_gvec_2 *fn)
2196 {
2197     if (!sve_access_check(s)) {
2198         return true;
2199     }
2200
2201     unsigned vsz = pred_full_reg_size(s);
2202     TCGv_ptr t_d = tcg_temp_new_ptr();
2203     TCGv_ptr t_n = tcg_temp_new_ptr();
2204     TCGv_i32 t_desc;
2205     int desc;
2206
2207     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2208     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2209
2210     /* Predicate sizes may be smaller and cannot use simd_desc.
2211        We cannot round up, as we do elsewhere, because we need
2212        the exact size for ZIP2 and REV.  We retain the style for
2213        the other helpers for consistency.  */
2214
2215     desc = vsz - 2;
2216     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2217     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2218     t_desc = tcg_const_i32(desc);
2219
2220     fn(t_d, t_n, t_desc);
2221
2222     tcg_temp_free_i32(t_desc);
2223     tcg_temp_free_ptr(t_d);
2224     tcg_temp_free_ptr(t_n);
2225     return true;
2226 }
2227
2228 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2229 {
2230     return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2231 }
2232
2233 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2234 {
2235     return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2236 }
2237
2238 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2239 {
2240     return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2241 }
2242
2243 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2244 {
2245     return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2246 }
2247
2248 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2249 {
2250     return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2251 }
2252
2253 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2254 {
2255     return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2256 }
2257
2258 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2259 {
2260     return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2261 }
2262
2263 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2264 {
2265     return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2266 }
2267
2268 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2269 {
2270     return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2271 }
2272
2273 /*
2274  *** SVE Permute - Interleaving Group
2275  */
2276
2277 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2278 {
2279     static gen_helper_gvec_3 * const fns[4] = {
2280         gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2281         gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2282     };
2283
2284     if (sve_access_check(s)) {
2285         unsigned vsz = vec_full_reg_size(s);
2286         unsigned high_ofs = high ? vsz / 2 : 0;
2287         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2288                            vec_full_reg_offset(s, a->rn) + high_ofs,
2289                            vec_full_reg_offset(s, a->rm) + high_ofs,
2290                            vsz, vsz, 0, fns[a->esz]);
2291     }
2292     return true;
2293 }
2294
2295 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2296                             gen_helper_gvec_3 *fn)
2297 {
2298     if (sve_access_check(s)) {
2299         unsigned vsz = vec_full_reg_size(s);
2300         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2301                            vec_full_reg_offset(s, a->rn),
2302                            vec_full_reg_offset(s, a->rm),
2303                            vsz, vsz, data, fn);
2304     }
2305     return true;
2306 }
2307
2308 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2309 {
2310     return do_zip(s, a, false);
2311 }
2312
2313 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2314 {
2315     return do_zip(s, a, true);
2316 }
2317
2318 static gen_helper_gvec_3 * const uzp_fns[4] = {
2319     gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2320     gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2321 };
2322
2323 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2324 {
2325     return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2326 }
2327
2328 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2329 {
2330     return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2331 }
2332
2333 static gen_helper_gvec_3 * const trn_fns[4] = {
2334     gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2335     gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2336 };
2337
2338 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2339 {
2340     return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2341 }
2342
2343 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2344 {
2345     return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2346 }
2347
2348 /*
2349  *** SVE Permute Vector - Predicated Group
2350  */
2351
2352 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2353 {
2354     static gen_helper_gvec_3 * const fns[4] = {
2355         NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2356     };
2357     return do_zpz_ool(s, a, fns[a->esz]);
2358 }
2359
2360 /* Call the helper that computes the ARM LastActiveElement pseudocode
2361  * function, scaled by the element size.  This includes the not found
2362  * indication; e.g. not found for esz=3 is -8.
2363  */
2364 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2365 {
2366     /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2367      * round up, as we do elsewhere, because we need the exact size.
2368      */
2369     TCGv_ptr t_p = tcg_temp_new_ptr();
2370     TCGv_i32 t_desc;
2371     unsigned vsz = pred_full_reg_size(s);
2372     unsigned desc;
2373
2374     desc = vsz - 2;
2375     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2376
2377     tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2378     t_desc = tcg_const_i32(desc);
2379
2380     gen_helper_sve_last_active_element(ret, t_p, t_desc);
2381
2382     tcg_temp_free_i32(t_desc);
2383     tcg_temp_free_ptr(t_p);
2384 }
2385
2386 /* Increment LAST to the offset of the next element in the vector,
2387  * wrapping around to 0.
2388  */
2389 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2390 {
2391     unsigned vsz = vec_full_reg_size(s);
2392
2393     tcg_gen_addi_i32(last, last, 1 << esz);
2394     if (is_power_of_2(vsz)) {
2395         tcg_gen_andi_i32(last, last, vsz - 1);
2396     } else {
2397         TCGv_i32 max = tcg_const_i32(vsz);
2398         TCGv_i32 zero = tcg_const_i32(0);
2399         tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2400         tcg_temp_free_i32(max);
2401         tcg_temp_free_i32(zero);
2402     }
2403 }
2404
2405 /* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2406 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2407 {
2408     unsigned vsz = vec_full_reg_size(s);
2409
2410     if (is_power_of_2(vsz)) {
2411         tcg_gen_andi_i32(last, last, vsz - 1);
2412     } else {
2413         TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2414         TCGv_i32 zero = tcg_const_i32(0);
2415         tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2416         tcg_temp_free_i32(max);
2417         tcg_temp_free_i32(zero);
2418     }
2419 }
2420
2421 /* Load an unsigned element of ESZ from BASE+OFS.  */
2422 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2423 {
2424     TCGv_i64 r = tcg_temp_new_i64();
2425
2426     switch (esz) {
2427     case 0:
2428         tcg_gen_ld8u_i64(r, base, ofs);
2429         break;
2430     case 1:
2431         tcg_gen_ld16u_i64(r, base, ofs);
2432         break;
2433     case 2:
2434         tcg_gen_ld32u_i64(r, base, ofs);
2435         break;
2436     case 3:
2437         tcg_gen_ld_i64(r, base, ofs);
2438         break;
2439     default:
2440         g_assert_not_reached();
2441     }
2442     return r;
2443 }
2444
2445 /* Load an unsigned element of ESZ from RM[LAST].  */
2446 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2447                                  int rm, int esz)
2448 {
2449     TCGv_ptr p = tcg_temp_new_ptr();
2450     TCGv_i64 r;
2451
2452     /* Convert offset into vector into offset into ENV.
2453      * The final adjustment for the vector register base
2454      * is added via constant offset to the load.
2455      */
2456 #ifdef HOST_WORDS_BIGENDIAN
2457     /* Adjust for element ordering.  See vec_reg_offset.  */
2458     if (esz < 3) {
2459         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2460     }
2461 #endif
2462     tcg_gen_ext_i32_ptr(p, last);
2463     tcg_gen_add_ptr(p, p, cpu_env);
2464
2465     r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2466     tcg_temp_free_ptr(p);
2467
2468     return r;
2469 }
2470
2471 /* Compute CLAST for a Zreg.  */
2472 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2473 {
2474     TCGv_i32 last;
2475     TCGLabel *over;
2476     TCGv_i64 ele;
2477     unsigned vsz, esz = a->esz;
2478
2479     if (!sve_access_check(s)) {
2480         return true;
2481     }
2482
2483     last = tcg_temp_local_new_i32();
2484     over = gen_new_label();
2485
2486     find_last_active(s, last, esz, a->pg);
2487
2488     /* There is of course no movcond for a 2048-bit vector,
2489      * so we must branch over the actual store.
2490      */
2491     tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2492
2493     if (!before) {
2494         incr_last_active(s, last, esz);
2495     }
2496
2497     ele = load_last_active(s, last, a->rm, esz);
2498     tcg_temp_free_i32(last);
2499
2500     vsz = vec_full_reg_size(s);
2501     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2502     tcg_temp_free_i64(ele);
2503
2504     /* If this insn used MOVPRFX, we may need a second move.  */
2505     if (a->rd != a->rn) {
2506         TCGLabel *done = gen_new_label();
2507         tcg_gen_br(done);
2508
2509         gen_set_label(over);
2510         do_mov_z(s, a->rd, a->rn);
2511
2512         gen_set_label(done);
2513     } else {
2514         gen_set_label(over);
2515     }
2516     return true;
2517 }
2518
2519 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2520 {
2521     return do_clast_vector(s, a, false);
2522 }
2523
2524 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2525 {
2526     return do_clast_vector(s, a, true);
2527 }
2528
2529 /* Compute CLAST for a scalar.  */
2530 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2531                             bool before, TCGv_i64 reg_val)
2532 {
2533     TCGv_i32 last = tcg_temp_new_i32();
2534     TCGv_i64 ele, cmp, zero;
2535
2536     find_last_active(s, last, esz, pg);
2537
2538     /* Extend the original value of last prior to incrementing.  */
2539     cmp = tcg_temp_new_i64();
2540     tcg_gen_ext_i32_i64(cmp, last);
2541
2542     if (!before) {
2543         incr_last_active(s, last, esz);
2544     }
2545
2546     /* The conceit here is that while last < 0 indicates not found, after
2547      * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2548      * from which we can load garbage.  We then discard the garbage with
2549      * a conditional move.
2550      */
2551     ele = load_last_active(s, last, rm, esz);
2552     tcg_temp_free_i32(last);
2553
2554     zero = tcg_const_i64(0);
2555     tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2556
2557     tcg_temp_free_i64(zero);
2558     tcg_temp_free_i64(cmp);
2559     tcg_temp_free_i64(ele);
2560 }
2561
2562 /* Compute CLAST for a Vreg.  */
2563 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2564 {
2565     if (sve_access_check(s)) {
2566         int esz = a->esz;
2567         int ofs = vec_reg_offset(s, a->rd, 0, esz);
2568         TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2569
2570         do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2571         write_fp_dreg(s, a->rd, reg);
2572         tcg_temp_free_i64(reg);
2573     }
2574     return true;
2575 }
2576
2577 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2578 {
2579     return do_clast_fp(s, a, false);
2580 }
2581
2582 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2583 {
2584     return do_clast_fp(s, a, true);
2585 }
2586
2587 /* Compute CLAST for a Xreg.  */
2588 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2589 {
2590     TCGv_i64 reg;
2591
2592     if (!sve_access_check(s)) {
2593         return true;
2594     }
2595
2596     reg = cpu_reg(s, a->rd);
2597     switch (a->esz) {
2598     case 0:
2599         tcg_gen_ext8u_i64(reg, reg);
2600         break;
2601     case 1:
2602         tcg_gen_ext16u_i64(reg, reg);
2603         break;
2604     case 2:
2605         tcg_gen_ext32u_i64(reg, reg);
2606         break;
2607     case 3:
2608         break;
2609     default:
2610         g_assert_not_reached();
2611     }
2612
2613     do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2614     return true;
2615 }
2616
2617 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2618 {
2619     return do_clast_general(s, a, false);
2620 }
2621
2622 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2623 {
2624     return do_clast_general(s, a, true);
2625 }
2626
2627 /* Compute LAST for a scalar.  */
2628 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2629                                int pg, int rm, bool before)
2630 {
2631     TCGv_i32 last = tcg_temp_new_i32();
2632     TCGv_i64 ret;
2633
2634     find_last_active(s, last, esz, pg);
2635     if (before) {
2636         wrap_last_active(s, last, esz);
2637     } else {
2638         incr_last_active(s, last, esz);
2639     }
2640
2641     ret = load_last_active(s, last, rm, esz);
2642     tcg_temp_free_i32(last);
2643     return ret;
2644 }
2645
2646 /* Compute LAST for a Vreg.  */
2647 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2648 {
2649     if (sve_access_check(s)) {
2650         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2651         write_fp_dreg(s, a->rd, val);
2652         tcg_temp_free_i64(val);
2653     }
2654     return true;
2655 }
2656
2657 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2658 {
2659     return do_last_fp(s, a, false);
2660 }
2661
2662 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2663 {
2664     return do_last_fp(s, a, true);
2665 }
2666
2667 /* Compute LAST for a Xreg.  */
2668 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2669 {
2670     if (sve_access_check(s)) {
2671         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2672         tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2673         tcg_temp_free_i64(val);
2674     }
2675     return true;
2676 }
2677
2678 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2679 {
2680     return do_last_general(s, a, false);
2681 }
2682
2683 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2684 {
2685     return do_last_general(s, a, true);
2686 }
2687
2688 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2689 {
2690     if (sve_access_check(s)) {
2691         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2692     }
2693     return true;
2694 }
2695
2696 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2697 {
2698     if (sve_access_check(s)) {
2699         int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2700         TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2701         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2702         tcg_temp_free_i64(t);
2703     }
2704     return true;
2705 }
2706
2707 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2708 {
2709     static gen_helper_gvec_3 * const fns[4] = {
2710         NULL,
2711         gen_helper_sve_revb_h,
2712         gen_helper_sve_revb_s,
2713         gen_helper_sve_revb_d,
2714     };
2715     return do_zpz_ool(s, a, fns[a->esz]);
2716 }
2717
2718 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2719 {
2720     static gen_helper_gvec_3 * const fns[4] = {
2721         NULL,
2722         NULL,
2723         gen_helper_sve_revh_s,
2724         gen_helper_sve_revh_d,
2725     };
2726     return do_zpz_ool(s, a, fns[a->esz]);
2727 }
2728
2729 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2730 {
2731     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2732 }
2733
2734 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2735 {
2736     static gen_helper_gvec_3 * const fns[4] = {
2737         gen_helper_sve_rbit_b,
2738         gen_helper_sve_rbit_h,
2739         gen_helper_sve_rbit_s,
2740         gen_helper_sve_rbit_d,
2741     };
2742     return do_zpz_ool(s, a, fns[a->esz]);
2743 }
2744
2745 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2746 {
2747     if (sve_access_check(s)) {
2748         unsigned vsz = vec_full_reg_size(s);
2749         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2750                            vec_full_reg_offset(s, a->rn),
2751                            vec_full_reg_offset(s, a->rm),
2752                            pred_full_reg_offset(s, a->pg),
2753                            vsz, vsz, a->esz, gen_helper_sve_splice);
2754     }
2755     return true;
2756 }
2757
2758 /*
2759  *** SVE Integer Compare - Vectors Group
2760  */
2761
2762 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2763                           gen_helper_gvec_flags_4 *gen_fn)
2764 {
2765     TCGv_ptr pd, zn, zm, pg;
2766     unsigned vsz;
2767     TCGv_i32 t;
2768
2769     if (gen_fn == NULL) {
2770         return false;
2771     }
2772     if (!sve_access_check(s)) {
2773         return true;
2774     }
2775
2776     vsz = vec_full_reg_size(s);
2777     t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2778     pd = tcg_temp_new_ptr();
2779     zn = tcg_temp_new_ptr();
2780     zm = tcg_temp_new_ptr();
2781     pg = tcg_temp_new_ptr();
2782
2783     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2784     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2785     tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2786     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2787
2788     gen_fn(t, pd, zn, zm, pg, t);
2789
2790     tcg_temp_free_ptr(pd);
2791     tcg_temp_free_ptr(zn);
2792     tcg_temp_free_ptr(zm);
2793     tcg_temp_free_ptr(pg);
2794
2795     do_pred_flags(t);
2796
2797     tcg_temp_free_i32(t);
2798     return true;
2799 }
2800
2801 #define DO_PPZZ(NAME, name) \
2802 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,         \
2803                                 uint32_t insn)                            \
2804 {                                                                         \
2805     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2806         gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
2807         gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
2808     };                                                                    \
2809     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2810 }
2811
2812 DO_PPZZ(CMPEQ, cmpeq)
2813 DO_PPZZ(CMPNE, cmpne)
2814 DO_PPZZ(CMPGT, cmpgt)
2815 DO_PPZZ(CMPGE, cmpge)
2816 DO_PPZZ(CMPHI, cmphi)
2817 DO_PPZZ(CMPHS, cmphs)
2818
2819 #undef DO_PPZZ
2820
2821 #define DO_PPZW(NAME, name) \
2822 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a,         \
2823                                 uint32_t insn)                            \
2824 {                                                                         \
2825     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2826         gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
2827         gen_helper_sve_##name##_ppzw_s, NULL                              \
2828     };                                                                    \
2829     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2830 }
2831
2832 DO_PPZW(CMPEQ, cmpeq)
2833 DO_PPZW(CMPNE, cmpne)
2834 DO_PPZW(CMPGT, cmpgt)
2835 DO_PPZW(CMPGE, cmpge)
2836 DO_PPZW(CMPHI, cmphi)
2837 DO_PPZW(CMPHS, cmphs)
2838 DO_PPZW(CMPLT, cmplt)
2839 DO_PPZW(CMPLE, cmple)
2840 DO_PPZW(CMPLO, cmplo)
2841 DO_PPZW(CMPLS, cmpls)
2842
2843 #undef DO_PPZW
2844
2845 /*
2846  *** SVE Integer Compare - Immediate Groups
2847  */
2848
2849 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2850                           gen_helper_gvec_flags_3 *gen_fn)
2851 {
2852     TCGv_ptr pd, zn, pg;
2853     unsigned vsz;
2854     TCGv_i32 t;
2855
2856     if (gen_fn == NULL) {
2857         return false;
2858     }
2859     if (!sve_access_check(s)) {
2860         return true;
2861     }
2862
2863     vsz = vec_full_reg_size(s);
2864     t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2865     pd = tcg_temp_new_ptr();
2866     zn = tcg_temp_new_ptr();
2867     pg = tcg_temp_new_ptr();
2868
2869     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2870     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2871     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2872
2873     gen_fn(t, pd, zn, pg, t);
2874
2875     tcg_temp_free_ptr(pd);
2876     tcg_temp_free_ptr(zn);
2877     tcg_temp_free_ptr(pg);
2878
2879     do_pred_flags(t);
2880
2881     tcg_temp_free_i32(t);
2882     return true;
2883 }
2884
2885 #define DO_PPZI(NAME, name) \
2886 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a,         \
2887                                 uint32_t insn)                            \
2888 {                                                                         \
2889     static gen_helper_gvec_flags_3 * const fns[4] = {                     \
2890         gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
2891         gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
2892     };                                                                    \
2893     return do_ppzi_flags(s, a, fns[a->esz]);                              \
2894 }
2895
2896 DO_PPZI(CMPEQ, cmpeq)
2897 DO_PPZI(CMPNE, cmpne)
2898 DO_PPZI(CMPGT, cmpgt)
2899 DO_PPZI(CMPGE, cmpge)
2900 DO_PPZI(CMPHI, cmphi)
2901 DO_PPZI(CMPHS, cmphs)
2902 DO_PPZI(CMPLT, cmplt)
2903 DO_PPZI(CMPLE, cmple)
2904 DO_PPZI(CMPLO, cmplo)
2905 DO_PPZI(CMPLS, cmpls)
2906
2907 #undef DO_PPZI
2908
2909 /*
2910  *** SVE Partition Break Group
2911  */
2912
2913 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2914                     gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2915 {
2916     if (!sve_access_check(s)) {
2917         return true;
2918     }
2919
2920     unsigned vsz = pred_full_reg_size(s);
2921
2922     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2923     TCGv_ptr d = tcg_temp_new_ptr();
2924     TCGv_ptr n = tcg_temp_new_ptr();
2925     TCGv_ptr m = tcg_temp_new_ptr();
2926     TCGv_ptr g = tcg_temp_new_ptr();
2927     TCGv_i32 t = tcg_const_i32(vsz - 2);
2928
2929     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2930     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2931     tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2932     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2933
2934     if (a->s) {
2935         fn_s(t, d, n, m, g, t);
2936         do_pred_flags(t);
2937     } else {
2938         fn(d, n, m, g, t);
2939     }
2940     tcg_temp_free_ptr(d);
2941     tcg_temp_free_ptr(n);
2942     tcg_temp_free_ptr(m);
2943     tcg_temp_free_ptr(g);
2944     tcg_temp_free_i32(t);
2945     return true;
2946 }
2947
2948 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2949                     gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2950 {
2951     if (!sve_access_check(s)) {
2952         return true;
2953     }
2954
2955     unsigned vsz = pred_full_reg_size(s);
2956
2957     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2958     TCGv_ptr d = tcg_temp_new_ptr();
2959     TCGv_ptr n = tcg_temp_new_ptr();
2960     TCGv_ptr g = tcg_temp_new_ptr();
2961     TCGv_i32 t = tcg_const_i32(vsz - 2);
2962
2963     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2964     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2965     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2966
2967     if (a->s) {
2968         fn_s(t, d, n, g, t);
2969         do_pred_flags(t);
2970     } else {
2971         fn(d, n, g, t);
2972     }
2973     tcg_temp_free_ptr(d);
2974     tcg_temp_free_ptr(n);
2975     tcg_temp_free_ptr(g);
2976     tcg_temp_free_i32(t);
2977     return true;
2978 }
2979
2980 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2981 {
2982     return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2983 }
2984
2985 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2986 {
2987     return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2988 }
2989
2990 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2991 {
2992     return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2993 }
2994
2995 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2996 {
2997     return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2998 }
2999
3000 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3001 {
3002     return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
3003 }
3004
3005 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3006 {
3007     return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3008 }
3009
3010 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
3011 {
3012     return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3013 }
3014
3015 /*
3016  *** SVE Predicate Count Group
3017  */
3018
3019 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3020 {
3021     unsigned psz = pred_full_reg_size(s);
3022
3023     if (psz <= 8) {
3024         uint64_t psz_mask;
3025
3026         tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3027         if (pn != pg) {
3028             TCGv_i64 g = tcg_temp_new_i64();
3029             tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3030             tcg_gen_and_i64(val, val, g);
3031             tcg_temp_free_i64(g);
3032         }
3033
3034         /* Reduce the pred_esz_masks value simply to reduce the
3035          * size of the code generated here.
3036          */
3037         psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3038         tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3039
3040         tcg_gen_ctpop_i64(val, val);
3041     } else {
3042         TCGv_ptr t_pn = tcg_temp_new_ptr();
3043         TCGv_ptr t_pg = tcg_temp_new_ptr();
3044         unsigned desc;
3045         TCGv_i32 t_desc;
3046
3047         desc = psz - 2;
3048         desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3049
3050         tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3051         tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3052         t_desc = tcg_const_i32(desc);
3053
3054         gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3055         tcg_temp_free_ptr(t_pn);
3056         tcg_temp_free_ptr(t_pg);
3057         tcg_temp_free_i32(t_desc);
3058     }
3059 }
3060
3061 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3062 {
3063     if (sve_access_check(s)) {
3064         do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3065     }
3066     return true;
3067 }
3068
3069 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3070                             uint32_t insn)
3071 {
3072     if (sve_access_check(s)) {
3073         TCGv_i64 reg = cpu_reg(s, a->rd);
3074         TCGv_i64 val = tcg_temp_new_i64();
3075
3076         do_cntp(s, val, a->esz, a->pg, a->pg);
3077         if (a->d) {
3078             tcg_gen_sub_i64(reg, reg, val);
3079         } else {
3080             tcg_gen_add_i64(reg, reg, val);
3081         }
3082         tcg_temp_free_i64(val);
3083     }
3084     return true;
3085 }
3086
3087 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3088                             uint32_t insn)
3089 {
3090     if (a->esz == 0) {
3091         return false;
3092     }
3093     if (sve_access_check(s)) {
3094         unsigned vsz = vec_full_reg_size(s);
3095         TCGv_i64 val = tcg_temp_new_i64();
3096         GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3097
3098         do_cntp(s, val, a->esz, a->pg, a->pg);
3099         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3100                 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3101     }
3102     return true;
3103 }
3104
3105 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3106                                 uint32_t insn)
3107 {
3108     if (sve_access_check(s)) {
3109         TCGv_i64 reg = cpu_reg(s, a->rd);
3110         TCGv_i64 val = tcg_temp_new_i64();
3111
3112         do_cntp(s, val, a->esz, a->pg, a->pg);
3113         do_sat_addsub_32(reg, val, a->u, a->d);
3114     }
3115     return true;
3116 }
3117
3118 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3119                                 uint32_t insn)
3120 {
3121     if (sve_access_check(s)) {
3122         TCGv_i64 reg = cpu_reg(s, a->rd);
3123         TCGv_i64 val = tcg_temp_new_i64();
3124
3125         do_cntp(s, val, a->esz, a->pg, a->pg);
3126         do_sat_addsub_64(reg, val, a->u, a->d);
3127     }
3128     return true;
3129 }
3130
3131 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3132                              uint32_t insn)
3133 {
3134     if (a->esz == 0) {
3135         return false;
3136     }
3137     if (sve_access_check(s)) {
3138         TCGv_i64 val = tcg_temp_new_i64();
3139         do_cntp(s, val, a->esz, a->pg, a->pg);
3140         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3141     }
3142     return true;
3143 }
3144
3145 /*
3146  *** SVE Integer Compare Scalars Group
3147  */
3148
3149 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3150 {
3151     if (!sve_access_check(s)) {
3152         return true;
3153     }
3154
3155     TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3156     TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3157     TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3158     TCGv_i64 cmp = tcg_temp_new_i64();
3159
3160     tcg_gen_setcond_i64(cond, cmp, rn, rm);
3161     tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3162     tcg_temp_free_i64(cmp);
3163
3164     /* VF = !NF & !CF.  */
3165     tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3166     tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3167
3168     /* Both NF and VF actually look at bit 31.  */
3169     tcg_gen_neg_i32(cpu_NF, cpu_NF);
3170     tcg_gen_neg_i32(cpu_VF, cpu_VF);
3171     return true;
3172 }
3173
3174 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3175 {
3176     if (!sve_access_check(s)) {
3177         return true;
3178     }
3179
3180     TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3181     TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3182     TCGv_i64 t0 = tcg_temp_new_i64();
3183     TCGv_i64 t1 = tcg_temp_new_i64();
3184     TCGv_i32 t2, t3;
3185     TCGv_ptr ptr;
3186     unsigned desc, vsz = vec_full_reg_size(s);
3187     TCGCond cond;
3188
3189     if (!a->sf) {
3190         if (a->u) {
3191             tcg_gen_ext32u_i64(op0, op0);
3192             tcg_gen_ext32u_i64(op1, op1);
3193         } else {
3194             tcg_gen_ext32s_i64(op0, op0);
3195             tcg_gen_ext32s_i64(op1, op1);
3196         }
3197     }
3198
3199     /* For the helper, compress the different conditions into a computation
3200      * of how many iterations for which the condition is true.
3201      *
3202      * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3203      * 2**64 iterations, overflowing to 0.  Of course, predicate registers
3204      * aren't that large, so any value >= predicate size is sufficient.
3205      */
3206     tcg_gen_sub_i64(t0, op1, op0);
3207
3208     /* t0 = MIN(op1 - op0, vsz).  */
3209     tcg_gen_movi_i64(t1, vsz);
3210     tcg_gen_umin_i64(t0, t0, t1);
3211     if (a->eq) {
3212         /* Equality means one more iteration.  */
3213         tcg_gen_addi_i64(t0, t0, 1);
3214     }
3215
3216     /* t0 = (condition true ? t0 : 0).  */
3217     cond = (a->u
3218             ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3219             : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3220     tcg_gen_movi_i64(t1, 0);
3221     tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3222
3223     t2 = tcg_temp_new_i32();
3224     tcg_gen_extrl_i64_i32(t2, t0);
3225     tcg_temp_free_i64(t0);
3226     tcg_temp_free_i64(t1);
3227
3228     desc = (vsz / 8) - 2;
3229     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3230     t3 = tcg_const_i32(desc);
3231
3232     ptr = tcg_temp_new_ptr();
3233     tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3234
3235     gen_helper_sve_while(t2, ptr, t2, t3);
3236     do_pred_flags(t2);
3237
3238     tcg_temp_free_ptr(ptr);
3239     tcg_temp_free_i32(t2);
3240     tcg_temp_free_i32(t3);
3241     return true;
3242 }
3243
3244 /*
3245  *** SVE Integer Wide Immediate - Unpredicated Group
3246  */
3247
3248 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3249 {
3250     if (a->esz == 0) {
3251         return false;
3252     }
3253     if (sve_access_check(s)) {
3254         unsigned vsz = vec_full_reg_size(s);
3255         int dofs = vec_full_reg_offset(s, a->rd);
3256         uint64_t imm;
3257
3258         /* Decode the VFP immediate.  */
3259         imm = vfp_expand_imm(a->esz, a->imm);
3260         imm = dup_const(a->esz, imm);
3261
3262         tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3263     }
3264     return true;
3265 }
3266
3267 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3268 {
3269     if (a->esz == 0 && extract32(insn, 13, 1)) {
3270         return false;
3271     }
3272     if (sve_access_check(s)) {
3273         unsigned vsz = vec_full_reg_size(s);
3274         int dofs = vec_full_reg_offset(s, a->rd);
3275
3276         tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3277     }
3278     return true;
3279 }
3280
3281 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3282 {
3283     if (a->esz == 0 && extract32(insn, 13, 1)) {
3284         return false;
3285     }
3286     if (sve_access_check(s)) {
3287         unsigned vsz = vec_full_reg_size(s);
3288         tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3289                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3290     }
3291     return true;
3292 }
3293
3294 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3295 {
3296     a->imm = -a->imm;
3297     return trans_ADD_zzi(s, a, insn);
3298 }
3299
3300 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3301 {
3302     static const GVecGen2s op[4] = {
3303         { .fni8 = tcg_gen_vec_sub8_i64,
3304           .fniv = tcg_gen_sub_vec,
3305           .fno = gen_helper_sve_subri_b,
3306           .opc = INDEX_op_sub_vec,
3307           .vece = MO_8,
3308           .scalar_first = true },
3309         { .fni8 = tcg_gen_vec_sub16_i64,
3310           .fniv = tcg_gen_sub_vec,
3311           .fno = gen_helper_sve_subri_h,
3312           .opc = INDEX_op_sub_vec,
3313           .vece = MO_16,
3314           .scalar_first = true },
3315         { .fni4 = tcg_gen_sub_i32,
3316           .fniv = tcg_gen_sub_vec,
3317           .fno = gen_helper_sve_subri_s,
3318           .opc = INDEX_op_sub_vec,
3319           .vece = MO_32,
3320           .scalar_first = true },
3321         { .fni8 = tcg_gen_sub_i64,
3322           .fniv = tcg_gen_sub_vec,
3323           .fno = gen_helper_sve_subri_d,
3324           .opc = INDEX_op_sub_vec,
3325           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3326           .vece = MO_64,
3327           .scalar_first = true }
3328     };
3329
3330     if (a->esz == 0 && extract32(insn, 13, 1)) {
3331         return false;
3332     }
3333     if (sve_access_check(s)) {
3334         unsigned vsz = vec_full_reg_size(s);
3335         TCGv_i64 c = tcg_const_i64(a->imm);
3336         tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3337                         vec_full_reg_offset(s, a->rn),
3338                         vsz, vsz, c, &op[a->esz]);
3339         tcg_temp_free_i64(c);
3340     }
3341     return true;
3342 }
3343
3344 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3345 {
3346     if (sve_access_check(s)) {
3347         unsigned vsz = vec_full_reg_size(s);
3348         tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3349                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3350     }
3351     return true;
3352 }
3353
3354 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3355                        bool u, bool d)
3356 {
3357     if (a->esz == 0 && extract32(insn, 13, 1)) {
3358         return false;
3359     }
3360     if (sve_access_check(s)) {
3361         TCGv_i64 val = tcg_const_i64(a->imm);
3362         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3363         tcg_temp_free_i64(val);
3364     }
3365     return true;
3366 }
3367
3368 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3369 {
3370     return do_zzi_sat(s, a, insn, false, false);
3371 }
3372
3373 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3374 {
3375     return do_zzi_sat(s, a, insn, true, false);
3376 }
3377
3378 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3379 {
3380     return do_zzi_sat(s, a, insn, false, true);
3381 }
3382
3383 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3384 {
3385     return do_zzi_sat(s, a, insn, true, true);
3386 }
3387
3388 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3389 {
3390     if (sve_access_check(s)) {
3391         unsigned vsz = vec_full_reg_size(s);
3392         TCGv_i64 c = tcg_const_i64(a->imm);
3393
3394         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3395                             vec_full_reg_offset(s, a->rn),
3396                             c, vsz, vsz, 0, fn);
3397         tcg_temp_free_i64(c);
3398     }
3399     return true;
3400 }
3401
3402 #define DO_ZZI(NAME, name) \
3403 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a,         \
3404                                uint32_t insn)                           \
3405 {                                                                       \
3406     static gen_helper_gvec_2i * const fns[4] = {                        \
3407         gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
3408         gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
3409     };                                                                  \
3410     return do_zzi_ool(s, a, fns[a->esz]);                               \
3411 }
3412
3413 DO_ZZI(SMAX, smax)
3414 DO_ZZI(UMAX, umax)
3415 DO_ZZI(SMIN, smin)
3416 DO_ZZI(UMIN, umin)
3417
3418 #undef DO_ZZI
3419
3420 static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a, uint32_t insn)
3421 {
3422     static gen_helper_gvec_3 * const fns[2][2] = {
3423         { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3424         { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3425     };
3426
3427     if (sve_access_check(s)) {
3428         unsigned vsz = vec_full_reg_size(s);
3429         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3430                            vec_full_reg_offset(s, a->rn),
3431                            vec_full_reg_offset(s, a->rm),
3432                            vsz, vsz, 0, fns[a->u][a->sz]);
3433     }
3434     return true;
3435 }
3436
3437 static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a, uint32_t insn)
3438 {
3439     static gen_helper_gvec_3 * const fns[2][2] = {
3440         { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3441         { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3442     };
3443
3444     if (sve_access_check(s)) {
3445         unsigned vsz = vec_full_reg_size(s);
3446         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3447                            vec_full_reg_offset(s, a->rn),
3448                            vec_full_reg_offset(s, a->rm),
3449                            vsz, vsz, a->index, fns[a->u][a->sz]);
3450     }
3451     return true;
3452 }
3453
3454
3455 /*
3456  *** SVE Floating Point Multiply-Add Indexed Group
3457  */
3458
3459 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
3460 {
3461     static gen_helper_gvec_4_ptr * const fns[3] = {
3462         gen_helper_gvec_fmla_idx_h,
3463         gen_helper_gvec_fmla_idx_s,
3464         gen_helper_gvec_fmla_idx_d,
3465     };
3466
3467     if (sve_access_check(s)) {
3468         unsigned vsz = vec_full_reg_size(s);
3469         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3470         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3471                            vec_full_reg_offset(s, a->rn),
3472                            vec_full_reg_offset(s, a->rm),
3473                            vec_full_reg_offset(s, a->ra),
3474                            status, vsz, vsz, (a->index << 1) | a->sub,
3475                            fns[a->esz - 1]);
3476         tcg_temp_free_ptr(status);
3477     }
3478     return true;
3479 }
3480
3481 /*
3482  *** SVE Floating Point Multiply Indexed Group
3483  */
3484
3485 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
3486 {
3487     static gen_helper_gvec_3_ptr * const fns[3] = {
3488         gen_helper_gvec_fmul_idx_h,
3489         gen_helper_gvec_fmul_idx_s,
3490         gen_helper_gvec_fmul_idx_d,
3491     };
3492
3493     if (sve_access_check(s)) {
3494         unsigned vsz = vec_full_reg_size(s);
3495         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3496         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3497                            vec_full_reg_offset(s, a->rn),
3498                            vec_full_reg_offset(s, a->rm),
3499                            status, vsz, vsz, a->index, fns[a->esz - 1]);
3500         tcg_temp_free_ptr(status);
3501     }
3502     return true;
3503 }
3504
3505 /*
3506  *** SVE Floating Point Fast Reduction Group
3507  */
3508
3509 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3510                                   TCGv_ptr, TCGv_i32);
3511
3512 static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3513                       gen_helper_fp_reduce *fn)
3514 {
3515     unsigned vsz = vec_full_reg_size(s);
3516     unsigned p2vsz = pow2ceil(vsz);
3517     TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3518     TCGv_ptr t_zn, t_pg, status;
3519     TCGv_i64 temp;
3520
3521     temp = tcg_temp_new_i64();
3522     t_zn = tcg_temp_new_ptr();
3523     t_pg = tcg_temp_new_ptr();
3524
3525     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3526     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3527     status = get_fpstatus_ptr(a->esz == MO_16);
3528
3529     fn(temp, t_zn, t_pg, status, t_desc);
3530     tcg_temp_free_ptr(t_zn);
3531     tcg_temp_free_ptr(t_pg);
3532     tcg_temp_free_ptr(status);
3533     tcg_temp_free_i32(t_desc);
3534
3535     write_fp_dreg(s, a->rd, temp);
3536     tcg_temp_free_i64(temp);
3537 }
3538
3539 #define DO_VPZ(NAME, name) \
3540 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
3541 {                                                                        \
3542     static gen_helper_fp_reduce * const fns[3] = {                       \
3543         gen_helper_sve_##name##_h,                                       \
3544         gen_helper_sve_##name##_s,                                       \
3545         gen_helper_sve_##name##_d,                                       \
3546     };                                                                   \
3547     if (a->esz == 0) {                                                   \
3548         return false;                                                    \
3549     }                                                                    \
3550     if (sve_access_check(s)) {                                           \
3551         do_reduce(s, a, fns[a->esz - 1]);                                \
3552     }                                                                    \
3553     return true;                                                         \
3554 }
3555
3556 DO_VPZ(FADDV, faddv)
3557 DO_VPZ(FMINNMV, fminnmv)
3558 DO_VPZ(FMAXNMV, fmaxnmv)
3559 DO_VPZ(FMINV, fminv)
3560 DO_VPZ(FMAXV, fmaxv)
3561
3562 /*
3563  *** SVE Floating Point Unary Operations - Unpredicated Group
3564  */
3565
3566 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3567 {
3568     unsigned vsz = vec_full_reg_size(s);
3569     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3570
3571     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3572                        vec_full_reg_offset(s, a->rn),
3573                        status, vsz, vsz, 0, fn);
3574     tcg_temp_free_ptr(status);
3575 }
3576
3577 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3578 {
3579     static gen_helper_gvec_2_ptr * const fns[3] = {
3580         gen_helper_gvec_frecpe_h,
3581         gen_helper_gvec_frecpe_s,
3582         gen_helper_gvec_frecpe_d,
3583     };
3584     if (a->esz == 0) {
3585         return false;
3586     }
3587     if (sve_access_check(s)) {
3588         do_zz_fp(s, a, fns[a->esz - 1]);
3589     }
3590     return true;
3591 }
3592
3593 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3594 {
3595     static gen_helper_gvec_2_ptr * const fns[3] = {
3596         gen_helper_gvec_frsqrte_h,
3597         gen_helper_gvec_frsqrte_s,
3598         gen_helper_gvec_frsqrte_d,
3599     };
3600     if (a->esz == 0) {
3601         return false;
3602     }
3603     if (sve_access_check(s)) {
3604         do_zz_fp(s, a, fns[a->esz - 1]);
3605     }
3606     return true;
3607 }
3608
3609 /*
3610  *** SVE Floating Point Compare with Zero Group
3611  */
3612
3613 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3614                       gen_helper_gvec_3_ptr *fn)
3615 {
3616     unsigned vsz = vec_full_reg_size(s);
3617     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3618
3619     tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3620                        vec_full_reg_offset(s, a->rn),
3621                        pred_full_reg_offset(s, a->pg),
3622                        status, vsz, vsz, 0, fn);
3623     tcg_temp_free_ptr(status);
3624 }
3625
3626 #define DO_PPZ(NAME, name) \
3627 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
3628 {                                                                 \
3629     static gen_helper_gvec_3_ptr * const fns[3] = {               \
3630         gen_helper_sve_##name##_h,                                \
3631         gen_helper_sve_##name##_s,                                \
3632         gen_helper_sve_##name##_d,                                \
3633     };                                                            \
3634     if (a->esz == 0) {                                            \
3635         return false;                                             \
3636     }                                                             \
3637     if (sve_access_check(s)) {                                    \
3638         do_ppz_fp(s, a, fns[a->esz - 1]);                         \
3639     }                                                             \
3640     return true;                                                  \
3641 }
3642
3643 DO_PPZ(FCMGE_ppz0, fcmge0)
3644 DO_PPZ(FCMGT_ppz0, fcmgt0)
3645 DO_PPZ(FCMLE_ppz0, fcmle0)
3646 DO_PPZ(FCMLT_ppz0, fcmlt0)
3647 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3648 DO_PPZ(FCMNE_ppz0, fcmne0)
3649
3650 #undef DO_PPZ
3651
3652 /*
3653  *** SVE floating-point trig multiply-add coefficient
3654  */
3655
3656 static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a, uint32_t insn)
3657 {
3658     static gen_helper_gvec_3_ptr * const fns[3] = {
3659         gen_helper_sve_ftmad_h,
3660         gen_helper_sve_ftmad_s,
3661         gen_helper_sve_ftmad_d,
3662     };
3663
3664     if (a->esz == 0) {
3665         return false;
3666     }
3667     if (sve_access_check(s)) {
3668         unsigned vsz = vec_full_reg_size(s);
3669         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3670         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3671                            vec_full_reg_offset(s, a->rn),
3672                            vec_full_reg_offset(s, a->rm),
3673                            status, vsz, vsz, a->imm, fns[a->esz - 1]);
3674         tcg_temp_free_ptr(status);
3675     }
3676     return true;
3677 }
3678
3679 /*
3680  *** SVE Floating Point Accumulating Reduction Group
3681  */
3682
3683 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
3684 {
3685     typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3686                           TCGv_ptr, TCGv_ptr, TCGv_i32);
3687     static fadda_fn * const fns[3] = {
3688         gen_helper_sve_fadda_h,
3689         gen_helper_sve_fadda_s,
3690         gen_helper_sve_fadda_d,
3691     };
3692     unsigned vsz = vec_full_reg_size(s);
3693     TCGv_ptr t_rm, t_pg, t_fpst;
3694     TCGv_i64 t_val;
3695     TCGv_i32 t_desc;
3696
3697     if (a->esz == 0) {
3698         return false;
3699     }
3700     if (!sve_access_check(s)) {
3701         return true;
3702     }
3703
3704     t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3705     t_rm = tcg_temp_new_ptr();
3706     t_pg = tcg_temp_new_ptr();
3707     tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3708     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3709     t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3710     t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3711
3712     fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3713
3714     tcg_temp_free_i32(t_desc);
3715     tcg_temp_free_ptr(t_fpst);
3716     tcg_temp_free_ptr(t_pg);
3717     tcg_temp_free_ptr(t_rm);
3718
3719     write_fp_dreg(s, a->rd, t_val);
3720     tcg_temp_free_i64(t_val);
3721     return true;
3722 }
3723
3724 /*
3725  *** SVE Floating Point Arithmetic - Unpredicated Group
3726  */
3727
3728 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3729                       gen_helper_gvec_3_ptr *fn)
3730 {
3731     if (fn == NULL) {
3732         return false;
3733     }
3734     if (sve_access_check(s)) {
3735         unsigned vsz = vec_full_reg_size(s);
3736         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3737         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3738                            vec_full_reg_offset(s, a->rn),
3739                            vec_full_reg_offset(s, a->rm),
3740                            status, vsz, vsz, 0, fn);
3741         tcg_temp_free_ptr(status);
3742     }
3743     return true;
3744 }
3745
3746
3747 #define DO_FP3(NAME, name) \
3748 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3749 {                                                                   \
3750     static gen_helper_gvec_3_ptr * const fns[4] = {                 \
3751         NULL, gen_helper_gvec_##name##_h,                           \
3752         gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
3753     };                                                              \
3754     return do_zzz_fp(s, a, fns[a->esz]);                            \
3755 }
3756
3757 DO_FP3(FADD_zzz, fadd)
3758 DO_FP3(FSUB_zzz, fsub)
3759 DO_FP3(FMUL_zzz, fmul)
3760 DO_FP3(FTSMUL, ftsmul)
3761 DO_FP3(FRECPS, recps)
3762 DO_FP3(FRSQRTS, rsqrts)
3763
3764 #undef DO_FP3
3765
3766 /*
3767  *** SVE Floating Point Arithmetic - Predicated Group
3768  */
3769
3770 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3771                        gen_helper_gvec_4_ptr *fn)
3772 {
3773     if (fn == NULL) {
3774         return false;
3775     }
3776     if (sve_access_check(s)) {
3777         unsigned vsz = vec_full_reg_size(s);
3778         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3779         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3780                            vec_full_reg_offset(s, a->rn),
3781                            vec_full_reg_offset(s, a->rm),
3782                            pred_full_reg_offset(s, a->pg),
3783                            status, vsz, vsz, 0, fn);
3784         tcg_temp_free_ptr(status);
3785     }
3786     return true;
3787 }
3788
3789 #define DO_FP3(NAME, name) \
3790 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3791 {                                                                   \
3792     static gen_helper_gvec_4_ptr * const fns[4] = {                 \
3793         NULL, gen_helper_sve_##name##_h,                            \
3794         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
3795     };                                                              \
3796     return do_zpzz_fp(s, a, fns[a->esz]);                           \
3797 }
3798
3799 DO_FP3(FADD_zpzz, fadd)
3800 DO_FP3(FSUB_zpzz, fsub)
3801 DO_FP3(FMUL_zpzz, fmul)
3802 DO_FP3(FMIN_zpzz, fmin)
3803 DO_FP3(FMAX_zpzz, fmax)
3804 DO_FP3(FMINNM_zpzz, fminnum)
3805 DO_FP3(FMAXNM_zpzz, fmaxnum)
3806 DO_FP3(FABD, fabd)
3807 DO_FP3(FSCALE, fscalbn)
3808 DO_FP3(FDIV, fdiv)
3809 DO_FP3(FMULX, fmulx)
3810
3811 #undef DO_FP3
3812
3813 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3814                                       TCGv_i64, TCGv_ptr, TCGv_i32);
3815
3816 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3817                          TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3818 {
3819     unsigned vsz = vec_full_reg_size(s);
3820     TCGv_ptr t_zd, t_zn, t_pg, status;
3821     TCGv_i32 desc;
3822
3823     t_zd = tcg_temp_new_ptr();
3824     t_zn = tcg_temp_new_ptr();
3825     t_pg = tcg_temp_new_ptr();
3826     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3827     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3828     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3829
3830     status = get_fpstatus_ptr(is_fp16);
3831     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3832     fn(t_zd, t_zn, t_pg, scalar, status, desc);
3833
3834     tcg_temp_free_i32(desc);
3835     tcg_temp_free_ptr(status);
3836     tcg_temp_free_ptr(t_pg);
3837     tcg_temp_free_ptr(t_zn);
3838     tcg_temp_free_ptr(t_zd);
3839 }
3840
3841 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3842                       gen_helper_sve_fp2scalar *fn)
3843 {
3844     TCGv_i64 temp = tcg_const_i64(imm);
3845     do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3846     tcg_temp_free_i64(temp);
3847 }
3848
3849 #define DO_FP_IMM(NAME, name, const0, const1) \
3850 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a,         \
3851                                 uint32_t insn)                            \
3852 {                                                                         \
3853     static gen_helper_sve_fp2scalar * const fns[3] = {                    \
3854         gen_helper_sve_##name##_h,                                        \
3855         gen_helper_sve_##name##_s,                                        \
3856         gen_helper_sve_##name##_d                                         \
3857     };                                                                    \
3858     static uint64_t const val[3][2] = {                                   \
3859         { float16_##const0, float16_##const1 },                           \
3860         { float32_##const0, float32_##const1 },                           \
3861         { float64_##const0, float64_##const1 },                           \
3862     };                                                                    \
3863     if (a->esz == 0) {                                                    \
3864         return false;                                                     \
3865     }                                                                     \
3866     if (sve_access_check(s)) {                                            \
3867         do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
3868     }                                                                     \
3869     return true;                                                          \
3870 }
3871
3872 #define float16_two  make_float16(0x4000)
3873 #define float32_two  make_float32(0x40000000)
3874 #define float64_two  make_float64(0x4000000000000000ULL)
3875
3876 DO_FP_IMM(FADD, fadds, half, one)
3877 DO_FP_IMM(FSUB, fsubs, half, one)
3878 DO_FP_IMM(FMUL, fmuls, half, two)
3879 DO_FP_IMM(FSUBR, fsubrs, half, one)
3880 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3881 DO_FP_IMM(FMINNM, fminnms, zero, one)
3882 DO_FP_IMM(FMAX, fmaxs, zero, one)
3883 DO_FP_IMM(FMIN, fmins, zero, one)
3884
3885 #undef DO_FP_IMM
3886
3887 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3888                       gen_helper_gvec_4_ptr *fn)
3889 {
3890     if (fn == NULL) {
3891         return false;
3892     }
3893     if (sve_access_check(s)) {
3894         unsigned vsz = vec_full_reg_size(s);
3895         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3896         tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3897                            vec_full_reg_offset(s, a->rn),
3898                            vec_full_reg_offset(s, a->rm),
3899                            pred_full_reg_offset(s, a->pg),
3900                            status, vsz, vsz, 0, fn);
3901         tcg_temp_free_ptr(status);
3902     }
3903     return true;
3904 }
3905
3906 #define DO_FPCMP(NAME, name) \
3907 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,     \
3908                                 uint32_t insn)                        \
3909 {                                                                     \
3910     static gen_helper_gvec_4_ptr * const fns[4] = {                   \
3911         NULL, gen_helper_sve_##name##_h,                              \
3912         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
3913     };                                                                \
3914     return do_fp_cmp(s, a, fns[a->esz]);                              \
3915 }
3916
3917 DO_FPCMP(FCMGE, fcmge)
3918 DO_FPCMP(FCMGT, fcmgt)
3919 DO_FPCMP(FCMEQ, fcmeq)
3920 DO_FPCMP(FCMNE, fcmne)
3921 DO_FPCMP(FCMUO, fcmuo)
3922 DO_FPCMP(FACGE, facge)
3923 DO_FPCMP(FACGT, facgt)
3924
3925 #undef DO_FPCMP
3926
3927 static bool trans_FCADD(DisasContext *s, arg_FCADD *a, uint32_t insn)
3928 {
3929     static gen_helper_gvec_4_ptr * const fns[3] = {
3930         gen_helper_sve_fcadd_h,
3931         gen_helper_sve_fcadd_s,
3932         gen_helper_sve_fcadd_d
3933     };
3934
3935     if (a->esz == 0) {
3936         return false;
3937     }
3938     if (sve_access_check(s)) {
3939         unsigned vsz = vec_full_reg_size(s);
3940         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3941         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3942                            vec_full_reg_offset(s, a->rn),
3943                            vec_full_reg_offset(s, a->rm),
3944                            pred_full_reg_offset(s, a->pg),
3945                            status, vsz, vsz, a->rot, fns[a->esz - 1]);
3946         tcg_temp_free_ptr(status);
3947     }
3948     return true;
3949 }
3950
3951 typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3952
3953 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3954 {
3955     if (fn == NULL) {
3956         return false;
3957     }
3958     if (!sve_access_check(s)) {
3959         return true;
3960     }
3961
3962     unsigned vsz = vec_full_reg_size(s);
3963     unsigned desc;
3964     TCGv_i32 t_desc;
3965     TCGv_ptr pg = tcg_temp_new_ptr();
3966
3967     /* We would need 7 operands to pass these arguments "properly".
3968      * So we encode all the register numbers into the descriptor.
3969      */
3970     desc = deposit32(a->rd, 5, 5, a->rn);
3971     desc = deposit32(desc, 10, 5, a->rm);
3972     desc = deposit32(desc, 15, 5, a->ra);
3973     desc = simd_desc(vsz, vsz, desc);
3974
3975     t_desc = tcg_const_i32(desc);
3976     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3977     fn(cpu_env, pg, t_desc);
3978     tcg_temp_free_i32(t_desc);
3979     tcg_temp_free_ptr(pg);
3980     return true;
3981 }
3982
3983 #define DO_FMLA(NAME, name) \
3984 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
3985 {                                                                    \
3986     static gen_helper_sve_fmla * const fns[4] = {                    \
3987         NULL, gen_helper_sve_##name##_h,                             \
3988         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
3989     };                                                               \
3990     return do_fmla(s, a, fns[a->esz]);                               \
3991 }
3992
3993 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3994 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3995 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3996 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3997
3998 #undef DO_FMLA
3999
4000 static bool trans_FCMLA_zpzzz(DisasContext *s,
4001                               arg_FCMLA_zpzzz *a, uint32_t insn)
4002 {
4003     static gen_helper_sve_fmla * const fns[3] = {
4004         gen_helper_sve_fcmla_zpzzz_h,
4005         gen_helper_sve_fcmla_zpzzz_s,
4006         gen_helper_sve_fcmla_zpzzz_d,
4007     };
4008
4009     if (a->esz == 0) {
4010         return false;
4011     }
4012     if (sve_access_check(s)) {
4013         unsigned vsz = vec_full_reg_size(s);
4014         unsigned desc;
4015         TCGv_i32 t_desc;
4016         TCGv_ptr pg = tcg_temp_new_ptr();
4017
4018         /* We would need 7 operands to pass these arguments "properly".
4019          * So we encode all the register numbers into the descriptor.
4020          */
4021         desc = deposit32(a->rd, 5, 5, a->rn);
4022         desc = deposit32(desc, 10, 5, a->rm);
4023         desc = deposit32(desc, 15, 5, a->ra);
4024         desc = deposit32(desc, 20, 2, a->rot);
4025         desc = sextract32(desc, 0, 22);
4026         desc = simd_desc(vsz, vsz, desc);
4027
4028         t_desc = tcg_const_i32(desc);
4029         tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
4030         fns[a->esz - 1](cpu_env, pg, t_desc);
4031         tcg_temp_free_i32(t_desc);
4032         tcg_temp_free_ptr(pg);
4033     }
4034     return true;
4035 }
4036
4037 static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a, uint32_t insn)
4038 {
4039     static gen_helper_gvec_3_ptr * const fns[2] = {
4040         gen_helper_gvec_fcmlah_idx,
4041         gen_helper_gvec_fcmlas_idx,
4042     };
4043
4044     tcg_debug_assert(a->esz == 1 || a->esz == 2);
4045     tcg_debug_assert(a->rd == a->ra);
4046     if (sve_access_check(s)) {
4047         unsigned vsz = vec_full_reg_size(s);
4048         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4049         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4050                            vec_full_reg_offset(s, a->rn),
4051                            vec_full_reg_offset(s, a->rm),
4052                            status, vsz, vsz,
4053                            a->index * 4 + a->rot,
4054                            fns[a->esz - 1]);
4055         tcg_temp_free_ptr(status);
4056     }
4057     return true;
4058 }
4059
4060 /*
4061  *** SVE Floating Point Unary Operations Predicated Group
4062  */
4063
4064 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4065                        bool is_fp16, gen_helper_gvec_3_ptr *fn)
4066 {
4067     if (sve_access_check(s)) {
4068         unsigned vsz = vec_full_reg_size(s);
4069         TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4070         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4071                            vec_full_reg_offset(s, rn),
4072                            pred_full_reg_offset(s, pg),
4073                            status, vsz, vsz, 0, fn);
4074         tcg_temp_free_ptr(status);
4075     }
4076     return true;
4077 }
4078
4079 static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4080 {
4081     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_sh);
4082 }
4083
4084 static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4085 {
4086     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4087 }
4088
4089 static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4090 {
4091     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_dh);
4092 }
4093
4094 static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4095 {
4096     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4097 }
4098
4099 static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4100 {
4101     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4102 }
4103
4104 static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4105 {
4106     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4107 }
4108
4109 static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4110 {
4111     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4112 }
4113
4114 static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4115 {
4116     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4117 }
4118
4119 static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4120 {
4121     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4122 }
4123
4124 static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4125 {
4126     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4127 }
4128
4129 static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4130 {
4131     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4132 }
4133
4134 static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4135 {
4136     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4137 }
4138
4139 static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4140 {
4141     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4142 }
4143
4144 static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4145 {
4146     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4147 }
4148
4149 static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4150 {
4151     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4152 }
4153
4154 static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4155 {
4156     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4157 }
4158
4159 static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4160 {
4161     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4162 }
4163
4164 static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4165 {
4166     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4167 }
4168
4169 static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4170 {
4171     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4172 }
4173
4174 static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4175 {
4176     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4177 }
4178
4179 static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4180     gen_helper_sve_frint_h,
4181     gen_helper_sve_frint_s,
4182     gen_helper_sve_frint_d
4183 };
4184
4185 static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4186 {
4187     if (a->esz == 0) {
4188         return false;
4189     }
4190     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4191                       frint_fns[a->esz - 1]);
4192 }
4193
4194 static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4195 {
4196     static gen_helper_gvec_3_ptr * const fns[3] = {
4197         gen_helper_sve_frintx_h,
4198         gen_helper_sve_frintx_s,
4199         gen_helper_sve_frintx_d
4200     };
4201     if (a->esz == 0) {
4202         return false;
4203     }
4204     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4205 }
4206
4207 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4208 {
4209     if (a->esz == 0) {
4210         return false;
4211     }
4212     if (sve_access_check(s)) {
4213         unsigned vsz = vec_full_reg_size(s);
4214         TCGv_i32 tmode = tcg_const_i32(mode);
4215         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4216
4217         gen_helper_set_rmode(tmode, tmode, status);
4218
4219         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4220                            vec_full_reg_offset(s, a->rn),
4221                            pred_full_reg_offset(s, a->pg),
4222                            status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4223
4224         gen_helper_set_rmode(tmode, tmode, status);
4225         tcg_temp_free_i32(tmode);
4226         tcg_temp_free_ptr(status);
4227     }
4228     return true;
4229 }
4230
4231 static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4232 {
4233     return do_frint_mode(s, a, float_round_nearest_even);
4234 }
4235
4236 static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4237 {
4238     return do_frint_mode(s, a, float_round_up);
4239 }
4240
4241 static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4242 {
4243     return do_frint_mode(s, a, float_round_down);
4244 }
4245
4246 static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4247 {
4248     return do_frint_mode(s, a, float_round_to_zero);
4249 }
4250
4251 static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4252 {
4253     return do_frint_mode(s, a, float_round_ties_away);
4254 }
4255
4256 static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4257 {
4258     static gen_helper_gvec_3_ptr * const fns[3] = {
4259         gen_helper_sve_frecpx_h,
4260         gen_helper_sve_frecpx_s,
4261         gen_helper_sve_frecpx_d
4262     };
4263     if (a->esz == 0) {
4264         return false;
4265     }
4266     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4267 }
4268
4269 static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4270 {
4271     static gen_helper_gvec_3_ptr * const fns[3] = {
4272         gen_helper_sve_fsqrt_h,
4273         gen_helper_sve_fsqrt_s,
4274         gen_helper_sve_fsqrt_d
4275     };
4276     if (a->esz == 0) {
4277         return false;
4278     }
4279     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4280 }
4281
4282 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4283 {
4284     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4285 }
4286
4287 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4288 {
4289     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4290 }
4291
4292 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4293 {
4294     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4295 }
4296
4297 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4298 {
4299     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4300 }
4301
4302 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4303 {
4304     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4305 }
4306
4307 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4308 {
4309     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4310 }
4311
4312 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4313 {
4314     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4315 }
4316
4317 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4318 {
4319     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4320 }
4321
4322 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4323 {
4324     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4325 }
4326
4327 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4328 {
4329     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4330 }
4331
4332 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4333 {
4334     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4335 }
4336
4337 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4338 {
4339     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4340 }
4341
4342 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4343 {
4344     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4345 }
4346
4347 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4348 {
4349     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4350 }
4351
4352 /*
4353  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4354  */
4355
4356 /* Subroutine loading a vector register at VOFS of LEN bytes.
4357  * The load should begin at the address Rn + IMM.
4358  */
4359
4360 static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
4361                    int rn, int imm)
4362 {
4363     uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
4364     uint32_t len_remain = len % 8;
4365     uint32_t nparts = len / 8 + ctpop8(len_remain);
4366     int midx = get_mem_index(s);
4367     TCGv_i64 addr, t0, t1;
4368
4369     addr = tcg_temp_new_i64();
4370     t0 = tcg_temp_new_i64();
4371
4372     /* Note that unpredicated load/store of vector/predicate registers
4373      * are defined as a stream of bytes, which equates to little-endian
4374      * operations on larger quantities.  There is no nice way to force
4375      * a little-endian load for aarch64_be-linux-user out of line.
4376      *
4377      * Attempt to keep code expansion to a minimum by limiting the
4378      * amount of unrolling done.
4379      */
4380     if (nparts <= 4) {
4381         int i;
4382
4383         for (i = 0; i < len_align; i += 8) {
4384             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4385             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4386             tcg_gen_st_i64(t0, cpu_env, vofs + i);
4387         }
4388     } else {
4389         TCGLabel *loop = gen_new_label();
4390         TCGv_ptr tp, i = tcg_const_local_ptr(0);
4391
4392         gen_set_label(loop);
4393
4394         /* Minimize the number of local temps that must be re-read from
4395          * the stack each iteration.  Instead, re-compute values other
4396          * than the loop counter.
4397          */
4398         tp = tcg_temp_new_ptr();
4399         tcg_gen_addi_ptr(tp, i, imm);
4400         tcg_gen_extu_ptr_i64(addr, tp);
4401         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4402
4403         tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4404
4405         tcg_gen_add_ptr(tp, cpu_env, i);
4406         tcg_gen_addi_ptr(i, i, 8);
4407         tcg_gen_st_i64(t0, tp, vofs);
4408         tcg_temp_free_ptr(tp);
4409
4410         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4411         tcg_temp_free_ptr(i);
4412     }
4413
4414     /* Predicate register loads can be any multiple of 2.
4415      * Note that we still store the entire 64-bit unit into cpu_env.
4416      */
4417     if (len_remain) {
4418         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4419
4420         switch (len_remain) {
4421         case 2:
4422         case 4:
4423         case 8:
4424             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4425             break;
4426
4427         case 6:
4428             t1 = tcg_temp_new_i64();
4429             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
4430             tcg_gen_addi_i64(addr, addr, 4);
4431             tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
4432             tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4433             tcg_temp_free_i64(t1);
4434             break;
4435
4436         default:
4437             g_assert_not_reached();
4438         }
4439         tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4440     }
4441     tcg_temp_free_i64(addr);
4442     tcg_temp_free_i64(t0);
4443 }
4444
4445 /* Similarly for stores.  */
4446 static void do_str(DisasContext *s, uint32_t vofs, uint32_t len,
4447                    int rn, int imm)
4448 {
4449     uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
4450     uint32_t len_remain = len % 8;
4451     uint32_t nparts = len / 8 + ctpop8(len_remain);
4452     int midx = get_mem_index(s);
4453     TCGv_i64 addr, t0;
4454
4455     addr = tcg_temp_new_i64();
4456     t0 = tcg_temp_new_i64();
4457
4458     /* Note that unpredicated load/store of vector/predicate registers
4459      * are defined as a stream of bytes, which equates to little-endian
4460      * operations on larger quantities.  There is no nice way to force
4461      * a little-endian store for aarch64_be-linux-user out of line.
4462      *
4463      * Attempt to keep code expansion to a minimum by limiting the
4464      * amount of unrolling done.
4465      */
4466     if (nparts <= 4) {
4467         int i;
4468
4469         for (i = 0; i < len_align; i += 8) {
4470             tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4471             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4472             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4473         }
4474     } else {
4475         TCGLabel *loop = gen_new_label();
4476         TCGv_ptr t2, i = tcg_const_local_ptr(0);
4477
4478         gen_set_label(loop);
4479
4480         t2 = tcg_temp_new_ptr();
4481         tcg_gen_add_ptr(t2, cpu_env, i);
4482         tcg_gen_ld_i64(t0, t2, vofs);
4483
4484         /* Minimize the number of local temps that must be re-read from
4485          * the stack each iteration.  Instead, re-compute values other
4486          * than the loop counter.
4487          */
4488         tcg_gen_addi_ptr(t2, i, imm);
4489         tcg_gen_extu_ptr_i64(addr, t2);
4490         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4491         tcg_temp_free_ptr(t2);
4492
4493         tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4494
4495         tcg_gen_addi_ptr(i, i, 8);
4496
4497         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4498         tcg_temp_free_ptr(i);
4499     }
4500
4501     /* Predicate register stores can be any multiple of 2.  */
4502     if (len_remain) {
4503         tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4504         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4505
4506         switch (len_remain) {
4507         case 2:
4508         case 4:
4509         case 8:
4510             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4511             break;
4512
4513         case 6:
4514             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
4515             tcg_gen_addi_i64(addr, addr, 4);
4516             tcg_gen_shri_i64(t0, t0, 32);
4517             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
4518             break;
4519
4520         default:
4521             g_assert_not_reached();
4522         }
4523     }
4524     tcg_temp_free_i64(addr);
4525     tcg_temp_free_i64(t0);
4526 }
4527
4528 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4529 {
4530     if (sve_access_check(s)) {
4531         int size = vec_full_reg_size(s);
4532         int off = vec_full_reg_offset(s, a->rd);
4533         do_ldr(s, off, size, a->rn, a->imm * size);
4534     }
4535     return true;
4536 }
4537
4538 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4539 {
4540     if (sve_access_check(s)) {
4541         int size = pred_full_reg_size(s);
4542         int off = pred_full_reg_offset(s, a->rd);
4543         do_ldr(s, off, size, a->rn, a->imm * size);
4544     }
4545     return true;
4546 }
4547
4548 static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4549 {
4550     if (sve_access_check(s)) {
4551         int size = vec_full_reg_size(s);
4552         int off = vec_full_reg_offset(s, a->rd);
4553         do_str(s, off, size, a->rn, a->imm * size);
4554     }
4555     return true;
4556 }
4557
4558 static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4559 {
4560     if (sve_access_check(s)) {
4561         int size = pred_full_reg_size(s);
4562         int off = pred_full_reg_offset(s, a->rd);
4563         do_str(s, off, size, a->rn, a->imm * size);
4564     }
4565     return true;
4566 }
4567
4568 /*
4569  *** SVE Memory - Contiguous Load Group
4570  */
4571
4572 /* The memory mode of the dtype.  */
4573 static const TCGMemOp dtype_mop[16] = {
4574     MO_UB, MO_UB, MO_UB, MO_UB,
4575     MO_SL, MO_UW, MO_UW, MO_UW,
4576     MO_SW, MO_SW, MO_UL, MO_UL,
4577     MO_SB, MO_SB, MO_SB, MO_Q
4578 };
4579
4580 #define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
4581
/* The vector element size of dtype, indexed by the 4-bit dtype value.
 * Users in this file apply it as a shift count to the vector size.
 */
static const uint8_t dtype_esz[16] = {
    [0]  = 0, [1]  = 1, [2]  = 2, [3]  = 3,
    [4]  = 3, [5]  = 1, [6]  = 2, [7]  = 3,
    [8]  = 3, [9]  = 2, [10] = 2, [11] = 3,
    [12] = 3, [13] = 2, [14] = 1, [15] = 3,
};
4589
4590 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4591                        gen_helper_gvec_mem *fn)
4592 {
4593     unsigned vsz = vec_full_reg_size(s);
4594     TCGv_ptr t_pg;
4595     TCGv_i32 desc;
4596
4597     /* For e.g. LD4, there are not enough arguments to pass all 4
4598      * registers as pointers, so encode the regno into the data field.
4599      * For consistency, do this even for LD1.
4600      */
4601     desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
4602     t_pg = tcg_temp_new_ptr();
4603
4604     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4605     fn(cpu_env, t_pg, addr, desc);
4606
4607     tcg_temp_free_ptr(t_pg);
4608     tcg_temp_free_i32(desc);
4609 }
4610
4611 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4612                       TCGv_i64 addr, int dtype, int nreg)
4613 {
4614     static gen_helper_gvec_mem * const fns[16][4] = {
4615         { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4616           gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4617         { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4618         { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4619         { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4620
4621         { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
4622         { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
4623           gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
4624         { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
4625         { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
4626
4627         { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
4628         { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
4629         { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
4630           gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
4631         { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
4632
4633         { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4634         { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4635         { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4636         { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
4637           gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
4638     };
4639     gen_helper_gvec_mem *fn = fns[dtype][nreg];
4640
4641     /* While there are holes in the table, they are not
4642      * accessible via the instruction encoding.
4643      */
4644     assert(fn != NULL);
4645     do_mem_zpa(s, zt, pg, addr, fn);
4646 }
4647
4648 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4649 {
4650     if (a->rm == 31) {
4651         return false;
4652     }
4653     if (sve_access_check(s)) {
4654         TCGv_i64 addr = new_tmp_a64(s);
4655         tcg_gen_muli_i64(addr, cpu_reg(s, a->rm),
4656                          (a->nreg + 1) << dtype_msz(a->dtype));
4657         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4658         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4659     }
4660     return true;
4661 }
4662
4663 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4664 {
4665     if (sve_access_check(s)) {
4666         int vsz = vec_full_reg_size(s);
4667         int elements = vsz >> dtype_esz[a->dtype];
4668         TCGv_i64 addr = new_tmp_a64(s);
4669
4670         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4671                          (a->imm * elements * (a->nreg + 1))
4672                          << dtype_msz(a->dtype));
4673         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4674     }
4675     return true;
4676 }
4677
4678 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4679 {
4680     static gen_helper_gvec_mem * const fns[16] = {
4681         gen_helper_sve_ldff1bb_r,
4682         gen_helper_sve_ldff1bhu_r,
4683         gen_helper_sve_ldff1bsu_r,
4684         gen_helper_sve_ldff1bdu_r,
4685
4686         gen_helper_sve_ldff1sds_r,
4687         gen_helper_sve_ldff1hh_r,
4688         gen_helper_sve_ldff1hsu_r,
4689         gen_helper_sve_ldff1hdu_r,
4690
4691         gen_helper_sve_ldff1hds_r,
4692         gen_helper_sve_ldff1hss_r,
4693         gen_helper_sve_ldff1ss_r,
4694         gen_helper_sve_ldff1sdu_r,
4695
4696         gen_helper_sve_ldff1bds_r,
4697         gen_helper_sve_ldff1bss_r,
4698         gen_helper_sve_ldff1bhs_r,
4699         gen_helper_sve_ldff1dd_r,
4700     };
4701
4702     if (sve_access_check(s)) {
4703         TCGv_i64 addr = new_tmp_a64(s);
4704         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4705         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4706         do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4707     }
4708     return true;
4709 }
4710
4711 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4712 {
4713     static gen_helper_gvec_mem * const fns[16] = {
4714         gen_helper_sve_ldnf1bb_r,
4715         gen_helper_sve_ldnf1bhu_r,
4716         gen_helper_sve_ldnf1bsu_r,
4717         gen_helper_sve_ldnf1bdu_r,
4718
4719         gen_helper_sve_ldnf1sds_r,
4720         gen_helper_sve_ldnf1hh_r,
4721         gen_helper_sve_ldnf1hsu_r,
4722         gen_helper_sve_ldnf1hdu_r,
4723
4724         gen_helper_sve_ldnf1hds_r,
4725         gen_helper_sve_ldnf1hss_r,
4726         gen_helper_sve_ldnf1ss_r,
4727         gen_helper_sve_ldnf1sdu_r,
4728
4729         gen_helper_sve_ldnf1bds_r,
4730         gen_helper_sve_ldnf1bss_r,
4731         gen_helper_sve_ldnf1bhs_r,
4732         gen_helper_sve_ldnf1dd_r,
4733     };
4734
4735     if (sve_access_check(s)) {
4736         int vsz = vec_full_reg_size(s);
4737         int elements = vsz >> dtype_esz[a->dtype];
4738         int off = (a->imm * elements) << dtype_msz(a->dtype);
4739         TCGv_i64 addr = new_tmp_a64(s);
4740
4741         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4742         do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4743     }
4744     return true;
4745 }
4746
4747 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4748 {
4749     static gen_helper_gvec_mem * const fns[4] = {
4750         gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
4751         gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
4752     };
4753     unsigned vsz = vec_full_reg_size(s);
4754     TCGv_ptr t_pg;
4755     TCGv_i32 desc;
4756
4757     /* Load the first quadword using the normal predicated load helpers.  */
4758     desc = tcg_const_i32(simd_desc(16, 16, zt));
4759     t_pg = tcg_temp_new_ptr();
4760
4761     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4762     fns[msz](cpu_env, t_pg, addr, desc);
4763
4764     tcg_temp_free_ptr(t_pg);
4765     tcg_temp_free_i32(desc);
4766
4767     /* Replicate that first quadword.  */
4768     if (vsz > 16) {
4769         unsigned dofs = vec_full_reg_offset(s, zt);
4770         tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4771     }
4772 }
4773
4774 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4775 {
4776     if (a->rm == 31) {
4777         return false;
4778     }
4779     if (sve_access_check(s)) {
4780         int msz = dtype_msz(a->dtype);
4781         TCGv_i64 addr = new_tmp_a64(s);
4782         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4783         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4784         do_ldrq(s, a->rd, a->pg, addr, msz);
4785     }
4786     return true;
4787 }
4788
4789 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4790 {
4791     if (sve_access_check(s)) {
4792         TCGv_i64 addr = new_tmp_a64(s);
4793         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4794         do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4795     }
4796     return true;
4797 }
4798
4799 /* Load and broadcast element.  */
4800 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4801 {
4802     if (!sve_access_check(s)) {
4803         return true;
4804     }
4805
4806     unsigned vsz = vec_full_reg_size(s);
4807     unsigned psz = pred_full_reg_size(s);
4808     unsigned esz = dtype_esz[a->dtype];
4809     TCGLabel *over = gen_new_label();
4810     TCGv_i64 temp;
4811
4812     /* If the guarding predicate has no bits set, no load occurs.  */
4813     if (psz <= 8) {
4814         /* Reduce the pred_esz_masks value simply to reduce the
4815          * size of the code generated here.
4816          */
4817         uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4818         temp = tcg_temp_new_i64();
4819         tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4820         tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4821         tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4822         tcg_temp_free_i64(temp);
4823     } else {
4824         TCGv_i32 t32 = tcg_temp_new_i32();
4825         find_last_active(s, t32, esz, a->pg);
4826         tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4827         tcg_temp_free_i32(t32);
4828     }
4829
4830     /* Load the data.  */
4831     temp = tcg_temp_new_i64();
4832     tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << esz);
4833     tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4834                         s->be_data | dtype_mop[a->dtype]);
4835
4836     /* Broadcast to *all* elements.  */
4837     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4838                          vsz, vsz, temp);
4839     tcg_temp_free_i64(temp);
4840
4841     /* Zero the inactive elements.  */
4842     gen_set_label(over);
4843     do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4844     return true;
4845 }
4846
4847 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4848                       int msz, int esz, int nreg)
4849 {
4850     static gen_helper_gvec_mem * const fn_single[4][4] = {
4851         { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
4852           gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
4853         { NULL,                   gen_helper_sve_st1hh_r,
4854           gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
4855         { NULL, NULL,
4856           gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
4857         { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
4858     };
4859     static gen_helper_gvec_mem * const fn_multiple[3][4] = {
4860         { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
4861           gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
4862         { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
4863           gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
4864         { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
4865           gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
4866     };
4867     gen_helper_gvec_mem *fn;
4868
4869     if (nreg == 0) {
4870         /* ST1 */
4871         fn = fn_single[msz][esz];
4872     } else {
4873         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
4874         assert(msz == esz);
4875         fn = fn_multiple[nreg - 1][msz];
4876     }
4877     assert(fn != NULL);
4878     do_mem_zpa(s, zt, pg, addr, fn);
4879 }
4880
4881 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
4882 {
4883     if (a->rm == 31 || a->msz > a->esz) {
4884         return false;
4885     }
4886     if (sve_access_check(s)) {
4887         TCGv_i64 addr = new_tmp_a64(s);
4888         tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), (a->nreg + 1) << a->msz);
4889         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4890         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4891     }
4892     return true;
4893 }
4894
4895 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
4896 {
4897     if (a->msz > a->esz) {
4898         return false;
4899     }
4900     if (sve_access_check(s)) {
4901         int vsz = vec_full_reg_size(s);
4902         int elements = vsz >> a->esz;
4903         TCGv_i64 addr = new_tmp_a64(s);
4904
4905         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4906                          (a->imm * elements * (a->nreg + 1)) << a->msz);
4907         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4908     }
4909     return true;
4910 }
4911
4912 /*
4913  *** SVE gather loads / scatter stores
4914  */
4915
4916 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
4917                        TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
4918 {
4919     unsigned vsz = vec_full_reg_size(s);
4920     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
4921     TCGv_ptr t_zm = tcg_temp_new_ptr();
4922     TCGv_ptr t_pg = tcg_temp_new_ptr();
4923     TCGv_ptr t_zt = tcg_temp_new_ptr();
4924
4925     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4926     tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
4927     tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
4928     fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);
4929
4930     tcg_temp_free_ptr(t_zt);
4931     tcg_temp_free_ptr(t_zm);
4932     tcg_temp_free_ptr(t_pg);
4933     tcg_temp_free_i32(desc);
4934 }
4935
4936 /* Indexed by [ff][xs][u][msz].  */
4937 static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
4938     { { { gen_helper_sve_ldbss_zsu,
4939           gen_helper_sve_ldhss_zsu,
4940           NULL, },
4941         { gen_helper_sve_ldbsu_zsu,
4942           gen_helper_sve_ldhsu_zsu,
4943           gen_helper_sve_ldssu_zsu, } },
4944       { { gen_helper_sve_ldbss_zss,
4945           gen_helper_sve_ldhss_zss,
4946           NULL, },
4947         { gen_helper_sve_ldbsu_zss,
4948           gen_helper_sve_ldhsu_zss,
4949           gen_helper_sve_ldssu_zss, } } },
4950
4951     { { { gen_helper_sve_ldffbss_zsu,
4952           gen_helper_sve_ldffhss_zsu,
4953           NULL, },
4954         { gen_helper_sve_ldffbsu_zsu,
4955           gen_helper_sve_ldffhsu_zsu,
4956           gen_helper_sve_ldffssu_zsu, } },
4957       { { gen_helper_sve_ldffbss_zss,
4958           gen_helper_sve_ldffhss_zss,
4959           NULL, },
4960         { gen_helper_sve_ldffbsu_zss,
4961           gen_helper_sve_ldffhsu_zss,
4962           gen_helper_sve_ldffssu_zss, } } }
4963 };
4964
4965 /* Note that we overload xs=2 to indicate 64-bit offset.  */
4966 static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
4967     { { { gen_helper_sve_ldbds_zsu,
4968           gen_helper_sve_ldhds_zsu,
4969           gen_helper_sve_ldsds_zsu,
4970           NULL, },
4971         { gen_helper_sve_ldbdu_zsu,
4972           gen_helper_sve_ldhdu_zsu,
4973           gen_helper_sve_ldsdu_zsu,
4974           gen_helper_sve_ldddu_zsu, } },
4975       { { gen_helper_sve_ldbds_zss,
4976           gen_helper_sve_ldhds_zss,
4977           gen_helper_sve_ldsds_zss,
4978           NULL, },
4979         { gen_helper_sve_ldbdu_zss,
4980           gen_helper_sve_ldhdu_zss,
4981           gen_helper_sve_ldsdu_zss,
4982           gen_helper_sve_ldddu_zss, } },
4983       { { gen_helper_sve_ldbds_zd,
4984           gen_helper_sve_ldhds_zd,
4985           gen_helper_sve_ldsds_zd,
4986           NULL, },
4987         { gen_helper_sve_ldbdu_zd,
4988           gen_helper_sve_ldhdu_zd,
4989           gen_helper_sve_ldsdu_zd,
4990           gen_helper_sve_ldddu_zd, } } },
4991
4992     { { { gen_helper_sve_ldffbds_zsu,
4993           gen_helper_sve_ldffhds_zsu,
4994           gen_helper_sve_ldffsds_zsu,
4995           NULL, },
4996         { gen_helper_sve_ldffbdu_zsu,
4997           gen_helper_sve_ldffhdu_zsu,
4998           gen_helper_sve_ldffsdu_zsu,
4999           gen_helper_sve_ldffddu_zsu, } },
5000       { { gen_helper_sve_ldffbds_zss,
5001           gen_helper_sve_ldffhds_zss,
5002           gen_helper_sve_ldffsds_zss,
5003           NULL, },
5004         { gen_helper_sve_ldffbdu_zss,
5005           gen_helper_sve_ldffhdu_zss,
5006           gen_helper_sve_ldffsdu_zss,
5007           gen_helper_sve_ldffddu_zss, } },
5008       { { gen_helper_sve_ldffbds_zd,
5009           gen_helper_sve_ldffhds_zd,
5010           gen_helper_sve_ldffsds_zd,
5011           NULL, },
5012         { gen_helper_sve_ldffbdu_zd,
5013           gen_helper_sve_ldffhdu_zd,
5014           gen_helper_sve_ldffsdu_zd,
5015           gen_helper_sve_ldffddu_zd, } } }
5016 };
5017
5018 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
5019 {
5020     gen_helper_gvec_mem_scatter *fn = NULL;
5021
5022     if (!sve_access_check(s)) {
5023         return true;
5024     }
5025
5026     switch (a->esz) {
5027     case MO_32:
5028         fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
5029         break;
5030     case MO_64:
5031         fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
5032         break;
5033     }
5034     assert(fn != NULL);
5035
5036     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5037                cpu_reg_sp(s, a->rn), fn);
5038     return true;
5039 }
5040
5041 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
5042 {
5043     gen_helper_gvec_mem_scatter *fn = NULL;
5044     TCGv_i64 imm;
5045
5046     if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5047         return false;
5048     }
5049     if (!sve_access_check(s)) {
5050         return true;
5051     }
5052
5053     switch (a->esz) {
5054     case MO_32:
5055         fn = gather_load_fn32[a->ff][0][a->u][a->msz];
5056         break;
5057     case MO_64:
5058         fn = gather_load_fn64[a->ff][2][a->u][a->msz];
5059         break;
5060     }
5061     assert(fn != NULL);
5062
5063     /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5064      * by loading the immediate into the scalar parameter.
5065      */
5066     imm = tcg_const_i64(a->imm << a->msz);
5067     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
5068     tcg_temp_free_i64(imm);
5069     return true;
5070 }
5071
5072 /* Indexed by [xs][msz].  */
5073 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = {
5074     { gen_helper_sve_stbs_zsu,
5075       gen_helper_sve_sths_zsu,
5076       gen_helper_sve_stss_zsu, },
5077     { gen_helper_sve_stbs_zss,
5078       gen_helper_sve_sths_zss,
5079       gen_helper_sve_stss_zss, },
5080 };
5081
5082 /* Note that we overload xs=2 to indicate 64-bit offset.  */
5083 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = {
5084     { gen_helper_sve_stbd_zsu,
5085       gen_helper_sve_sthd_zsu,
5086       gen_helper_sve_stsd_zsu,
5087       gen_helper_sve_stdd_zsu, },
5088     { gen_helper_sve_stbd_zss,
5089       gen_helper_sve_sthd_zss,
5090       gen_helper_sve_stsd_zss,
5091       gen_helper_sve_stdd_zss, },
5092     { gen_helper_sve_stbd_zd,
5093       gen_helper_sve_sthd_zd,
5094       gen_helper_sve_stsd_zd,
5095       gen_helper_sve_stdd_zd, },
5096 };
5097
5098 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
5099 {
5100     gen_helper_gvec_mem_scatter *fn;
5101
5102     if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5103         return false;
5104     }
5105     if (!sve_access_check(s)) {
5106         return true;
5107     }
5108     switch (a->esz) {
5109     case MO_32:
5110         fn = scatter_store_fn32[a->xs][a->msz];
5111         break;
5112     case MO_64:
5113         fn = scatter_store_fn64[a->xs][a->msz];
5114         break;
5115     default:
5116         g_assert_not_reached();
5117     }
5118     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5119                cpu_reg_sp(s, a->rn), fn);
5120     return true;
5121 }
5122
5123 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
5124 {
5125     gen_helper_gvec_mem_scatter *fn = NULL;
5126     TCGv_i64 imm;
5127
5128     if (a->esz < a->msz) {
5129         return false;
5130     }
5131     if (!sve_access_check(s)) {
5132         return true;
5133     }
5134
5135     switch (a->esz) {
5136     case MO_32:
5137         fn = scatter_store_fn32[0][a->msz];
5138         break;
5139     case MO_64:
5140         fn = scatter_store_fn64[2][a->msz];
5141         break;
5142     }
5143     assert(fn != NULL);
5144
5145     /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5146      * by loading the immediate into the scalar parameter.
5147      */
5148     imm = tcg_const_i64(a->imm << a->msz);
5149     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
5150     tcg_temp_free_i64(imm);
5151     return true;
5152 }
5153
5154 /*
5155  * Prefetches
5156  */
5157
5158 static bool trans_PRF(DisasContext *s, arg_PRF *a, uint32_t insn)
5159 {
5160     /* Prefetch is a nop within QEMU.  */
5161     (void)sve_access_check(s);
5162     return true;
5163 }
5164
5165 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a, uint32_t insn)
5166 {
5167     if (a->rm == 31) {
5168         return false;
5169     }
5170     /* Prefetch is a nop within QEMU.  */
5171     (void)sve_access_check(s);
5172     return true;
5173 }
5174
5175 /*
5176  * Move Prefix
5177  *
5178  * TODO: The implementation so far could handle predicated merging movprfx.
5179  * The helper functions as written take an extra source register to
5180  * use in the operation, but the result is only written when predication
5181  * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
5182  * to allow the final write back to the destination to be unconditional.
5183  * For predicated zeroing movprfx, we need to rearrange the helpers to
5184  * allow the final write back to zero inactives.
5185  *
5186  * In the meantime, just emit the moves.
5187  */
5188
5189 static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a, uint32_t insn)
5190 {
5191     return do_mov_z(s, a->rd, a->rn);
5192 }
5193
5194 static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
5195 {
5196     if (sve_access_check(s)) {
5197         do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5198     }
5199     return true;
5200 }
5201
5202 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
5203 {
5204     if (sve_access_check(s)) {
5205         do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5206     }
5207     return true;
5208 }