target/arm/translate-sve.c

   1 /*
   2  * AArch64 SVE translation
   3  *
   4  * Copyright (c) 2018 Linaro, Ltd
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg/tcg-op.h"
  24 #include "tcg/tcg-op-gvec.h"
  25 #include "tcg/tcg-gvec-desc.h"
  26 #include "qemu/log.h"
  27 #include "arm_ldst.h"
  28 #include "translate.h"
  29 #include "internals.h"
  30 #include "exec/helper-proto.h"
  31 #include "exec/helper-gen.h"
  32 #include "exec/log.h"
  33 #include "trace-tcg.h"
  34 #include "translate-a64.h"
  35 #include "fpu/softfloat.h"
  36
  37
/* NOTE(review): presumably the signature of a gvec expander that takes a
 * 64-bit scalar operand; no user is visible in this part of the file --
 * confirm against the callers.
 */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

/* Generator signatures whose first argument is a TCGv_i32, followed by
 * three or four TCGv_ptr arguments and a trailing TCGv_i32.
 * NOTE(review): the names suggest the leading TCGv_i32 receives flags
 * computed by the helper -- confirm.
 */
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Generator signatures that take the env pointer first.
 * NOTE(review): presumably used for memory-access helpers (contiguous and
 * scatter/gather forms) -- confirm against the users.
 */
typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
  49
  50 /*
  51  * Helpers for extracting complex instruction fields.
  52  */
  53
  54 /* See e.g. ASR (immediate, predicated).
  55  * Returns -1 for unallocated encoding; diagnose later.
  56  */
  57 static int tszimm_esz(DisasContext *s, int x)
  58 {
  59     x >>= 3;  /* discard imm3 */
  60     return 31 - clz32(x);
  61 }
  62
  63 static int tszimm_shr(DisasContext *s, int x)
  64 {
  65     return (16 << tszimm_esz(s, x)) - x;
  66 }
  67
  68 /* See e.g. LSL (immediate, predicated).  */
  69 static int tszimm_shl(DisasContext *s, int x)
  70 {
  71     return x - (8 << tszimm_esz(s, x));
  72 }
  73
  74 static inline int plus1(DisasContext *s, int x)
  75 {
  76     return x + 1;
  77 }
  78
  79 /* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  80 static inline int expand_imm_sh8s(DisasContext *s, int x)
  81 {
  82     return (int8_t)x << (x & 0x100 ? 8 : 0);
  83 }
  84
  85 static inline int expand_imm_sh8u(DisasContext *s, int x)
  86 {
  87     return (uint8_t)x << (x & 0x100 ? 8 : 0);
  88 }
  89
  90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  91  * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  92  */
  93 static inline int msz_dtype(DisasContext *s, int msz)
  94 {
  95     static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  96     return dtype[msz];
  97 }
  98
  99 /*
 100  * Include the generated decoder.
 101  */
 102
 103 #include "decode-sve.inc.c"
 104
 105 /*
 106  * Implement all of the translator functions referenced by the decoder.
 107  */
 108
 109 /* Return the offset info CPUARMState of the predicate vector register Pn.
 110  * Note for this purpose, FFR is P16.
 111  */
 112 static inline int pred_full_reg_offset(DisasContext *s, int regno)
 113 {
 114     return offsetof(CPUARMState, vfp.pregs[regno]);
 115 }
 116
 117 /* Return the byte size of the whole predicate register, VL / 64.  */
 118 static inline int pred_full_reg_size(DisasContext *s)
 119 {
 120     return s->sve_len >> 3;
 121 }
 122
 123 /* Round up the size of a register to a size allowed by
 124  * the tcg vector infrastructure.  Any operation which uses this
 125  * size may assume that the bits above pred_full_reg_size are zero,
 126  * and must leave them the same way.
 127  *
 128  * Note that this is not needed for the vector registers as they
 129  * are always properly sized for tcg vectors.
 130  */
 131 static int size_for_gvec(int size)
 132 {
 133     if (size <= 8) {
 134         return 8;
 135     } else {
 136         return QEMU_ALIGN_UP(size, 16);
 137     }
 138 }
 139
 140 static int pred_gvec_reg_size(DisasContext *s)
 141 {
 142     return size_for_gvec(pred_full_reg_size(s));
 143 }
 144
 145 /* Invoke a vector expander on two Zregs.  */
 146 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
 147                          int esz, int rd, int rn)
 148 {
 149     if (sve_access_check(s)) {
 150         unsigned vsz = vec_full_reg_size(s);
 151         gvec_fn(esz, vec_full_reg_offset(s, rd),
 152                 vec_full_reg_offset(s, rn), vsz, vsz);
 153     }
 154     return true;
 155 }
 156
 157 /* Invoke a vector expander on three Zregs.  */
 158 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
 159                          int esz, int rd, int rn, int rm)
 160 {
 161     if (sve_access_check(s)) {
 162         unsigned vsz = vec_full_reg_size(s);
 163         gvec_fn(esz, vec_full_reg_offset(s, rd),
 164                 vec_full_reg_offset(s, rn),
 165                 vec_full_reg_offset(s, rm), vsz, vsz);
 166     }
 167     return true;
 168 }
 169
 170 /* Invoke a vector move on two Zregs.  */
 171 static bool do_mov_z(DisasContext *s, int rd, int rn)
 172 {
 173     return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
 174 }
 175
 176 /* Initialize a Zreg with replications of a 64-bit immediate.  */
 177 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 178 {
 179     unsigned vsz = vec_full_reg_size(s);
 180     tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
 181 }
 182
 183 /* Invoke a vector expander on two Pregs.  */
 184 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
 185                          int esz, int rd, int rn)
 186 {
 187     if (sve_access_check(s)) {
 188         unsigned psz = pred_gvec_reg_size(s);
 189         gvec_fn(esz, pred_full_reg_offset(s, rd),
 190                 pred_full_reg_offset(s, rn), psz, psz);
 191     }
 192     return true;
 193 }
 194
 195 /* Invoke a vector expander on three Pregs.  */
 196 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
 197                          int esz, int rd, int rn, int rm)
 198 {
 199     if (sve_access_check(s)) {
 200         unsigned psz = pred_gvec_reg_size(s);
 201         gvec_fn(esz, pred_full_reg_offset(s, rd),
 202                 pred_full_reg_offset(s, rn),
 203                 pred_full_reg_offset(s, rm), psz, psz);
 204     }
 205     return true;
 206 }
 207
 208 /* Invoke a vector operation on four Pregs.  */
 209 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
 210                         int rd, int rn, int rm, int rg)
 211 {
 212     if (sve_access_check(s)) {
 213         unsigned psz = pred_gvec_reg_size(s);
 214         tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
 215                        pred_full_reg_offset(s, rn),
 216                        pred_full_reg_offset(s, rm),
 217                        pred_full_reg_offset(s, rg),
 218                        psz, psz, gvec_op);
 219     }
 220     return true;
 221 }
 222
 223 /* Invoke a vector move on two Pregs.  */
 224 static bool do_mov_p(DisasContext *s, int rd, int rn)
 225 {
 226     return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
 227 }
 228
 229 /* Set the cpu flags as per a return from an SVE helper.  */
 230 static void do_pred_flags(TCGv_i32 t)
 231 {
 232     tcg_gen_mov_i32(cpu_NF, t);
 233     tcg_gen_andi_i32(cpu_ZF, t, 2);
 234     tcg_gen_andi_i32(cpu_CF, t, 1);
 235     tcg_gen_movi_i32(cpu_VF, 0);
 236 }
 237
 238 /* Subroutines computing the ARM PredTest psuedofunction.  */
 239 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
 240 {
 241     TCGv_i32 t = tcg_temp_new_i32();
 242
 243     gen_helper_sve_predtest1(t, d, g);
 244     do_pred_flags(t);
 245     tcg_temp_free_i32(t);
 246 }
 247
 248 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
 249 {
 250     TCGv_ptr dptr = tcg_temp_new_ptr();
 251     TCGv_ptr gptr = tcg_temp_new_ptr();
 252     TCGv_i32 t;
 253
 254     tcg_gen_addi_ptr(dptr, cpu_env, dofs);
 255     tcg_gen_addi_ptr(gptr, cpu_env, gofs);
 256     t = tcg_const_i32(words);
 257
 258     gen_helper_sve_predtest(t, dptr, gptr, t);
 259     tcg_temp_free_ptr(dptr);
 260     tcg_temp_free_ptr(gptr);
 261
 262     do_pred_flags(t);
 263     tcg_temp_free_i32(t);
 264 }
 265
 266 /* For each element size, the bits within a predicate word that are active.  */
 267 const uint64_t pred_esz_masks[4] = {
 268     0xffffffffffffffffull, 0x5555555555555555ull,
 269     0x1111111111111111ull, 0x0101010101010101ull
 270 };
 271
 272 /*
 273  *** SVE Logical - Unpredicated Group
 274  */
 275
 276 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
 277 {
 278     return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
 279 }
 280
 281 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
 282 {
 283     return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
 284 }
 285
 286 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
 287 {
 288     return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
 289 }
 290
 291 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
 292 {
 293     return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
 294 }
 295
 296 /*
 297  *** SVE Integer Arithmetic - Unpredicated Group
 298  */
 299
 300 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
 301 {
 302     return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
 303 }
 304
 305 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
 306 {
 307     return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
 308 }
 309
 310 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
 311 {
 312     return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
 313 }
 314
 315 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
 316 {
 317     return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
 318 }
 319
 320 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
 321 {
 322     return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
 323 }
 324
 325 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
 326 {
 327     return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
 328 }
 329
 330 /*
 331  *** SVE Integer Arithmetic - Binary Predicated Group
 332  */
 333
 334 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 335 {
 336     unsigned vsz = vec_full_reg_size(s);
 337     if (fn == NULL) {
 338         return false;
 339     }
 340     if (sve_access_check(s)) {
 341         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 342                            vec_full_reg_offset(s, a->rn),
 343                            vec_full_reg_offset(s, a->rm),
 344                            pred_full_reg_offset(s, a->pg),
 345                            vsz, vsz, 0, fn);
 346     }
 347     return true;
 348 }
 349
 350 /* Select active elememnts from Zn and inactive elements from Zm,
 351  * storing the result in Zd.
 352  */
 353 static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
 354 {
 355     static gen_helper_gvec_4 * const fns[4] = {
 356         gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
 357         gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
 358     };
 359     unsigned vsz = vec_full_reg_size(s);
 360     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 361                        vec_full_reg_offset(s, rn),
 362                        vec_full_reg_offset(s, rm),
 363                        pred_full_reg_offset(s, pg),
 364                        vsz, vsz, 0, fns[esz]);
 365 }
 366
 367 #define DO_ZPZZ(NAME, name) \
 368 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
 369 {                                                                         \
 370     static gen_helper_gvec_4 * const fns[4] = {                           \
 371         gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
 372         gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
 373     };                                                                    \
 374     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 375 }
 376
 377 DO_ZPZZ(AND, and)
 378 DO_ZPZZ(EOR, eor)
 379 DO_ZPZZ(ORR, orr)
 380 DO_ZPZZ(BIC, bic)
 381
 382 DO_ZPZZ(ADD, add)
 383 DO_ZPZZ(SUB, sub)
 384
 385 DO_ZPZZ(SMAX, smax)
 386 DO_ZPZZ(UMAX, umax)
 387 DO_ZPZZ(SMIN, smin)
 388 DO_ZPZZ(UMIN, umin)
 389 DO_ZPZZ(SABD, sabd)
 390 DO_ZPZZ(UABD, uabd)
 391
 392 DO_ZPZZ(MUL, mul)
 393 DO_ZPZZ(SMULH, smulh)
 394 DO_ZPZZ(UMULH, umulh)
 395
 396 DO_ZPZZ(ASR, asr)
 397 DO_ZPZZ(LSR, lsr)
 398 DO_ZPZZ(LSL, lsl)
 399
 400 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
 401 {
 402     static gen_helper_gvec_4 * const fns[4] = {
 403         NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
 404     };
 405     return do_zpzz_ool(s, a, fns[a->esz]);
 406 }
 407
 408 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
 409 {
 410     static gen_helper_gvec_4 * const fns[4] = {
 411         NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
 412     };
 413     return do_zpzz_ool(s, a, fns[a->esz]);
 414 }
 415
 416 static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
 417 {
 418     if (sve_access_check(s)) {
 419         do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
 420     }
 421     return true;
 422 }
 423
 424 #undef DO_ZPZZ
 425
 426 /*
 427  *** SVE Integer Arithmetic - Unary Predicated Group
 428  */
 429
 430 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 431 {
 432     if (fn == NULL) {
 433         return false;
 434     }
 435     if (sve_access_check(s)) {
 436         unsigned vsz = vec_full_reg_size(s);
 437         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 438                            vec_full_reg_offset(s, a->rn),
 439                            pred_full_reg_offset(s, a->pg),
 440                            vsz, vsz, 0, fn);
 441     }
 442     return true;
 443 }
 444
 445 #define DO_ZPZ(NAME, name) \
 446 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
 447 {                                                                   \
 448     static gen_helper_gvec_3 * const fns[4] = {                     \
 449         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
 450         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
 451     };                                                              \
 452     return do_zpz_ool(s, a, fns[a->esz]);                           \
 453 }
 454
 455 DO_ZPZ(CLS, cls)
 456 DO_ZPZ(CLZ, clz)
 457 DO_ZPZ(CNT_zpz, cnt_zpz)
 458 DO_ZPZ(CNOT, cnot)
 459 DO_ZPZ(NOT_zpz, not_zpz)
 460 DO_ZPZ(ABS, abs)
 461 DO_ZPZ(NEG, neg)
 462
 463 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
 464 {
 465     static gen_helper_gvec_3 * const fns[4] = {
 466         NULL,
 467         gen_helper_sve_fabs_h,
 468         gen_helper_sve_fabs_s,
 469         gen_helper_sve_fabs_d
 470     };
 471     return do_zpz_ool(s, a, fns[a->esz]);
 472 }
 473
 474 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
 475 {
 476     static gen_helper_gvec_3 * const fns[4] = {
 477         NULL,
 478         gen_helper_sve_fneg_h,
 479         gen_helper_sve_fneg_s,
 480         gen_helper_sve_fneg_d
 481     };
 482     return do_zpz_ool(s, a, fns[a->esz]);
 483 }
 484
 485 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
 486 {
 487     static gen_helper_gvec_3 * const fns[4] = {
 488         NULL,
 489         gen_helper_sve_sxtb_h,
 490         gen_helper_sve_sxtb_s,
 491         gen_helper_sve_sxtb_d
 492     };
 493     return do_zpz_ool(s, a, fns[a->esz]);
 494 }
 495
 496 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
 497 {
 498     static gen_helper_gvec_3 * const fns[4] = {
 499         NULL,
 500         gen_helper_sve_uxtb_h,
 501         gen_helper_sve_uxtb_s,
 502         gen_helper_sve_uxtb_d
 503     };
 504     return do_zpz_ool(s, a, fns[a->esz]);
 505 }
 506
 507 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
 508 {
 509     static gen_helper_gvec_3 * const fns[4] = {
 510         NULL, NULL,
 511         gen_helper_sve_sxth_s,
 512         gen_helper_sve_sxth_d
 513     };
 514     return do_zpz_ool(s, a, fns[a->esz]);
 515 }
 516
 517 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
 518 {
 519     static gen_helper_gvec_3 * const fns[4] = {
 520         NULL, NULL,
 521         gen_helper_sve_uxth_s,
 522         gen_helper_sve_uxth_d
 523     };
 524     return do_zpz_ool(s, a, fns[a->esz]);
 525 }
 526
 527 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
 528 {
 529     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
 530 }
 531
 532 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
 533 {
 534     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
 535 }
 536
 537 #undef DO_ZPZ
 538
 539 /*
 540  *** SVE Integer Reduction Group
 541  */
 542
 543 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
 544 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
 545                        gen_helper_gvec_reduc *fn)
 546 {
 547     unsigned vsz = vec_full_reg_size(s);
 548     TCGv_ptr t_zn, t_pg;
 549     TCGv_i32 desc;
 550     TCGv_i64 temp;
 551
 552     if (fn == NULL) {
 553         return false;
 554     }
 555     if (!sve_access_check(s)) {
 556         return true;
 557     }
 558
 559     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 560     temp = tcg_temp_new_i64();
 561     t_zn = tcg_temp_new_ptr();
 562     t_pg = tcg_temp_new_ptr();
 563
 564     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
 565     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
 566     fn(temp, t_zn, t_pg, desc);
 567     tcg_temp_free_ptr(t_zn);
 568     tcg_temp_free_ptr(t_pg);
 569     tcg_temp_free_i32(desc);
 570
 571     write_fp_dreg(s, a->rd, temp);
 572     tcg_temp_free_i64(temp);
 573     return true;
 574 }
 575
 576 #define DO_VPZ(NAME, name) \
 577 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
 578 {                                                                        \
 579     static gen_helper_gvec_reduc * const fns[4] = {                      \
 580         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
 581         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
 582     };                                                                   \
 583     return do_vpz_ool(s, a, fns[a->esz]);                                \
 584 }
 585
 586 DO_VPZ(ORV, orv)
 587 DO_VPZ(ANDV, andv)
 588 DO_VPZ(EORV, eorv)
 589
 590 DO_VPZ(UADDV, uaddv)
 591 DO_VPZ(SMAXV, smaxv)
 592 DO_VPZ(UMAXV, umaxv)
 593 DO_VPZ(SMINV, sminv)
 594 DO_VPZ(UMINV, uminv)
 595
 596 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
 597 {
 598     static gen_helper_gvec_reduc * const fns[4] = {
 599         gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
 600         gen_helper_sve_saddv_s, NULL
 601     };
 602     return do_vpz_ool(s, a, fns[a->esz]);
 603 }
 604
 605 #undef DO_VPZ
 606
 607 /*
 608  *** SVE Shift by Immediate - Predicated Group
 609  */
 610
 611 /* Store zero into every active element of Zd.  We will use this for two
 612  * and three-operand predicated instructions for which logic dictates a
 613  * zero result.
 614  */
 615 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
 616 {
 617     static gen_helper_gvec_2 * const fns[4] = {
 618         gen_helper_sve_clr_b, gen_helper_sve_clr_h,
 619         gen_helper_sve_clr_s, gen_helper_sve_clr_d,
 620     };
 621     if (sve_access_check(s)) {
 622         unsigned vsz = vec_full_reg_size(s);
 623         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 624                            pred_full_reg_offset(s, pg),
 625                            vsz, vsz, 0, fns[esz]);
 626     }
 627     return true;
 628 }
 629
 630 /* Copy Zn into Zd, storing zeros into inactive elements.  */
 631 static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
 632 {
 633     static gen_helper_gvec_3 * const fns[4] = {
 634         gen_helper_sve_movz_b, gen_helper_sve_movz_h,
 635         gen_helper_sve_movz_s, gen_helper_sve_movz_d,
 636     };
 637     unsigned vsz = vec_full_reg_size(s);
 638     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 639                        vec_full_reg_offset(s, rn),
 640                        pred_full_reg_offset(s, pg),
 641                        vsz, vsz, 0, fns[esz]);
 642 }
 643
 644 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 645                         gen_helper_gvec_3 *fn)
 646 {
 647     if (sve_access_check(s)) {
 648         unsigned vsz = vec_full_reg_size(s);
 649         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 650                            vec_full_reg_offset(s, a->rn),
 651                            pred_full_reg_offset(s, a->pg),
 652                            vsz, vsz, a->imm, fn);
 653     }
 654     return true;
 655 }
 656
 657 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
 658 {
 659     static gen_helper_gvec_3 * const fns[4] = {
 660         gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
 661         gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
 662     };
 663     if (a->esz < 0) {
 664         /* Invalid tsz encoding -- see tszimm_esz. */
 665         return false;
 666     }
 667     /* Shift by element size is architecturally valid.  For
 668        arithmetic right-shift, it's the same as by one less. */
 669     a->imm = MIN(a->imm, (8 << a->esz) - 1);
 670     return do_zpzi_ool(s, a, fns[a->esz]);
 671 }
 672
 673 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
 674 {
 675     static gen_helper_gvec_3 * const fns[4] = {
 676         gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
 677         gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
 678     };
 679     if (a->esz < 0) {
 680         return false;
 681     }
 682     /* Shift by element size is architecturally valid.
 683        For logical shifts, it is a zeroing operation.  */
 684     if (a->imm >= (8 << a->esz)) {
 685         return do_clr_zp(s, a->rd, a->pg, a->esz);
 686     } else {
 687         return do_zpzi_ool(s, a, fns[a->esz]);
 688     }
 689 }
 690
 691 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
 692 {
 693     static gen_helper_gvec_3 * const fns[4] = {
 694         gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
 695         gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
 696     };
 697     if (a->esz < 0) {
 698         return false;
 699     }
 700     /* Shift by element size is architecturally valid.
 701        For logical shifts, it is a zeroing operation.  */
 702     if (a->imm >= (8 << a->esz)) {
 703         return do_clr_zp(s, a->rd, a->pg, a->esz);
 704     } else {
 705         return do_zpzi_ool(s, a, fns[a->esz]);
 706     }
 707 }
 708
 709 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
 710 {
 711     static gen_helper_gvec_3 * const fns[4] = {
 712         gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
 713         gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
 714     };
 715     if (a->esz < 0) {
 716         return false;
 717     }
 718     /* Shift by element size is architecturally valid.  For arithmetic
 719        right shift for division, it is a zeroing operation.  */
 720     if (a->imm >= (8 << a->esz)) {
 721         return do_clr_zp(s, a->rd, a->pg, a->esz);
 722     } else {
 723         return do_zpzi_ool(s, a, fns[a->esz]);
 724     }
 725 }
 726
 727 /*
 728  *** SVE Bitwise Shift - Predicated Group
 729  */
 730
 731 #define DO_ZPZW(NAME, name) \
 732 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
 733 {                                                                         \
 734     static gen_helper_gvec_4 * const fns[3] = {                           \
 735         gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
 736         gen_helper_sve_##name##_zpzw_s,                                   \
 737     };                                                                    \
 738     if (a->esz < 0 || a->esz >= 3) {                                      \
 739         return false;                                                     \
 740     }                                                                     \
 741     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 742 }
 743
 744 DO_ZPZW(ASR, asr)
 745 DO_ZPZW(LSR, lsr)
 746 DO_ZPZW(LSL, lsl)
 747
 748 #undef DO_ZPZW
 749
 750 /*
 751  *** SVE Bitwise Shift - Unpredicated Group
 752  */
 753
 754 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
 755                          void (*gvec_fn)(unsigned, uint32_t, uint32_t,
 756                                          int64_t, uint32_t, uint32_t))
 757 {
 758     if (a->esz < 0) {
 759         /* Invalid tsz encoding -- see tszimm_esz. */
 760         return false;
 761     }
 762     if (sve_access_check(s)) {
 763         unsigned vsz = vec_full_reg_size(s);
 764         /* Shift by element size is architecturally valid.  For
 765            arithmetic right-shift, it's the same as by one less.
 766            Otherwise it is a zeroing operation.  */
 767         if (a->imm >= 8 << a->esz) {
 768             if (asr) {
 769                 a->imm = (8 << a->esz) - 1;
 770             } else {
 771                 do_dupi_z(s, a->rd, 0);
 772                 return true;
 773             }
 774         }
 775         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
 776                 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
 777     }
 778     return true;
 779 }
 780
 781 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
 782 {
 783     return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
 784 }
 785
 786 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
 787 {
 788     return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
 789 }
 790
 791 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
 792 {
 793     return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
 794 }
 795
 796 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
 797 {
 798     if (fn == NULL) {
 799         return false;
 800     }
 801     if (sve_access_check(s)) {
 802         unsigned vsz = vec_full_reg_size(s);
 803         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 804                            vec_full_reg_offset(s, a->rn),
 805                            vec_full_reg_offset(s, a->rm),
 806                            vsz, vsz, 0, fn);
 807     }
 808     return true;
 809 }
 810
 811 #define DO_ZZW(NAME, name) \
 812 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
 813 {                                                                         \
 814     static gen_helper_gvec_3 * const fns[4] = {                           \
 815         gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
 816         gen_helper_sve_##name##_zzw_s, NULL                               \
 817     };                                                                    \
 818     return do_zzw_ool(s, a, fns[a->esz]);                                 \
 819 }
 820
 821 DO_ZZW(ASR, asr)
 822 DO_ZZW(LSR, lsr)
 823 DO_ZZW(LSL, lsl)
 824
 825 #undef DO_ZZW
 826
 827 /*
 828  *** SVE Integer Multiply-Add Group
 829  */
 830
 831 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
 832                          gen_helper_gvec_5 *fn)
 833 {
 834     if (sve_access_check(s)) {
 835         unsigned vsz = vec_full_reg_size(s);
 836         tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
 837                            vec_full_reg_offset(s, a->ra),
 838                            vec_full_reg_offset(s, a->rn),
 839                            vec_full_reg_offset(s, a->rm),
 840                            pred_full_reg_offset(s, a->pg),
 841                            vsz, vsz, 0, fn);
 842     }
 843     return true;
 844 }
 845
 846 #define DO_ZPZZZ(NAME, name) \
 847 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
 848 {                                                                    \
 849     static gen_helper_gvec_5 * const fns[4] = {                      \
 850         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
 851         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
 852     };                                                               \
 853     return do_zpzzz_ool(s, a, fns[a->esz]);                          \
 854 }
 855
 856 DO_ZPZZZ(MLA, mla)
 857 DO_ZPZZZ(MLS, mls)
 858
 859 #undef DO_ZPZZZ
 860
 861 /*
 862  *** SVE Index Generation Group
 863  */
 864
 865 static void do_index(DisasContext *s, int esz, int rd,
 866                      TCGv_i64 start, TCGv_i64 incr)
 867 {
 868     unsigned vsz = vec_full_reg_size(s);
 869     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 870     TCGv_ptr t_zd = tcg_temp_new_ptr();
 871
 872     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
 873     if (esz == 3) {
 874         gen_helper_sve_index_d(t_zd, start, incr, desc);
 875     } else {
 876         typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 877         static index_fn * const fns[3] = {
 878             gen_helper_sve_index_b,
 879             gen_helper_sve_index_h,
 880             gen_helper_sve_index_s,
 881         };
 882         TCGv_i32 s32 = tcg_temp_new_i32();
 883         TCGv_i32 i32 = tcg_temp_new_i32();
 884
 885         tcg_gen_extrl_i64_i32(s32, start);
 886         tcg_gen_extrl_i64_i32(i32, incr);
 887         fns[esz](t_zd, s32, i32, desc);
 888
 889         tcg_temp_free_i32(s32);
 890         tcg_temp_free_i32(i32);
 891     }
 892     tcg_temp_free_ptr(t_zd);
 893     tcg_temp_free_i32(desc);
 894 }
 895
 896 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
 897 {
 898     if (sve_access_check(s)) {
 899         TCGv_i64 start = tcg_const_i64(a->imm1);
 900         TCGv_i64 incr = tcg_const_i64(a->imm2);
 901         do_index(s, a->esz, a->rd, start, incr);
 902         tcg_temp_free_i64(start);
 903         tcg_temp_free_i64(incr);
 904     }
 905     return true;
 906 }
 907
 908 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
 909 {
 910     if (sve_access_check(s)) {
 911         TCGv_i64 start = tcg_const_i64(a->imm);
 912         TCGv_i64 incr = cpu_reg(s, a->rm);
 913         do_index(s, a->esz, a->rd, start, incr);
 914         tcg_temp_free_i64(start);
 915     }
 916     return true;
 917 }
 918
 919 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
 920 {
 921     if (sve_access_check(s)) {
 922         TCGv_i64 start = cpu_reg(s, a->rn);
 923         TCGv_i64 incr = tcg_const_i64(a->imm);
 924         do_index(s, a->esz, a->rd, start, incr);
 925         tcg_temp_free_i64(incr);
 926     }
 927     return true;
 928 }
 929
 930 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
 931 {
 932     if (sve_access_check(s)) {
 933         TCGv_i64 start = cpu_reg(s, a->rn);
 934         TCGv_i64 incr = cpu_reg(s, a->rm);
 935         do_index(s, a->esz, a->rd, start, incr);
 936     }
 937     return true;
 938 }
 939
 940 /*
 941  *** SVE Stack Allocation Group
 942  */
 943
 944 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
 945 {
 946     if (sve_access_check(s)) {
 947         TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 948         TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 949         tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
 950     }
 951     return true;
 952 }
 953
 954 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
 955 {
 956     if (sve_access_check(s)) {
 957         TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 958         TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 959         tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
 960     }
 961     return true;
 962 }
 963
 964 static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
 965 {
 966     if (sve_access_check(s)) {
 967         TCGv_i64 reg = cpu_reg(s, a->rd);
 968         tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
 969     }
 970     return true;
 971 }
 972
 973 /*
 974  *** SVE Compute Vector Address Group
 975  */
 976
 977 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 978 {
 979     if (sve_access_check(s)) {
 980         unsigned vsz = vec_full_reg_size(s);
 981         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 982                            vec_full_reg_offset(s, a->rn),
 983                            vec_full_reg_offset(s, a->rm),
 984                            vsz, vsz, a->imm, fn);
 985     }
 986     return true;
 987 }
 988
 989 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
 990 {
 991     return do_adr(s, a, gen_helper_sve_adr_p32);
 992 }
 993
 994 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
 995 {
 996     return do_adr(s, a, gen_helper_sve_adr_p64);
 997 }
 998
 999 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
1000 {
1001     return do_adr(s, a, gen_helper_sve_adr_s32);
1002 }
1003
1004 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
1005 {
1006     return do_adr(s, a, gen_helper_sve_adr_u32);
1007 }
1008
1009 /*
1010  *** SVE Integer Misc - Unpredicated Group
1011  */
1012
1013 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
1014 {
1015     static gen_helper_gvec_2 * const fns[4] = {
1016         NULL,
1017         gen_helper_sve_fexpa_h,
1018         gen_helper_sve_fexpa_s,
1019         gen_helper_sve_fexpa_d,
1020     };
1021     if (a->esz == 0) {
1022         return false;
1023     }
1024     if (sve_access_check(s)) {
1025         unsigned vsz = vec_full_reg_size(s);
1026         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1027                            vec_full_reg_offset(s, a->rn),
1028                            vsz, vsz, 0, fns[a->esz]);
1029     }
1030     return true;
1031 }
1032
1033 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
1034 {
1035     static gen_helper_gvec_3 * const fns[4] = {
1036         NULL,
1037         gen_helper_sve_ftssel_h,
1038         gen_helper_sve_ftssel_s,
1039         gen_helper_sve_ftssel_d,
1040     };
1041     if (a->esz == 0) {
1042         return false;
1043     }
1044     if (sve_access_check(s)) {
1045         unsigned vsz = vec_full_reg_size(s);
1046         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1047                            vec_full_reg_offset(s, a->rn),
1048                            vec_full_reg_offset(s, a->rm),
1049                            vsz, vsz, 0, fns[a->esz]);
1050     }
1051     return true;
1052 }
1053
1054 /*
1055  *** SVE Predicate Logical Operations Group
1056  */
1057
1058 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1059                           const GVecGen4 *gvec_op)
1060 {
1061     if (!sve_access_check(s)) {
1062         return true;
1063     }
1064
1065     unsigned psz = pred_gvec_reg_size(s);
1066     int dofs = pred_full_reg_offset(s, a->rd);
1067     int nofs = pred_full_reg_offset(s, a->rn);
1068     int mofs = pred_full_reg_offset(s, a->rm);
1069     int gofs = pred_full_reg_offset(s, a->pg);
1070
1071     if (psz == 8) {
1072         /* Do the operation and the flags generation in temps.  */
1073         TCGv_i64 pd = tcg_temp_new_i64();
1074         TCGv_i64 pn = tcg_temp_new_i64();
1075         TCGv_i64 pm = tcg_temp_new_i64();
1076         TCGv_i64 pg = tcg_temp_new_i64();
1077
1078         tcg_gen_ld_i64(pn, cpu_env, nofs);
1079         tcg_gen_ld_i64(pm, cpu_env, mofs);
1080         tcg_gen_ld_i64(pg, cpu_env, gofs);
1081
1082         gvec_op->fni8(pd, pn, pm, pg);
1083         tcg_gen_st_i64(pd, cpu_env, dofs);
1084
1085         do_predtest1(pd, pg);
1086
1087         tcg_temp_free_i64(pd);
1088         tcg_temp_free_i64(pn);
1089         tcg_temp_free_i64(pm);
1090         tcg_temp_free_i64(pg);
1091     } else {
1092         /* The operation and flags generation is large.  The computation
1093          * of the flags depends on the original contents of the guarding
1094          * predicate.  If the destination overwrites the guarding predicate,
1095          * then the easiest way to get this right is to save a copy.
1096           */
1097         int tofs = gofs;
1098         if (a->rd == a->pg) {
1099             tofs = offsetof(CPUARMState, vfp.preg_tmp);
1100             tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1101         }
1102
1103         tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1104         do_predtest(s, dofs, tofs, psz / 8);
1105     }
1106     return true;
1107 }
1108
1109 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1110 {
1111     tcg_gen_and_i64(pd, pn, pm);
1112     tcg_gen_and_i64(pd, pd, pg);
1113 }
1114
1115 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1116                            TCGv_vec pm, TCGv_vec pg)
1117 {
1118     tcg_gen_and_vec(vece, pd, pn, pm);
1119     tcg_gen_and_vec(vece, pd, pd, pg);
1120 }
1121
1122 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1123 {
1124     static const GVecGen4 op = {
1125         .fni8 = gen_and_pg_i64,
1126         .fniv = gen_and_pg_vec,
1127         .fno = gen_helper_sve_and_pppp,
1128         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1129     };
1130     if (a->s) {
1131         return do_pppp_flags(s, a, &op);
1132     } else if (a->rn == a->rm) {
1133         if (a->pg == a->rn) {
1134             return do_mov_p(s, a->rd, a->rn);
1135         } else {
1136             return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1137         }
1138     } else if (a->pg == a->rn || a->pg == a->rm) {
1139         return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1140     } else {
1141         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1142     }
1143 }
1144
1145 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1146 {
1147     tcg_gen_andc_i64(pd, pn, pm);
1148     tcg_gen_and_i64(pd, pd, pg);
1149 }
1150
1151 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1152                            TCGv_vec pm, TCGv_vec pg)
1153 {
1154     tcg_gen_andc_vec(vece, pd, pn, pm);
1155     tcg_gen_and_vec(vece, pd, pd, pg);
1156 }
1157
1158 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1159 {
1160     static const GVecGen4 op = {
1161         .fni8 = gen_bic_pg_i64,
1162         .fniv = gen_bic_pg_vec,
1163         .fno = gen_helper_sve_bic_pppp,
1164         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1165     };
1166     if (a->s) {
1167         return do_pppp_flags(s, a, &op);
1168     } else if (a->pg == a->rn) {
1169         return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1170     } else {
1171         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1172     }
1173 }
1174
1175 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1176 {
1177     tcg_gen_xor_i64(pd, pn, pm);
1178     tcg_gen_and_i64(pd, pd, pg);
1179 }
1180
1181 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1182                            TCGv_vec pm, TCGv_vec pg)
1183 {
1184     tcg_gen_xor_vec(vece, pd, pn, pm);
1185     tcg_gen_and_vec(vece, pd, pd, pg);
1186 }
1187
1188 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1189 {
1190     static const GVecGen4 op = {
1191         .fni8 = gen_eor_pg_i64,
1192         .fniv = gen_eor_pg_vec,
1193         .fno = gen_helper_sve_eor_pppp,
1194         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1195     };
1196     if (a->s) {
1197         return do_pppp_flags(s, a, &op);
1198     } else {
1199         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1200     }
1201 }
1202
1203 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1204 {
1205     tcg_gen_and_i64(pn, pn, pg);
1206     tcg_gen_andc_i64(pm, pm, pg);
1207     tcg_gen_or_i64(pd, pn, pm);
1208 }
1209
1210 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1211                            TCGv_vec pm, TCGv_vec pg)
1212 {
1213     tcg_gen_and_vec(vece, pn, pn, pg);
1214     tcg_gen_andc_vec(vece, pm, pm, pg);
1215     tcg_gen_or_vec(vece, pd, pn, pm);
1216 }
1217
1218 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1219 {
1220     static const GVecGen4 op = {
1221         .fni8 = gen_sel_pg_i64,
1222         .fniv = gen_sel_pg_vec,
1223         .fno = gen_helper_sve_sel_pppp,
1224         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1225     };
1226     if (a->s) {
1227         return false;
1228     } else {
1229         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1230     }
1231 }
1232
1233 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1234 {
1235     tcg_gen_or_i64(pd, pn, pm);
1236     tcg_gen_and_i64(pd, pd, pg);
1237 }
1238
1239 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1240                            TCGv_vec pm, TCGv_vec pg)
1241 {
1242     tcg_gen_or_vec(vece, pd, pn, pm);
1243     tcg_gen_and_vec(vece, pd, pd, pg);
1244 }
1245
1246 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1247 {
1248     static const GVecGen4 op = {
1249         .fni8 = gen_orr_pg_i64,
1250         .fniv = gen_orr_pg_vec,
1251         .fno = gen_helper_sve_orr_pppp,
1252         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1253     };
1254     if (a->s) {
1255         return do_pppp_flags(s, a, &op);
1256     } else if (a->pg == a->rn && a->rn == a->rm) {
1257         return do_mov_p(s, a->rd, a->rn);
1258     } else {
1259         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1260     }
1261 }
1262
1263 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1264 {
1265     tcg_gen_orc_i64(pd, pn, pm);
1266     tcg_gen_and_i64(pd, pd, pg);
1267 }
1268
1269 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1270                            TCGv_vec pm, TCGv_vec pg)
1271 {
1272     tcg_gen_orc_vec(vece, pd, pn, pm);
1273     tcg_gen_and_vec(vece, pd, pd, pg);
1274 }
1275
1276 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1277 {
1278     static const GVecGen4 op = {
1279         .fni8 = gen_orn_pg_i64,
1280         .fniv = gen_orn_pg_vec,
1281         .fno = gen_helper_sve_orn_pppp,
1282         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1283     };
1284     if (a->s) {
1285         return do_pppp_flags(s, a, &op);
1286     } else {
1287         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1288     }
1289 }
1290
1291 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1292 {
1293     tcg_gen_or_i64(pd, pn, pm);
1294     tcg_gen_andc_i64(pd, pg, pd);
1295 }
1296
1297 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1298                            TCGv_vec pm, TCGv_vec pg)
1299 {
1300     tcg_gen_or_vec(vece, pd, pn, pm);
1301     tcg_gen_andc_vec(vece, pd, pg, pd);
1302 }
1303
1304 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1305 {
1306     static const GVecGen4 op = {
1307         .fni8 = gen_nor_pg_i64,
1308         .fniv = gen_nor_pg_vec,
1309         .fno = gen_helper_sve_nor_pppp,
1310         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1311     };
1312     if (a->s) {
1313         return do_pppp_flags(s, a, &op);
1314     } else {
1315         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1316     }
1317 }
1318
1319 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1320 {
1321     tcg_gen_and_i64(pd, pn, pm);
1322     tcg_gen_andc_i64(pd, pg, pd);
1323 }
1324
1325 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1326                            TCGv_vec pm, TCGv_vec pg)
1327 {
1328     tcg_gen_and_vec(vece, pd, pn, pm);
1329     tcg_gen_andc_vec(vece, pd, pg, pd);
1330 }
1331
1332 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1333 {
1334     static const GVecGen4 op = {
1335         .fni8 = gen_nand_pg_i64,
1336         .fniv = gen_nand_pg_vec,
1337         .fno = gen_helper_sve_nand_pppp,
1338         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1339     };
1340     if (a->s) {
1341         return do_pppp_flags(s, a, &op);
1342     } else {
1343         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1344     }
1345 }
1346
1347 /*
1348  *** SVE Predicate Misc Group
1349  */
1350
1351 static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1352 {
1353     if (sve_access_check(s)) {
1354         int nofs = pred_full_reg_offset(s, a->rn);
1355         int gofs = pred_full_reg_offset(s, a->pg);
1356         int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1357
1358         if (words == 1) {
1359             TCGv_i64 pn = tcg_temp_new_i64();
1360             TCGv_i64 pg = tcg_temp_new_i64();
1361
1362             tcg_gen_ld_i64(pn, cpu_env, nofs);
1363             tcg_gen_ld_i64(pg, cpu_env, gofs);
1364             do_predtest1(pn, pg);
1365
1366             tcg_temp_free_i64(pn);
1367             tcg_temp_free_i64(pg);
1368         } else {
1369             do_predtest(s, nofs, gofs, words);
1370         }
1371     }
1372     return true;
1373 }
1374
/*
 * See the ARM pseudocode DecodePredCount.
 *
 * Return the number of elements selected by PATTERN, given FULLSZ and
 * the log2 element size ESZ (the element count is FULLSZ >> ESZ).
 * Fixed-count patterns yield 0 when fewer elements than requested are
 * available, as do unrecognised pattern values.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned avail = fullsz >> esz;
    unsigned want;

    if (pattern == 0x0) {
        /* POW2 */
        return pow2floor(avail);
    } else if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1 ... VL8 */
        want = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256 */
        want = 16 << (pattern - 9);
    } else if (pattern == 0x1d) {
        /* MUL4 */
        return avail - avail % 4;
    } else if (pattern == 0x1e) {
        /* MUL3 */
        return avail - avail % 3;
    } else if (pattern == 0x1f) {
        /* ALL */
        return avail;
    } else {
        /* #uimm5 */
        return 0;
    }

    /* A fixed count is all-or-nothing.  */
    return avail >= want ? want : 0;
}
1412
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 *
 * ESZ and PAT are handed to decode_pred_count to find how many
 * elements of predicate RD become active; the remainder of the
 * predicate is zeroed.  When SETFLAG is true the N, Z, C and V flag
 * variables are also written (the PTRUES form).  Always returns true;
 * nothing is emitted if the SVE access check fails.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    /* fullsz is used below as the number of bits in the predicate.  */
    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        /* Nothing active: every word, over the whole predicate, is zero.  */
        lastword = word = 0;
        setsz = fullsz;
    } else {
        /*
         * WORD is the repeating per-element-size pattern for a fully
         * populated 64-bit word; LASTWORD is the same pattern trimmed
         * to the bits left over when SETSZ is not a multiple of 64.
         */
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate is covered by a single 64-bit store.  */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /*
         * No partial word is needed.  If SETSZ survives size_for_gvec
         * unchanged, one gvec dup can write the pattern over OPRSZ
         * bytes, with MAXSZ passed as the gvec maximum size.
         */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* Convert both counts from bits to bytes, the unit of OFS + I below.  */
    setsz /= 8;
    fullsz /= 8;

    /* Full pattern words first ...  */
    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    /* ... then the trimmed word, if there is one ...  */
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    /* ... then zero whatever remains of the predicate.  */
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        /*
         * The flag values are known at translation time and depend
         * only on whether any pattern word is non-zero.
         */
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1492
1493 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
1494 {
1495     return do_predset(s, a->esz, a->rd, a->pat, a->s);
1496 }
1497
1498 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
1499 {
1500     /* Note pat == 31 is #all, to set all elements.  */
1501     return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1502 }
1503
1504 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
1505 {
1506     /* Note pat == 32 is #unimp, to set no elements.  */
1507     return do_predset(s, 0, a->rd, 32, false);
1508 }
1509
1510 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
1511 {
1512     /* The path through do_pppp_flags is complicated enough to want to avoid
1513      * duplication.  Frob the arguments into the form of a predicated AND.
1514      */
1515     arg_rprr_s alt_a = {
1516         .rd = a->rd, .pg = a->pg, .s = a->s,
1517         .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1518     };
1519     return trans_AND_pppp(s, &alt_a);
1520 }
1521
1522 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
1523 {
1524     return do_mov_p(s, a->rd, FFR_PRED_NUM);
1525 }
1526
1527 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
1528 {
1529     return do_mov_p(s, FFR_PRED_NUM, a->rn);
1530 }
1531
1532 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1533                             void (*gen_fn)(TCGv_i32, TCGv_ptr,
1534                                            TCGv_ptr, TCGv_i32))
1535 {
1536     if (!sve_access_check(s)) {
1537         return true;
1538     }
1539
1540     TCGv_ptr t_pd = tcg_temp_new_ptr();
1541     TCGv_ptr t_pg = tcg_temp_new_ptr();
1542     TCGv_i32 t;
1543     unsigned desc;
1544
1545     desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1546     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1547
1548     tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1549     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1550     t = tcg_const_i32(desc);
1551
1552     gen_fn(t, t_pd, t_pg, t);
1553     tcg_temp_free_ptr(t_pd);
1554     tcg_temp_free_ptr(t_pg);
1555
1556     do_pred_flags(t);
1557     tcg_temp_free_i32(t);
1558     return true;
1559 }
1560
1561 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
1562 {
1563     return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1564 }
1565
1566 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
1567 {
1568     return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1569 }
1570
1571 /*
1572  *** SVE Element Count Group
1573  */
1574
1575 /* Perform an inline saturating addition of a 32-bit value within
1576  * a 64-bit register.  The second operand is known to be positive,
1577  * which halves the comparisions we must perform to bound the result.
1578  */
1579 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1580 {
1581     int64_t ibound;
1582     TCGv_i64 bound;
1583     TCGCond cond;
1584
1585     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1586     if (u) {
1587         tcg_gen_ext32u_i64(reg, reg);
1588     } else {
1589         tcg_gen_ext32s_i64(reg, reg);
1590     }
1591     if (d) {
1592         tcg_gen_sub_i64(reg, reg, val);
1593         ibound = (u ? 0 : INT32_MIN);
1594         cond = TCG_COND_LT;
1595     } else {
1596         tcg_gen_add_i64(reg, reg, val);
1597         ibound = (u ? UINT32_MAX : INT32_MAX);
1598         cond = TCG_COND_GT;
1599     }
1600     bound = tcg_const_i64(ibound);
1601     tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1602     tcg_temp_free_i64(bound);
1603 }
1604
/*
 * Perform an inline saturating addition (or, with D set, subtraction)
 * of VAL to the full 64-bit value in REG, leaving the result in REG.
 * U selects unsigned rather than signed saturation.
 *
 * The signed paths bound the result in one direction only (INT64_MIN
 * for subtraction, INT64_MAX for addition), which relies on VAL being
 * non-negative.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    /* t2 is only allocated, and only freed, on the signed paths.  */
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* reg = (reg <u val) ? 0 : reg - val  */
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            /* A sum that is <u the original operand wrapped: use all-ones.  */
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            /*
             * t0 ends up with its sign bit set when the operands have
             * different signs and the difference (t1) has a different
             * sign from the original reg.
             */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            /* reg = (t0 < 0) ? INT64_MIN : t1  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            /*
             * t0 ends up with its sign bit set when the operands have
             * the same sign and the sum (now in reg) has a different
             * sign from val.
             */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            /* reg = (t0 < 0) ? INT64_MAX : reg  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
1651
1652 /* Similarly with a vector and a scalar operand.  */
1653 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1654                               TCGv_i64 val, bool u, bool d)
1655 {
1656     unsigned vsz = vec_full_reg_size(s);
1657     TCGv_ptr dptr, nptr;
1658     TCGv_i32 t32, desc;
1659     TCGv_i64 t64;
1660
1661     dptr = tcg_temp_new_ptr();
1662     nptr = tcg_temp_new_ptr();
1663     tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1664     tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1665     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1666
1667     switch (esz) {
1668     case MO_8:
1669         t32 = tcg_temp_new_i32();
1670         tcg_gen_extrl_i64_i32(t32, val);
1671         if (d) {
1672             tcg_gen_neg_i32(t32, t32);
1673         }
1674         if (u) {
1675             gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1676         } else {
1677             gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1678         }
1679         tcg_temp_free_i32(t32);
1680         break;
1681
1682     case MO_16:
1683         t32 = tcg_temp_new_i32();
1684         tcg_gen_extrl_i64_i32(t32, val);
1685         if (d) {
1686             tcg_gen_neg_i32(t32, t32);
1687         }
1688         if (u) {
1689             gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1690         } else {
1691             gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1692         }
1693         tcg_temp_free_i32(t32);
1694         break;
1695
1696     case MO_32:
1697         t64 = tcg_temp_new_i64();
1698         if (d) {
1699             tcg_gen_neg_i64(t64, val);
1700         } else {
1701             tcg_gen_mov_i64(t64, val);
1702         }
1703         if (u) {
1704             gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1705         } else {
1706             gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1707         }
1708         tcg_temp_free_i64(t64);
1709         break;
1710
1711     case MO_64:
1712         if (u) {
1713             if (d) {
1714                 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1715             } else {
1716                 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1717             }
1718         } else if (d) {
1719             t64 = tcg_temp_new_i64();
1720             tcg_gen_neg_i64(t64, val);
1721             gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1722             tcg_temp_free_i64(t64);
1723         } else {
1724             gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1725         }
1726         break;
1727
1728     default:
1729         g_assert_not_reached();
1730     }
1731
1732     tcg_temp_free_ptr(dptr);
1733     tcg_temp_free_ptr(nptr);
1734     tcg_temp_free_i32(desc);
1735 }
1736
1737 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1738 {
1739     if (sve_access_check(s)) {
1740         unsigned fullsz = vec_full_reg_size(s);
1741         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1742         tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1743     }
1744     return true;
1745 }
1746
1747 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1748 {
1749     if (sve_access_check(s)) {
1750         unsigned fullsz = vec_full_reg_size(s);
1751         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1752         int inc = numelem * a->imm * (a->d ? -1 : 1);
1753         TCGv_i64 reg = cpu_reg(s, a->rd);
1754
1755         tcg_gen_addi_i64(reg, reg, inc);
1756     }
1757     return true;
1758 }
1759
1760 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
1761 {
1762     if (!sve_access_check(s)) {
1763         return true;
1764     }
1765
1766     unsigned fullsz = vec_full_reg_size(s);
1767     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1768     int inc = numelem * a->imm;
1769     TCGv_i64 reg = cpu_reg(s, a->rd);
1770
1771     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1772     if (inc == 0) {
1773         if (a->u) {
1774             tcg_gen_ext32u_i64(reg, reg);
1775         } else {
1776             tcg_gen_ext32s_i64(reg, reg);
1777         }
1778     } else {
1779         TCGv_i64 t = tcg_const_i64(inc);
1780         do_sat_addsub_32(reg, t, a->u, a->d);
1781         tcg_temp_free_i64(t);
1782     }
1783     return true;
1784 }
1785
1786 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1787 {
1788     if (!sve_access_check(s)) {
1789         return true;
1790     }
1791
1792     unsigned fullsz = vec_full_reg_size(s);
1793     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1794     int inc = numelem * a->imm;
1795     TCGv_i64 reg = cpu_reg(s, a->rd);
1796
1797     if (inc != 0) {
1798         TCGv_i64 t = tcg_const_i64(inc);
1799         do_sat_addsub_64(reg, t, a->u, a->d);
1800         tcg_temp_free_i64(t);
1801     }
1802     return true;
1803 }
1804
1805 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1806 {
1807     if (a->esz == 0) {
1808         return false;
1809     }
1810
1811     unsigned fullsz = vec_full_reg_size(s);
1812     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1813     int inc = numelem * a->imm;
1814
1815     if (inc != 0) {
1816         if (sve_access_check(s)) {
1817             TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1818             tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1819                               vec_full_reg_offset(s, a->rn),
1820                               t, fullsz, fullsz);
1821             tcg_temp_free_i64(t);
1822         }
1823     } else {
1824         do_mov_z(s, a->rd, a->rn);
1825     }
1826     return true;
1827 }
1828
1829 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1830 {
1831     if (a->esz == 0) {
1832         return false;
1833     }
1834
1835     unsigned fullsz = vec_full_reg_size(s);
1836     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1837     int inc = numelem * a->imm;
1838
1839     if (inc != 0) {
1840         if (sve_access_check(s)) {
1841             TCGv_i64 t = tcg_const_i64(inc);
1842             do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1843             tcg_temp_free_i64(t);
1844         }
1845     } else {
1846         do_mov_z(s, a->rd, a->rn);
1847     }
1848     return true;
1849 }
1850
1851 /*
1852  *** SVE Bitwise Immediate Group
1853  */
1854
1855 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1856 {
1857     uint64_t imm;
1858     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1859                                 extract32(a->dbm, 0, 6),
1860                                 extract32(a->dbm, 6, 6))) {
1861         return false;
1862     }
1863     if (sve_access_check(s)) {
1864         unsigned vsz = vec_full_reg_size(s);
1865         gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1866                 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1867     }
1868     return true;
1869 }
1870
1871 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
1872 {
1873     return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1874 }
1875
1876 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
1877 {
1878     return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1879 }
1880
1881 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
1882 {
1883     return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1884 }
1885
1886 static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
1887 {
1888     uint64_t imm;
1889     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1890                                 extract32(a->dbm, 0, 6),
1891                                 extract32(a->dbm, 6, 6))) {
1892         return false;
1893     }
1894     if (sve_access_check(s)) {
1895         do_dupi_z(s, a->rd, imm);
1896     }
1897     return true;
1898 }
1899
1900 /*
1901  *** SVE Integer Wide Immediate - Predicated Group
1902  */
1903
1904 /* Implement all merging copies.  This is used for CPY (immediate),
1905  * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1906  */
1907 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1908                      TCGv_i64 val)
1909 {
1910     typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1911     static gen_cpy * const fns[4] = {
1912         gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1913         gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1914     };
1915     unsigned vsz = vec_full_reg_size(s);
1916     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1917     TCGv_ptr t_zd = tcg_temp_new_ptr();
1918     TCGv_ptr t_zn = tcg_temp_new_ptr();
1919     TCGv_ptr t_pg = tcg_temp_new_ptr();
1920
1921     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1922     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1923     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1924
1925     fns[esz](t_zd, t_zn, t_pg, val, desc);
1926
1927     tcg_temp_free_ptr(t_zd);
1928     tcg_temp_free_ptr(t_zn);
1929     tcg_temp_free_ptr(t_pg);
1930     tcg_temp_free_i32(desc);
1931 }
1932
1933 static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
1934 {
1935     if (a->esz == 0) {
1936         return false;
1937     }
1938     if (sve_access_check(s)) {
1939         /* Decode the VFP immediate.  */
1940         uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1941         TCGv_i64 t_imm = tcg_const_i64(imm);
1942         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1943         tcg_temp_free_i64(t_imm);
1944     }
1945     return true;
1946 }
1947
1948 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
1949 {
1950     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1951         return false;
1952     }
1953     if (sve_access_check(s)) {
1954         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1955         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1956         tcg_temp_free_i64(t_imm);
1957     }
1958     return true;
1959 }
1960
1961 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
1962 {
1963     static gen_helper_gvec_2i * const fns[4] = {
1964         gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1965         gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1966     };
1967
1968     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1969         return false;
1970     }
1971     if (sve_access_check(s)) {
1972         unsigned vsz = vec_full_reg_size(s);
1973         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1974         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1975                             pred_full_reg_offset(s, a->pg),
1976                             t_imm, vsz, vsz, 0, fns[a->esz]);
1977         tcg_temp_free_i64(t_imm);
1978     }
1979     return true;
1980 }
1981
1982 /*
1983  *** SVE Permute Extract Group
1984  */
1985
1986 static bool trans_EXT(DisasContext *s, arg_EXT *a)
1987 {
1988     if (!sve_access_check(s)) {
1989         return true;
1990     }
1991
1992     unsigned vsz = vec_full_reg_size(s);
1993     unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1994     unsigned n_siz = vsz - n_ofs;
1995     unsigned d = vec_full_reg_offset(s, a->rd);
1996     unsigned n = vec_full_reg_offset(s, a->rn);
1997     unsigned m = vec_full_reg_offset(s, a->rm);
1998
1999     /* Use host vector move insns if we have appropriate sizes
2000      * and no unfortunate overlap.
2001      */
2002     if (m != d
2003         && n_ofs == size_for_gvec(n_ofs)
2004         && n_siz == size_for_gvec(n_siz)
2005         && (d != n || n_siz <= n_ofs)) {
2006         tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2007         if (n_ofs != 0) {
2008             tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2009         }
2010     } else {
2011         tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2012     }
2013     return true;
2014 }
2015
2016 /*
2017  *** SVE Permute - Unpredicated Group
2018  */
2019
2020 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2021 {
2022     if (sve_access_check(s)) {
2023         unsigned vsz = vec_full_reg_size(s);
2024         tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2025                              vsz, vsz, cpu_reg_sp(s, a->rn));
2026     }
2027     return true;
2028 }
2029
2030 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
2031 {
2032     if ((a->imm & 0x1f) == 0) {
2033         return false;
2034     }
2035     if (sve_access_check(s)) {
2036         unsigned vsz = vec_full_reg_size(s);
2037         unsigned dofs = vec_full_reg_offset(s, a->rd);
2038         unsigned esz, index;
2039
2040         esz = ctz32(a->imm);
2041         index = a->imm >> (esz + 1);
2042
2043         if ((index << esz) < vsz) {
2044             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2045             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2046         } else {
2047             /*
2048              * While dup_mem handles 128-bit elements, dup_imm does not.
2049              * Thankfully element size doesn't matter for splatting zero.
2050              */
2051             tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
2052         }
2053     }
2054     return true;
2055 }
2056
2057 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2058 {
2059     typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2060     static gen_insr * const fns[4] = {
2061         gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2062         gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2063     };
2064     unsigned vsz = vec_full_reg_size(s);
2065     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2066     TCGv_ptr t_zd = tcg_temp_new_ptr();
2067     TCGv_ptr t_zn = tcg_temp_new_ptr();
2068
2069     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2070     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2071
2072     fns[a->esz](t_zd, t_zn, val, desc);
2073
2074     tcg_temp_free_ptr(t_zd);
2075     tcg_temp_free_ptr(t_zn);
2076     tcg_temp_free_i32(desc);
2077 }
2078
2079 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2080 {
2081     if (sve_access_check(s)) {
2082         TCGv_i64 t = tcg_temp_new_i64();
2083         tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2084         do_insr_i64(s, a, t);
2085         tcg_temp_free_i64(t);
2086     }
2087     return true;
2088 }
2089
2090 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2091 {
2092     if (sve_access_check(s)) {
2093         do_insr_i64(s, a, cpu_reg(s, a->rm));
2094     }
2095     return true;
2096 }
2097
2098 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
2099 {
2100     static gen_helper_gvec_2 * const fns[4] = {
2101         gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2102         gen_helper_sve_rev_s, gen_helper_sve_rev_d
2103     };
2104
2105     if (sve_access_check(s)) {
2106         unsigned vsz = vec_full_reg_size(s);
2107         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2108                            vec_full_reg_offset(s, a->rn),
2109                            vsz, vsz, 0, fns[a->esz]);
2110     }
2111     return true;
2112 }
2113
2114 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
2115 {
2116     static gen_helper_gvec_3 * const fns[4] = {
2117         gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2118         gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2119     };
2120
2121     if (sve_access_check(s)) {
2122         unsigned vsz = vec_full_reg_size(s);
2123         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2124                            vec_full_reg_offset(s, a->rn),
2125                            vec_full_reg_offset(s, a->rm),
2126                            vsz, vsz, 0, fns[a->esz]);
2127     }
2128     return true;
2129 }
2130
2131 static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2132 {
2133     static gen_helper_gvec_2 * const fns[4][2] = {
2134         { NULL, NULL },
2135         { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2136         { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2137         { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2138     };
2139
2140     if (a->esz == 0) {
2141         return false;
2142     }
2143     if (sve_access_check(s)) {
2144         unsigned vsz = vec_full_reg_size(s);
2145         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2146                            vec_full_reg_offset(s, a->rn)
2147                            + (a->h ? vsz / 2 : 0),
2148                            vsz, vsz, 0, fns[a->esz][a->u]);
2149     }
2150     return true;
2151 }
2152
2153 /*
2154  *** SVE Permute - Predicates Group
2155  */
2156
2157 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2158                           gen_helper_gvec_3 *fn)
2159 {
2160     if (!sve_access_check(s)) {
2161         return true;
2162     }
2163
2164     unsigned vsz = pred_full_reg_size(s);
2165
2166     /* Predicate sizes may be smaller and cannot use simd_desc.
2167        We cannot round up, as we do elsewhere, because we need
2168        the exact size for ZIP2 and REV.  We retain the style for
2169        the other helpers for consistency.  */
2170     TCGv_ptr t_d = tcg_temp_new_ptr();
2171     TCGv_ptr t_n = tcg_temp_new_ptr();
2172     TCGv_ptr t_m = tcg_temp_new_ptr();
2173     TCGv_i32 t_desc;
2174     int desc;
2175
2176     desc = vsz - 2;
2177     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2178     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2179
2180     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2181     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2182     tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2183     t_desc = tcg_const_i32(desc);
2184
2185     fn(t_d, t_n, t_m, t_desc);
2186
2187     tcg_temp_free_ptr(t_d);
2188     tcg_temp_free_ptr(t_n);
2189     tcg_temp_free_ptr(t_m);
2190     tcg_temp_free_i32(t_desc);
2191     return true;
2192 }
2193
2194 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2195                           gen_helper_gvec_2 *fn)
2196 {
2197     if (!sve_access_check(s)) {
2198         return true;
2199     }
2200
2201     unsigned vsz = pred_full_reg_size(s);
2202     TCGv_ptr t_d = tcg_temp_new_ptr();
2203     TCGv_ptr t_n = tcg_temp_new_ptr();
2204     TCGv_i32 t_desc;
2205     int desc;
2206
2207     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2208     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2209
2210     /* Predicate sizes may be smaller and cannot use simd_desc.
2211        We cannot round up, as we do elsewhere, because we need
2212        the exact size for ZIP2 and REV.  We retain the style for
2213        the other helpers for consistency.  */
2214
2215     desc = vsz - 2;
2216     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2217     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2218     t_desc = tcg_const_i32(desc);
2219
2220     fn(t_d, t_n, t_desc);
2221
2222     tcg_temp_free_i32(t_desc);
2223     tcg_temp_free_ptr(t_d);
2224     tcg_temp_free_ptr(t_n);
2225     return true;
2226 }
2227
2228 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
2229 {
2230     return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2231 }
2232
2233 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
2234 {
2235     return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2236 }
2237
2238 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
2239 {
2240     return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2241 }
2242
2243 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
2244 {
2245     return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2246 }
2247
2248 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
2249 {
2250     return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2251 }
2252
2253 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
2254 {
2255     return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2256 }
2257
2258 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
2259 {
2260     return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2261 }
2262
2263 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
2264 {
2265     return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2266 }
2267
2268 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
2269 {
2270     return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2271 }
2272
2273 /*
2274  *** SVE Permute - Interleaving Group
2275  */
2276
2277 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2278 {
2279     static gen_helper_gvec_3 * const fns[4] = {
2280         gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2281         gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2282     };
2283
2284     if (sve_access_check(s)) {
2285         unsigned vsz = vec_full_reg_size(s);
2286         unsigned high_ofs = high ? vsz / 2 : 0;
2287         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2288                            vec_full_reg_offset(s, a->rn) + high_ofs,
2289                            vec_full_reg_offset(s, a->rm) + high_ofs,
2290                            vsz, vsz, 0, fns[a->esz]);
2291     }
2292     return true;
2293 }
2294
2295 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2296                             gen_helper_gvec_3 *fn)
2297 {
2298     if (sve_access_check(s)) {
2299         unsigned vsz = vec_full_reg_size(s);
2300         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2301                            vec_full_reg_offset(s, a->rn),
2302                            vec_full_reg_offset(s, a->rm),
2303                            vsz, vsz, data, fn);
2304     }
2305     return true;
2306 }
2307
2308 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
2309 {
2310     return do_zip(s, a, false);
2311 }
2312
2313 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
2314 {
2315     return do_zip(s, a, true);
2316 }
2317
2318 static gen_helper_gvec_3 * const uzp_fns[4] = {
2319     gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2320     gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2321 };
2322
2323 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
2324 {
2325     return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2326 }
2327
2328 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
2329 {
2330     return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2331 }
2332
2333 static gen_helper_gvec_3 * const trn_fns[4] = {
2334     gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2335     gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2336 };
2337
2338 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
2339 {
2340     return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2341 }
2342
2343 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
2344 {
2345     return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2346 }
2347
2348 /*
2349  *** SVE Permute Vector - Predicated Group
2350  */
2351
2352 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
2353 {
2354     static gen_helper_gvec_3 * const fns[4] = {
2355         NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2356     };
2357     return do_zpz_ool(s, a, fns[a->esz]);
2358 }
2359
2360 /* Call the helper that computes the ARM LastActiveElement pseudocode
2361  * function, scaled by the element size.  This includes the not found
2362  * indication; e.g. not found for esz=3 is -8.
2363  */
2364 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2365 {
2366     /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2367      * round up, as we do elsewhere, because we need the exact size.
2368      */
2369     TCGv_ptr t_p = tcg_temp_new_ptr();
2370     TCGv_i32 t_desc;
2371     unsigned vsz = pred_full_reg_size(s);
2372     unsigned desc;
2373
2374     desc = vsz - 2;
2375     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2376
2377     tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2378     t_desc = tcg_const_i32(desc);
2379
2380     gen_helper_sve_last_active_element(ret, t_p, t_desc);
2381
2382     tcg_temp_free_i32(t_desc);
2383     tcg_temp_free_ptr(t_p);
2384 }
2385
2386 /* Increment LAST to the offset of the next element in the vector,
2387  * wrapping around to 0.
2388  */
2389 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2390 {
2391     unsigned vsz = vec_full_reg_size(s);
2392
2393     tcg_gen_addi_i32(last, last, 1 << esz);
2394     if (is_power_of_2(vsz)) {
2395         tcg_gen_andi_i32(last, last, vsz - 1);
2396     } else {
2397         TCGv_i32 max = tcg_const_i32(vsz);
2398         TCGv_i32 zero = tcg_const_i32(0);
2399         tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2400         tcg_temp_free_i32(max);
2401         tcg_temp_free_i32(zero);
2402     }
2403 }
2404
2405 /* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2406 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2407 {
2408     unsigned vsz = vec_full_reg_size(s);
2409
2410     if (is_power_of_2(vsz)) {
2411         tcg_gen_andi_i32(last, last, vsz - 1);
2412     } else {
2413         TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2414         TCGv_i32 zero = tcg_const_i32(0);
2415         tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2416         tcg_temp_free_i32(max);
2417         tcg_temp_free_i32(zero);
2418     }
2419 }
2420
2421 /* Load an unsigned element of ESZ from BASE+OFS.  */
2422 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2423 {
2424     TCGv_i64 r = tcg_temp_new_i64();
2425
2426     switch (esz) {
2427     case 0:
2428         tcg_gen_ld8u_i64(r, base, ofs);
2429         break;
2430     case 1:
2431         tcg_gen_ld16u_i64(r, base, ofs);
2432         break;
2433     case 2:
2434         tcg_gen_ld32u_i64(r, base, ofs);
2435         break;
2436     case 3:
2437         tcg_gen_ld_i64(r, base, ofs);
2438         break;
2439     default:
2440         g_assert_not_reached();
2441     }
2442     return r;
2443 }
2444
2445 /* Load an unsigned element of ESZ from RM[LAST].  */
2446 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2447                                  int rm, int esz)
2448 {
2449     TCGv_ptr p = tcg_temp_new_ptr();
2450     TCGv_i64 r;
2451
2452     /* Convert offset into vector into offset into ENV.
2453      * The final adjustment for the vector register base
2454      * is added via constant offset to the load.
2455      */
2456 #ifdef HOST_WORDS_BIGENDIAN
2457     /* Adjust for element ordering.  See vec_reg_offset.  */
2458     if (esz < 3) {
2459         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2460     }
2461 #endif
2462     tcg_gen_ext_i32_ptr(p, last);
2463     tcg_gen_add_ptr(p, p, cpu_env);
2464
2465     r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2466     tcg_temp_free_ptr(p);
2467
2468     return r;
2469 }
2470
2471 /* Compute CLAST for a Zreg.  */
2472 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2473 {
2474     TCGv_i32 last;
2475     TCGLabel *over;
2476     TCGv_i64 ele;
2477     unsigned vsz, esz = a->esz;
2478
2479     if (!sve_access_check(s)) {
2480         return true;
2481     }
2482
2483     last = tcg_temp_local_new_i32();
2484     over = gen_new_label();
2485
2486     find_last_active(s, last, esz, a->pg);
2487
2488     /* There is of course no movcond for a 2048-bit vector,
2489      * so we must branch over the actual store.
2490      */
2491     tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2492
2493     if (!before) {
2494         incr_last_active(s, last, esz);
2495     }
2496
2497     ele = load_last_active(s, last, a->rm, esz);
2498     tcg_temp_free_i32(last);
2499
2500     vsz = vec_full_reg_size(s);
2501     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2502     tcg_temp_free_i64(ele);
2503
2504     /* If this insn used MOVPRFX, we may need a second move.  */
2505     if (a->rd != a->rn) {
2506         TCGLabel *done = gen_new_label();
2507         tcg_gen_br(done);
2508
2509         gen_set_label(over);
2510         do_mov_z(s, a->rd, a->rn);
2511
2512         gen_set_label(done);
2513     } else {
2514         gen_set_label(over);
2515     }
2516     return true;
2517 }
2518
2519 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
2520 {
2521     return do_clast_vector(s, a, false);
2522 }
2523
2524 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
2525 {
2526     return do_clast_vector(s, a, true);
2527 }
2528
2529 /* Compute CLAST for a scalar.  */
2530 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2531                             bool before, TCGv_i64 reg_val)
2532 {
2533     TCGv_i32 last = tcg_temp_new_i32();
2534     TCGv_i64 ele, cmp, zero;
2535
2536     find_last_active(s, last, esz, pg);
2537
2538     /* Extend the original value of last prior to incrementing.  */
2539     cmp = tcg_temp_new_i64();
2540     tcg_gen_ext_i32_i64(cmp, last);
2541
2542     if (!before) {
2543         incr_last_active(s, last, esz);
2544     }
2545
2546     /* The conceit here is that while last < 0 indicates not found, after
2547      * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2548      * from which we can load garbage.  We then discard the garbage with
2549      * a conditional move.
2550      */
2551     ele = load_last_active(s, last, rm, esz);
2552     tcg_temp_free_i32(last);
2553
2554     zero = tcg_const_i64(0);
2555     tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2556
2557     tcg_temp_free_i64(zero);
2558     tcg_temp_free_i64(cmp);
2559     tcg_temp_free_i64(ele);
2560 }
2561
2562 /* Compute CLAST for a Vreg.  */
2563 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2564 {
2565     if (sve_access_check(s)) {
2566         int esz = a->esz;
2567         int ofs = vec_reg_offset(s, a->rd, 0, esz);
2568         TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2569
2570         do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2571         write_fp_dreg(s, a->rd, reg);
2572         tcg_temp_free_i64(reg);
2573     }
2574     return true;
2575 }
2576
2577 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
2578 {
2579     return do_clast_fp(s, a, false);
2580 }
2581
2582 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
2583 {
2584     return do_clast_fp(s, a, true);
2585 }
2586
2587 /* Compute CLAST for a Xreg.  */
2588 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2589 {
2590     TCGv_i64 reg;
2591
2592     if (!sve_access_check(s)) {
2593         return true;
2594     }
2595
2596     reg = cpu_reg(s, a->rd);
2597     switch (a->esz) {
2598     case 0:
2599         tcg_gen_ext8u_i64(reg, reg);
2600         break;
2601     case 1:
2602         tcg_gen_ext16u_i64(reg, reg);
2603         break;
2604     case 2:
2605         tcg_gen_ext32u_i64(reg, reg);
2606         break;
2607     case 3:
2608         break;
2609     default:
2610         g_assert_not_reached();
2611     }
2612
2613     do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2614     return true;
2615 }
2616
2617 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
2618 {
2619     return do_clast_general(s, a, false);
2620 }
2621
2622 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
2623 {
2624     return do_clast_general(s, a, true);
2625 }
2626
2627 /* Compute LAST for a scalar.  */
2628 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2629                                int pg, int rm, bool before)
2630 {
2631     TCGv_i32 last = tcg_temp_new_i32();
2632     TCGv_i64 ret;
2633
2634     find_last_active(s, last, esz, pg);
2635     if (before) {
2636         wrap_last_active(s, last, esz);
2637     } else {
2638         incr_last_active(s, last, esz);
2639     }
2640
2641     ret = load_last_active(s, last, rm, esz);
2642     tcg_temp_free_i32(last);
2643     return ret;
2644 }
2645
2646 /* Compute LAST for a Vreg.  */
2647 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2648 {
2649     if (sve_access_check(s)) {
2650         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2651         write_fp_dreg(s, a->rd, val);
2652         tcg_temp_free_i64(val);
2653     }
2654     return true;
2655 }
2656
2657 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
2658 {
2659     return do_last_fp(s, a, false);
2660 }
2661
2662 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
2663 {
2664     return do_last_fp(s, a, true);
2665 }
2666
2667 /* Compute LAST for a Xreg.  */
2668 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2669 {
2670     if (sve_access_check(s)) {
2671         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2672         tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2673         tcg_temp_free_i64(val);
2674     }
2675     return true;
2676 }
2677
2678 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
2679 {
2680     return do_last_general(s, a, false);
2681 }
2682
2683 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
2684 {
2685     return do_last_general(s, a, true);
2686 }
2687
2688 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2689 {
2690     if (sve_access_check(s)) {
2691         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2692     }
2693     return true;
2694 }
2695
2696 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2697 {
2698     if (sve_access_check(s)) {
2699         int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2700         TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2701         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2702         tcg_temp_free_i64(t);
2703     }
2704     return true;
2705 }
2706
2707 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
2708 {
2709     static gen_helper_gvec_3 * const fns[4] = {
2710         NULL,
2711         gen_helper_sve_revb_h,
2712         gen_helper_sve_revb_s,
2713         gen_helper_sve_revb_d,
2714     };
2715     return do_zpz_ool(s, a, fns[a->esz]);
2716 }
2717
2718 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
2719 {
2720     static gen_helper_gvec_3 * const fns[4] = {
2721         NULL,
2722         NULL,
2723         gen_helper_sve_revh_s,
2724         gen_helper_sve_revh_d,
2725     };
2726     return do_zpz_ool(s, a, fns[a->esz]);
2727 }
2728
2729 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
2730 {
2731     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2732 }
2733
2734 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
2735 {
2736     static gen_helper_gvec_3 * const fns[4] = {
2737         gen_helper_sve_rbit_b,
2738         gen_helper_sve_rbit_h,
2739         gen_helper_sve_rbit_s,
2740         gen_helper_sve_rbit_d,
2741     };
2742     return do_zpz_ool(s, a, fns[a->esz]);
2743 }
2744
2745 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
2746 {
2747     if (sve_access_check(s)) {
2748         unsigned vsz = vec_full_reg_size(s);
2749         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2750                            vec_full_reg_offset(s, a->rn),
2751                            vec_full_reg_offset(s, a->rm),
2752                            pred_full_reg_offset(s, a->pg),
2753                            vsz, vsz, a->esz, gen_helper_sve_splice);
2754     }
2755     return true;
2756 }
2757
2758 /*
2759  *** SVE Integer Compare - Vectors Group
2760  */
2761
2762 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2763                           gen_helper_gvec_flags_4 *gen_fn)
2764 {
2765     TCGv_ptr pd, zn, zm, pg;
2766     unsigned vsz;
2767     TCGv_i32 t;
2768
2769     if (gen_fn == NULL) {
2770         return false;
2771     }
2772     if (!sve_access_check(s)) {
2773         return true;
2774     }
2775
2776     vsz = vec_full_reg_size(s);
2777     t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2778     pd = tcg_temp_new_ptr();
2779     zn = tcg_temp_new_ptr();
2780     zm = tcg_temp_new_ptr();
2781     pg = tcg_temp_new_ptr();
2782
2783     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2784     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2785     tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2786     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2787
2788     gen_fn(t, pd, zn, zm, pg, t);
2789
2790     tcg_temp_free_ptr(pd);
2791     tcg_temp_free_ptr(zn);
2792     tcg_temp_free_ptr(zm);
2793     tcg_temp_free_ptr(pg);
2794
2795     do_pred_flags(t);
2796
2797     tcg_temp_free_i32(t);
2798     return true;
2799 }
2800
2801 #define DO_PPZZ(NAME, name) \
2802 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
2803 {                                                                         \
2804     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2805         gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
2806         gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
2807     };                                                                    \
2808     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2809 }
2810
2811 DO_PPZZ(CMPEQ, cmpeq)
2812 DO_PPZZ(CMPNE, cmpne)
2813 DO_PPZZ(CMPGT, cmpgt)
2814 DO_PPZZ(CMPGE, cmpge)
2815 DO_PPZZ(CMPHI, cmphi)
2816 DO_PPZZ(CMPHS, cmphs)
2817
2818 #undef DO_PPZZ
2819
2820 #define DO_PPZW(NAME, name) \
2821 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
2822 {                                                                         \
2823     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2824         gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
2825         gen_helper_sve_##name##_ppzw_s, NULL                              \
2826     };                                                                    \
2827     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2828 }
2829
2830 DO_PPZW(CMPEQ, cmpeq)
2831 DO_PPZW(CMPNE, cmpne)
2832 DO_PPZW(CMPGT, cmpgt)
2833 DO_PPZW(CMPGE, cmpge)
2834 DO_PPZW(CMPHI, cmphi)
2835 DO_PPZW(CMPHS, cmphs)
2836 DO_PPZW(CMPLT, cmplt)
2837 DO_PPZW(CMPLE, cmple)
2838 DO_PPZW(CMPLO, cmplo)
2839 DO_PPZW(CMPLS, cmpls)
2840
2841 #undef DO_PPZW
2842
2843 /*
2844  *** SVE Integer Compare - Immediate Groups
2845  */
2846
2847 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2848                           gen_helper_gvec_flags_3 *gen_fn)
2849 {
2850     TCGv_ptr pd, zn, pg;
2851     unsigned vsz;
2852     TCGv_i32 t;
2853
2854     if (gen_fn == NULL) {
2855         return false;
2856     }
2857     if (!sve_access_check(s)) {
2858         return true;
2859     }
2860
2861     vsz = vec_full_reg_size(s);
2862     t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2863     pd = tcg_temp_new_ptr();
2864     zn = tcg_temp_new_ptr();
2865     pg = tcg_temp_new_ptr();
2866
2867     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2868     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2869     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2870
2871     gen_fn(t, pd, zn, pg, t);
2872
2873     tcg_temp_free_ptr(pd);
2874     tcg_temp_free_ptr(zn);
2875     tcg_temp_free_ptr(pg);
2876
2877     do_pred_flags(t);
2878
2879     tcg_temp_free_i32(t);
2880     return true;
2881 }
2882
2883 #define DO_PPZI(NAME, name) \
2884 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
2885 {                                                                         \
2886     static gen_helper_gvec_flags_3 * const fns[4] = {                     \
2887         gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
2888         gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
2889     };                                                                    \
2890     return do_ppzi_flags(s, a, fns[a->esz]);                              \
2891 }
2892
2893 DO_PPZI(CMPEQ, cmpeq)
2894 DO_PPZI(CMPNE, cmpne)
2895 DO_PPZI(CMPGT, cmpgt)
2896 DO_PPZI(CMPGE, cmpge)
2897 DO_PPZI(CMPHI, cmphi)
2898 DO_PPZI(CMPHS, cmphs)
2899 DO_PPZI(CMPLT, cmplt)
2900 DO_PPZI(CMPLE, cmple)
2901 DO_PPZI(CMPLO, cmplo)
2902 DO_PPZI(CMPLS, cmpls)
2903
2904 #undef DO_PPZI
2905
2906 /*
2907  *** SVE Partition Break Group
2908  */
2909
2910 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2911                     gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2912 {
2913     if (!sve_access_check(s)) {
2914         return true;
2915     }
2916
2917     unsigned vsz = pred_full_reg_size(s);
2918
2919     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2920     TCGv_ptr d = tcg_temp_new_ptr();
2921     TCGv_ptr n = tcg_temp_new_ptr();
2922     TCGv_ptr m = tcg_temp_new_ptr();
2923     TCGv_ptr g = tcg_temp_new_ptr();
2924     TCGv_i32 t = tcg_const_i32(vsz - 2);
2925
2926     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2927     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2928     tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2929     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2930
2931     if (a->s) {
2932         fn_s(t, d, n, m, g, t);
2933         do_pred_flags(t);
2934     } else {
2935         fn(d, n, m, g, t);
2936     }
2937     tcg_temp_free_ptr(d);
2938     tcg_temp_free_ptr(n);
2939     tcg_temp_free_ptr(m);
2940     tcg_temp_free_ptr(g);
2941     tcg_temp_free_i32(t);
2942     return true;
2943 }
2944
2945 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2946                     gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2947 {
2948     if (!sve_access_check(s)) {
2949         return true;
2950     }
2951
2952     unsigned vsz = pred_full_reg_size(s);
2953
2954     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2955     TCGv_ptr d = tcg_temp_new_ptr();
2956     TCGv_ptr n = tcg_temp_new_ptr();
2957     TCGv_ptr g = tcg_temp_new_ptr();
2958     TCGv_i32 t = tcg_const_i32(vsz - 2);
2959
2960     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2961     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2962     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2963
2964     if (a->s) {
2965         fn_s(t, d, n, g, t);
2966         do_pred_flags(t);
2967     } else {
2968         fn(d, n, g, t);
2969     }
2970     tcg_temp_free_ptr(d);
2971     tcg_temp_free_ptr(n);
2972     tcg_temp_free_ptr(g);
2973     tcg_temp_free_i32(t);
2974     return true;
2975 }
2976
2977 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
2978 {
2979     return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2980 }
2981
2982 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
2983 {
2984     return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2985 }
2986
2987 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
2988 {
2989     return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2990 }
2991
2992 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
2993 {
2994     return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2995 }
2996
2997 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
2998 {
2999     return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
3000 }
3001
3002 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
3003 {
3004     return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3005 }
3006
3007 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
3008 {
3009     return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3010 }
3011
3012 /*
3013  *** SVE Predicate Count Group
3014  */
3015
3016 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3017 {
3018     unsigned psz = pred_full_reg_size(s);
3019
3020     if (psz <= 8) {
3021         uint64_t psz_mask;
3022
3023         tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3024         if (pn != pg) {
3025             TCGv_i64 g = tcg_temp_new_i64();
3026             tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3027             tcg_gen_and_i64(val, val, g);
3028             tcg_temp_free_i64(g);
3029         }
3030
3031         /* Reduce the pred_esz_masks value simply to reduce the
3032          * size of the code generated here.
3033          */
3034         psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3035         tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3036
3037         tcg_gen_ctpop_i64(val, val);
3038     } else {
3039         TCGv_ptr t_pn = tcg_temp_new_ptr();
3040         TCGv_ptr t_pg = tcg_temp_new_ptr();
3041         unsigned desc;
3042         TCGv_i32 t_desc;
3043
3044         desc = psz - 2;
3045         desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3046
3047         tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3048         tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3049         t_desc = tcg_const_i32(desc);
3050
3051         gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3052         tcg_temp_free_ptr(t_pn);
3053         tcg_temp_free_ptr(t_pg);
3054         tcg_temp_free_i32(t_desc);
3055     }
3056 }
3057
3058 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
3059 {
3060     if (sve_access_check(s)) {
3061         do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3062     }
3063     return true;
3064 }
3065
3066 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3067 {
3068     if (sve_access_check(s)) {
3069         TCGv_i64 reg = cpu_reg(s, a->rd);
3070         TCGv_i64 val = tcg_temp_new_i64();
3071
3072         do_cntp(s, val, a->esz, a->pg, a->pg);
3073         if (a->d) {
3074             tcg_gen_sub_i64(reg, reg, val);
3075         } else {
3076             tcg_gen_add_i64(reg, reg, val);
3077         }
3078         tcg_temp_free_i64(val);
3079     }
3080     return true;
3081 }
3082
3083 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3084 {
3085     if (a->esz == 0) {
3086         return false;
3087     }
3088     if (sve_access_check(s)) {
3089         unsigned vsz = vec_full_reg_size(s);
3090         TCGv_i64 val = tcg_temp_new_i64();
3091         GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3092
3093         do_cntp(s, val, a->esz, a->pg, a->pg);
3094         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3095                 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3096     }
3097     return true;
3098 }
3099
3100 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3101 {
3102     if (sve_access_check(s)) {
3103         TCGv_i64 reg = cpu_reg(s, a->rd);
3104         TCGv_i64 val = tcg_temp_new_i64();
3105
3106         do_cntp(s, val, a->esz, a->pg, a->pg);
3107         do_sat_addsub_32(reg, val, a->u, a->d);
3108     }
3109     return true;
3110 }
3111
3112 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3113 {
3114     if (sve_access_check(s)) {
3115         TCGv_i64 reg = cpu_reg(s, a->rd);
3116         TCGv_i64 val = tcg_temp_new_i64();
3117
3118         do_cntp(s, val, a->esz, a->pg, a->pg);
3119         do_sat_addsub_64(reg, val, a->u, a->d);
3120     }
3121     return true;
3122 }
3123
3124 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3125 {
3126     if (a->esz == 0) {
3127         return false;
3128     }
3129     if (sve_access_check(s)) {
3130         TCGv_i64 val = tcg_temp_new_i64();
3131         do_cntp(s, val, a->esz, a->pg, a->pg);
3132         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3133     }
3134     return true;
3135 }
3136
3137 /*
3138  *** SVE Integer Compare Scalars Group
3139  */
3140
3141 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3142 {
3143     if (!sve_access_check(s)) {
3144         return true;
3145     }
3146
3147     TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3148     TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3149     TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3150     TCGv_i64 cmp = tcg_temp_new_i64();
3151
3152     tcg_gen_setcond_i64(cond, cmp, rn, rm);
3153     tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3154     tcg_temp_free_i64(cmp);
3155
3156     /* VF = !NF & !CF.  */
3157     tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3158     tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3159
3160     /* Both NF and VF actually look at bit 31.  */
3161     tcg_gen_neg_i32(cpu_NF, cpu_NF);
3162     tcg_gen_neg_i32(cpu_VF, cpu_VF);
3163     return true;
3164 }
3165
3166 static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
3167 {
3168     TCGv_i64 op0, op1, t0, t1, tmax;
3169     TCGv_i32 t2, t3;
3170     TCGv_ptr ptr;
3171     unsigned desc, vsz = vec_full_reg_size(s);
3172     TCGCond cond;
3173
3174     if (!sve_access_check(s)) {
3175         return true;
3176     }
3177
3178     op0 = read_cpu_reg(s, a->rn, 1);
3179     op1 = read_cpu_reg(s, a->rm, 1);
3180
3181     if (!a->sf) {
3182         if (a->u) {
3183             tcg_gen_ext32u_i64(op0, op0);
3184             tcg_gen_ext32u_i64(op1, op1);
3185         } else {
3186             tcg_gen_ext32s_i64(op0, op0);
3187             tcg_gen_ext32s_i64(op1, op1);
3188         }
3189     }
3190
3191     /* For the helper, compress the different conditions into a computation
3192      * of how many iterations for which the condition is true.
3193      */
3194     t0 = tcg_temp_new_i64();
3195     t1 = tcg_temp_new_i64();
3196     tcg_gen_sub_i64(t0, op1, op0);
3197
3198     tmax = tcg_const_i64(vsz >> a->esz);
3199     if (a->eq) {
3200         /* Equality means one more iteration.  */
3201         tcg_gen_addi_i64(t0, t0, 1);
3202
3203         /* If op1 is max (un)signed integer (and the only time the addition
3204          * above could overflow), then we produce an all-true predicate by
3205          * setting the count to the vector length.  This is because the
3206          * pseudocode is described as an increment + compare loop, and the
3207          * max integer would always compare true.
3208          */
3209         tcg_gen_movi_i64(t1, (a->sf
3210                               ? (a->u ? UINT64_MAX : INT64_MAX)
3211                               : (a->u ? UINT32_MAX : INT32_MAX)));
3212         tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3213     }
3214
3215     /* Bound to the maximum.  */
3216     tcg_gen_umin_i64(t0, t0, tmax);
3217     tcg_temp_free_i64(tmax);
3218
3219     /* Set the count to zero if the condition is false.  */
3220     cond = (a->u
3221             ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3222             : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3223     tcg_gen_movi_i64(t1, 0);
3224     tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3225     tcg_temp_free_i64(t1);
3226
3227     /* Since we're bounded, pass as a 32-bit type.  */
3228     t2 = tcg_temp_new_i32();
3229     tcg_gen_extrl_i64_i32(t2, t0);
3230     tcg_temp_free_i64(t0);
3231
3232     /* Scale elements to bits.  */
3233     tcg_gen_shli_i32(t2, t2, a->esz);
3234
3235     desc = (vsz / 8) - 2;
3236     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3237     t3 = tcg_const_i32(desc);
3238
3239     ptr = tcg_temp_new_ptr();
3240     tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3241
3242     gen_helper_sve_while(t2, ptr, t2, t3);
3243     do_pred_flags(t2);
3244
3245     tcg_temp_free_ptr(ptr);
3246     tcg_temp_free_i32(t2);
3247     tcg_temp_free_i32(t3);
3248     return true;
3249 }
3250
3251 /*
3252  *** SVE Integer Wide Immediate - Unpredicated Group
3253  */
3254
3255 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3256 {
3257     if (a->esz == 0) {
3258         return false;
3259     }
3260     if (sve_access_check(s)) {
3261         unsigned vsz = vec_full_reg_size(s);
3262         int dofs = vec_full_reg_offset(s, a->rd);
3263         uint64_t imm;
3264
3265         /* Decode the VFP immediate.  */
3266         imm = vfp_expand_imm(a->esz, a->imm);
3267         tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
3268     }
3269     return true;
3270 }
3271
3272 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3273 {
3274     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3275         return false;
3276     }
3277     if (sve_access_check(s)) {
3278         unsigned vsz = vec_full_reg_size(s);
3279         int dofs = vec_full_reg_offset(s, a->rd);
3280
3281         tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
3282     }
3283     return true;
3284 }
3285
3286 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
3287 {
3288     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3289         return false;
3290     }
3291     if (sve_access_check(s)) {
3292         unsigned vsz = vec_full_reg_size(s);
3293         tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3294                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3295     }
3296     return true;
3297 }
3298
3299 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
3300 {
3301     a->imm = -a->imm;
3302     return trans_ADD_zzi(s, a);
3303 }
3304
3305 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3306 {
3307     static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
3308     static const GVecGen2s op[4] = {
3309         { .fni8 = tcg_gen_vec_sub8_i64,
3310           .fniv = tcg_gen_sub_vec,
3311           .fno = gen_helper_sve_subri_b,
3312           .opt_opc = vecop_list,
3313           .vece = MO_8,
3314           .scalar_first = true },
3315         { .fni8 = tcg_gen_vec_sub16_i64,
3316           .fniv = tcg_gen_sub_vec,
3317           .fno = gen_helper_sve_subri_h,
3318           .opt_opc = vecop_list,
3319           .vece = MO_16,
3320           .scalar_first = true },
3321         { .fni4 = tcg_gen_sub_i32,
3322           .fniv = tcg_gen_sub_vec,
3323           .fno = gen_helper_sve_subri_s,
3324           .opt_opc = vecop_list,
3325           .vece = MO_32,
3326           .scalar_first = true },
3327         { .fni8 = tcg_gen_sub_i64,
3328           .fniv = tcg_gen_sub_vec,
3329           .fno = gen_helper_sve_subri_d,
3330           .opt_opc = vecop_list,
3331           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3332           .vece = MO_64,
3333           .scalar_first = true }
3334     };
3335
3336     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3337         return false;
3338     }
3339     if (sve_access_check(s)) {
3340         unsigned vsz = vec_full_reg_size(s);
3341         TCGv_i64 c = tcg_const_i64(a->imm);
3342         tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3343                         vec_full_reg_offset(s, a->rn),
3344                         vsz, vsz, c, &op[a->esz]);
3345         tcg_temp_free_i64(c);
3346     }
3347     return true;
3348 }
3349
3350 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3351 {
3352     if (sve_access_check(s)) {
3353         unsigned vsz = vec_full_reg_size(s);
3354         tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3355                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3356     }
3357     return true;
3358 }
3359
3360 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3361 {
3362     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3363         return false;
3364     }
3365     if (sve_access_check(s)) {
3366         TCGv_i64 val = tcg_const_i64(a->imm);
3367         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3368         tcg_temp_free_i64(val);
3369     }
3370     return true;
3371 }
3372
3373 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
3374 {
3375     return do_zzi_sat(s, a, false, false);
3376 }
3377
3378 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
3379 {
3380     return do_zzi_sat(s, a, true, false);
3381 }
3382
3383 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3384 {
3385     return do_zzi_sat(s, a, false, true);
3386 }
3387
3388 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3389 {
3390     return do_zzi_sat(s, a, true, true);
3391 }
3392
3393 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3394 {
3395     if (sve_access_check(s)) {
3396         unsigned vsz = vec_full_reg_size(s);
3397         TCGv_i64 c = tcg_const_i64(a->imm);
3398
3399         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3400                             vec_full_reg_offset(s, a->rn),
3401                             c, vsz, vsz, 0, fn);
3402         tcg_temp_free_i64(c);
3403     }
3404     return true;
3405 }
3406
3407 #define DO_ZZI(NAME, name) \
3408 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
3409 {                                                                       \
3410     static gen_helper_gvec_2i * const fns[4] = {                        \
3411         gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
3412         gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
3413     };                                                                  \
3414     return do_zzi_ool(s, a, fns[a->esz]);                               \
3415 }
3416
3417 DO_ZZI(SMAX, smax)
3418 DO_ZZI(UMAX, umax)
3419 DO_ZZI(SMIN, smin)
3420 DO_ZZI(UMIN, umin)
3421
3422 #undef DO_ZZI
3423
3424 static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
3425 {
3426     static gen_helper_gvec_3 * const fns[2][2] = {
3427         { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3428         { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3429     };
3430
3431     if (sve_access_check(s)) {
3432         unsigned vsz = vec_full_reg_size(s);
3433         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3434                            vec_full_reg_offset(s, a->rn),
3435                            vec_full_reg_offset(s, a->rm),
3436                            vsz, vsz, 0, fns[a->u][a->sz]);
3437     }
3438     return true;
3439 }
3440
3441 static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
3442 {
3443     static gen_helper_gvec_3 * const fns[2][2] = {
3444         { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3445         { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3446     };
3447
3448     if (sve_access_check(s)) {
3449         unsigned vsz = vec_full_reg_size(s);
3450         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3451                            vec_full_reg_offset(s, a->rn),
3452                            vec_full_reg_offset(s, a->rm),
3453                            vsz, vsz, a->index, fns[a->u][a->sz]);
3454     }
3455     return true;
3456 }
3457
3458
3459 /*
3460  *** SVE Floating Point Multiply-Add Indexed Group
3461  */
3462
3463 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
3464 {
3465     static gen_helper_gvec_4_ptr * const fns[3] = {
3466         gen_helper_gvec_fmla_idx_h,
3467         gen_helper_gvec_fmla_idx_s,
3468         gen_helper_gvec_fmla_idx_d,
3469     };
3470
3471     if (sve_access_check(s)) {
3472         unsigned vsz = vec_full_reg_size(s);
3473         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3474         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3475                            vec_full_reg_offset(s, a->rn),
3476                            vec_full_reg_offset(s, a->rm),
3477                            vec_full_reg_offset(s, a->ra),
3478                            status, vsz, vsz, (a->index << 1) | a->sub,
3479                            fns[a->esz - 1]);
3480         tcg_temp_free_ptr(status);
3481     }
3482     return true;
3483 }
3484
3485 /*
3486  *** SVE Floating Point Multiply Indexed Group
3487  */
3488
3489 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
3490 {
3491     static gen_helper_gvec_3_ptr * const fns[3] = {
3492         gen_helper_gvec_fmul_idx_h,
3493         gen_helper_gvec_fmul_idx_s,
3494         gen_helper_gvec_fmul_idx_d,
3495     };
3496
3497     if (sve_access_check(s)) {
3498         unsigned vsz = vec_full_reg_size(s);
3499         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3500         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3501                            vec_full_reg_offset(s, a->rn),
3502                            vec_full_reg_offset(s, a->rm),
3503                            status, vsz, vsz, a->index, fns[a->esz - 1]);
3504         tcg_temp_free_ptr(status);
3505     }
3506     return true;
3507 }
3508
3509 /*
3510  *** SVE Floating Point Fast Reduction Group
3511  */
3512
3513 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3514                                   TCGv_ptr, TCGv_i32);
3515
3516 static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3517                       gen_helper_fp_reduce *fn)
3518 {
3519     unsigned vsz = vec_full_reg_size(s);
3520     unsigned p2vsz = pow2ceil(vsz);
3521     TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3522     TCGv_ptr t_zn, t_pg, status;
3523     TCGv_i64 temp;
3524
3525     temp = tcg_temp_new_i64();
3526     t_zn = tcg_temp_new_ptr();
3527     t_pg = tcg_temp_new_ptr();
3528
3529     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3530     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3531     status = get_fpstatus_ptr(a->esz == MO_16);
3532
3533     fn(temp, t_zn, t_pg, status, t_desc);
3534     tcg_temp_free_ptr(t_zn);
3535     tcg_temp_free_ptr(t_pg);
3536     tcg_temp_free_ptr(status);
3537     tcg_temp_free_i32(t_desc);
3538
3539     write_fp_dreg(s, a->rd, temp);
3540     tcg_temp_free_i64(temp);
3541 }
3542
3543 #define DO_VPZ(NAME, name) \
3544 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
3545 {                                                                        \
3546     static gen_helper_fp_reduce * const fns[3] = {                       \
3547         gen_helper_sve_##name##_h,                                       \
3548         gen_helper_sve_##name##_s,                                       \
3549         gen_helper_sve_##name##_d,                                       \
3550     };                                                                   \
3551     if (a->esz == 0) {                                                   \
3552         return false;                                                    \
3553     }                                                                    \
3554     if (sve_access_check(s)) {                                           \
3555         do_reduce(s, a, fns[a->esz - 1]);                                \
3556     }                                                                    \
3557     return true;                                                         \
3558 }
3559
3560 DO_VPZ(FADDV, faddv)
3561 DO_VPZ(FMINNMV, fminnmv)
3562 DO_VPZ(FMAXNMV, fmaxnmv)
3563 DO_VPZ(FMINV, fminv)
3564 DO_VPZ(FMAXV, fmaxv)
3565
3566 /*
3567  *** SVE Floating Point Unary Operations - Unpredicated Group
3568  */
3569
3570 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3571 {
3572     unsigned vsz = vec_full_reg_size(s);
3573     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3574
3575     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3576                        vec_full_reg_offset(s, a->rn),
3577                        status, vsz, vsz, 0, fn);
3578     tcg_temp_free_ptr(status);
3579 }
3580
3581 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3582 {
3583     static gen_helper_gvec_2_ptr * const fns[3] = {
3584         gen_helper_gvec_frecpe_h,
3585         gen_helper_gvec_frecpe_s,
3586         gen_helper_gvec_frecpe_d,
3587     };
3588     if (a->esz == 0) {
3589         return false;
3590     }
3591     if (sve_access_check(s)) {
3592         do_zz_fp(s, a, fns[a->esz - 1]);
3593     }
3594     return true;
3595 }
3596
3597 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3598 {
3599     static gen_helper_gvec_2_ptr * const fns[3] = {
3600         gen_helper_gvec_frsqrte_h,
3601         gen_helper_gvec_frsqrte_s,
3602         gen_helper_gvec_frsqrte_d,
3603     };
3604     if (a->esz == 0) {
3605         return false;
3606     }
3607     if (sve_access_check(s)) {
3608         do_zz_fp(s, a, fns[a->esz - 1]);
3609     }
3610     return true;
3611 }
3612
3613 /*
3614  *** SVE Floating Point Compare with Zero Group
3615  */
3616
3617 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3618                       gen_helper_gvec_3_ptr *fn)
3619 {
3620     unsigned vsz = vec_full_reg_size(s);
3621     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3622
3623     tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3624                        vec_full_reg_offset(s, a->rn),
3625                        pred_full_reg_offset(s, a->pg),
3626                        status, vsz, vsz, 0, fn);
3627     tcg_temp_free_ptr(status);
3628 }
3629
3630 #define DO_PPZ(NAME, name) \
3631 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
3632 {                                                                 \
3633     static gen_helper_gvec_3_ptr * const fns[3] = {               \
3634         gen_helper_sve_##name##_h,                                \
3635         gen_helper_sve_##name##_s,                                \
3636         gen_helper_sve_##name##_d,                                \
3637     };                                                            \
3638     if (a->esz == 0) {                                            \
3639         return false;                                             \
3640     }                                                             \
3641     if (sve_access_check(s)) {                                    \
3642         do_ppz_fp(s, a, fns[a->esz - 1]);                         \
3643     }                                                             \
3644     return true;                                                  \
3645 }
3646
3647 DO_PPZ(FCMGE_ppz0, fcmge0)
3648 DO_PPZ(FCMGT_ppz0, fcmgt0)
3649 DO_PPZ(FCMLE_ppz0, fcmle0)
3650 DO_PPZ(FCMLT_ppz0, fcmlt0)
3651 DO_PPZ(FCMEQ_ppz0, fcmeq0)
3652 DO_PPZ(FCMNE_ppz0, fcmne0)
3653
3654 #undef DO_PPZ
3655
3656 /*
3657  *** SVE floating-point trig multiply-add coefficient
3658  */
3659
3660 static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
3661 {
3662     static gen_helper_gvec_3_ptr * const fns[3] = {
3663         gen_helper_sve_ftmad_h,
3664         gen_helper_sve_ftmad_s,
3665         gen_helper_sve_ftmad_d,
3666     };
3667
3668     if (a->esz == 0) {
3669         return false;
3670     }
3671     if (sve_access_check(s)) {
3672         unsigned vsz = vec_full_reg_size(s);
3673         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3674         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3675                            vec_full_reg_offset(s, a->rn),
3676                            vec_full_reg_offset(s, a->rm),
3677                            status, vsz, vsz, a->imm, fns[a->esz - 1]);
3678         tcg_temp_free_ptr(status);
3679     }
3680     return true;
3681 }
3682
3683 /*
3684  *** SVE Floating Point Accumulating Reduction Group
3685  */
3686
3687 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3688 {
3689     typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3690                           TCGv_ptr, TCGv_ptr, TCGv_i32);
3691     static fadda_fn * const fns[3] = {
3692         gen_helper_sve_fadda_h,
3693         gen_helper_sve_fadda_s,
3694         gen_helper_sve_fadda_d,
3695     };
3696     unsigned vsz = vec_full_reg_size(s);
3697     TCGv_ptr t_rm, t_pg, t_fpst;
3698     TCGv_i64 t_val;
3699     TCGv_i32 t_desc;
3700
3701     if (a->esz == 0) {
3702         return false;
3703     }
3704     if (!sve_access_check(s)) {
3705         return true;
3706     }
3707
3708     t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3709     t_rm = tcg_temp_new_ptr();
3710     t_pg = tcg_temp_new_ptr();
3711     tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3712     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3713     t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3714     t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3715
3716     fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3717
3718     tcg_temp_free_i32(t_desc);
3719     tcg_temp_free_ptr(t_fpst);
3720     tcg_temp_free_ptr(t_pg);
3721     tcg_temp_free_ptr(t_rm);
3722
3723     write_fp_dreg(s, a->rd, t_val);
3724     tcg_temp_free_i64(t_val);
3725     return true;
3726 }
3727
3728 /*
3729  *** SVE Floating Point Arithmetic - Unpredicated Group
3730  */
3731
3732 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3733                       gen_helper_gvec_3_ptr *fn)
3734 {
3735     if (fn == NULL) {
3736         return false;
3737     }
3738     if (sve_access_check(s)) {
3739         unsigned vsz = vec_full_reg_size(s);
3740         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3741         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3742                            vec_full_reg_offset(s, a->rn),
3743                            vec_full_reg_offset(s, a->rm),
3744                            status, vsz, vsz, 0, fn);
3745         tcg_temp_free_ptr(status);
3746     }
3747     return true;
3748 }
3749
3750
3751 #define DO_FP3(NAME, name) \
3752 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
3753 {                                                                   \
3754     static gen_helper_gvec_3_ptr * const fns[4] = {                 \
3755         NULL, gen_helper_gvec_##name##_h,                           \
3756         gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
3757     };                                                              \
3758     return do_zzz_fp(s, a, fns[a->esz]);                            \
3759 }
3760
3761 DO_FP3(FADD_zzz, fadd)
3762 DO_FP3(FSUB_zzz, fsub)
3763 DO_FP3(FMUL_zzz, fmul)
3764 DO_FP3(FTSMUL, ftsmul)
3765 DO_FP3(FRECPS, recps)
3766 DO_FP3(FRSQRTS, rsqrts)
3767
3768 #undef DO_FP3
3769
3770 /*
3771  *** SVE Floating Point Arithmetic - Predicated Group
3772  */
3773
3774 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3775                        gen_helper_gvec_4_ptr *fn)
3776 {
3777     if (fn == NULL) {
3778         return false;
3779     }
3780     if (sve_access_check(s)) {
3781         unsigned vsz = vec_full_reg_size(s);
3782         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3783         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3784                            vec_full_reg_offset(s, a->rn),
3785                            vec_full_reg_offset(s, a->rm),
3786                            pred_full_reg_offset(s, a->pg),
3787                            status, vsz, vsz, 0, fn);
3788         tcg_temp_free_ptr(status);
3789     }
3790     return true;
3791 }
3792
3793 #define DO_FP3(NAME, name) \
3794 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
3795 {                                                                   \
3796     static gen_helper_gvec_4_ptr * const fns[4] = {                 \
3797         NULL, gen_helper_sve_##name##_h,                            \
3798         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
3799     };                                                              \
3800     return do_zpzz_fp(s, a, fns[a->esz]);                           \
3801 }
3802
3803 DO_FP3(FADD_zpzz, fadd)
3804 DO_FP3(FSUB_zpzz, fsub)
3805 DO_FP3(FMUL_zpzz, fmul)
3806 DO_FP3(FMIN_zpzz, fmin)
3807 DO_FP3(FMAX_zpzz, fmax)
3808 DO_FP3(FMINNM_zpzz, fminnum)
3809 DO_FP3(FMAXNM_zpzz, fmaxnum)
3810 DO_FP3(FABD, fabd)
3811 DO_FP3(FSCALE, fscalbn)
3812 DO_FP3(FDIV, fdiv)
3813 DO_FP3(FMULX, fmulx)
3814
3815 #undef DO_FP3
3816
3817 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3818                                       TCGv_i64, TCGv_ptr, TCGv_i32);
3819
3820 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3821                          TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3822 {
3823     unsigned vsz = vec_full_reg_size(s);
3824     TCGv_ptr t_zd, t_zn, t_pg, status;
3825     TCGv_i32 desc;
3826
3827     t_zd = tcg_temp_new_ptr();
3828     t_zn = tcg_temp_new_ptr();
3829     t_pg = tcg_temp_new_ptr();
3830     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3831     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3832     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3833
3834     status = get_fpstatus_ptr(is_fp16);
3835     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3836     fn(t_zd, t_zn, t_pg, scalar, status, desc);
3837
3838     tcg_temp_free_i32(desc);
3839     tcg_temp_free_ptr(status);
3840     tcg_temp_free_ptr(t_pg);
3841     tcg_temp_free_ptr(t_zn);
3842     tcg_temp_free_ptr(t_zd);
3843 }
3844
3845 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3846                       gen_helper_sve_fp2scalar *fn)
3847 {
3848     TCGv_i64 temp = tcg_const_i64(imm);
3849     do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3850     tcg_temp_free_i64(temp);
3851 }
3852
3853 #define DO_FP_IMM(NAME, name, const0, const1) \
3854 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
3855 {                                                                         \
3856     static gen_helper_sve_fp2scalar * const fns[3] = {                    \
3857         gen_helper_sve_##name##_h,                                        \
3858         gen_helper_sve_##name##_s,                                        \
3859         gen_helper_sve_##name##_d                                         \
3860     };                                                                    \
3861     static uint64_t const val[3][2] = {                                   \
3862         { float16_##const0, float16_##const1 },                           \
3863         { float32_##const0, float32_##const1 },                           \
3864         { float64_##const0, float64_##const1 },                           \
3865     };                                                                    \
3866     if (a->esz == 0) {                                                    \
3867         return false;                                                     \
3868     }                                                                     \
3869     if (sve_access_check(s)) {                                            \
3870         do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
3871     }                                                                     \
3872     return true;                                                          \
3873 }
3874
3875 #define float16_two  make_float16(0x4000)
3876 #define float32_two  make_float32(0x40000000)
3877 #define float64_two  make_float64(0x4000000000000000ULL)
3878
3879 DO_FP_IMM(FADD, fadds, half, one)
3880 DO_FP_IMM(FSUB, fsubs, half, one)
3881 DO_FP_IMM(FMUL, fmuls, half, two)
3882 DO_FP_IMM(FSUBR, fsubrs, half, one)
3883 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3884 DO_FP_IMM(FMINNM, fminnms, zero, one)
3885 DO_FP_IMM(FMAX, fmaxs, zero, one)
3886 DO_FP_IMM(FMIN, fmins, zero, one)
3887
3888 #undef DO_FP_IMM
3889
3890 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3891                       gen_helper_gvec_4_ptr *fn)
3892 {
3893     if (fn == NULL) {
3894         return false;
3895     }
3896     if (sve_access_check(s)) {
3897         unsigned vsz = vec_full_reg_size(s);
3898         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3899         tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3900                            vec_full_reg_offset(s, a->rn),
3901                            vec_full_reg_offset(s, a->rm),
3902                            pred_full_reg_offset(s, a->pg),
3903                            status, vsz, vsz, 0, fn);
3904         tcg_temp_free_ptr(status);
3905     }
3906     return true;
3907 }
3908
3909 #define DO_FPCMP(NAME, name) \
3910 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)     \
3911 {                                                                     \
3912     static gen_helper_gvec_4_ptr * const fns[4] = {                   \
3913         NULL, gen_helper_sve_##name##_h,                              \
3914         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
3915     };                                                                \
3916     return do_fp_cmp(s, a, fns[a->esz]);                              \
3917 }
3918
3919 DO_FPCMP(FCMGE, fcmge)
3920 DO_FPCMP(FCMGT, fcmgt)
3921 DO_FPCMP(FCMEQ, fcmeq)
3922 DO_FPCMP(FCMNE, fcmne)
3923 DO_FPCMP(FCMUO, fcmuo)
3924 DO_FPCMP(FACGE, facge)
3925 DO_FPCMP(FACGT, facgt)
3926
3927 #undef DO_FPCMP
3928
3929 static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
3930 {
3931     static gen_helper_gvec_4_ptr * const fns[3] = {
3932         gen_helper_sve_fcadd_h,
3933         gen_helper_sve_fcadd_s,
3934         gen_helper_sve_fcadd_d
3935     };
3936
3937     if (a->esz == 0) {
3938         return false;
3939     }
3940     if (sve_access_check(s)) {
3941         unsigned vsz = vec_full_reg_size(s);
3942         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3943         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3944                            vec_full_reg_offset(s, a->rn),
3945                            vec_full_reg_offset(s, a->rm),
3946                            pred_full_reg_offset(s, a->pg),
3947                            status, vsz, vsz, a->rot, fns[a->esz - 1]);
3948         tcg_temp_free_ptr(status);
3949     }
3950     return true;
3951 }
3952
3953 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
3954                     gen_helper_gvec_5_ptr *fn)
3955 {
3956     if (a->esz == 0) {
3957         return false;
3958     }
3959     if (sve_access_check(s)) {
3960         unsigned vsz = vec_full_reg_size(s);
3961         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3962         tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
3963                            vec_full_reg_offset(s, a->rn),
3964                            vec_full_reg_offset(s, a->rm),
3965                            vec_full_reg_offset(s, a->ra),
3966                            pred_full_reg_offset(s, a->pg),
3967                            status, vsz, vsz, 0, fn);
3968         tcg_temp_free_ptr(status);
3969     }
3970     return true;
3971 }
3972
3973 #define DO_FMLA(NAME, name) \
3974 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
3975 {                                                                    \
3976     static gen_helper_gvec_5_ptr * const fns[4] = {                  \
3977         NULL, gen_helper_sve_##name##_h,                             \
3978         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
3979     };                                                               \
3980     return do_fmla(s, a, fns[a->esz]);                               \
3981 }
3982
3983 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3984 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3985 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3986 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3987
3988 #undef DO_FMLA
3989
3990 static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
3991 {
3992     static gen_helper_gvec_5_ptr * const fns[4] = {
3993         NULL,
3994         gen_helper_sve_fcmla_zpzzz_h,
3995         gen_helper_sve_fcmla_zpzzz_s,
3996         gen_helper_sve_fcmla_zpzzz_d,
3997     };
3998
3999     if (a->esz == 0) {
4000         return false;
4001     }
4002     if (sve_access_check(s)) {
4003         unsigned vsz = vec_full_reg_size(s);
4004         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4005         tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4006                            vec_full_reg_offset(s, a->rn),
4007                            vec_full_reg_offset(s, a->rm),
4008                            vec_full_reg_offset(s, a->ra),
4009                            pred_full_reg_offset(s, a->pg),
4010                            status, vsz, vsz, a->rot, fns[a->esz]);
4011         tcg_temp_free_ptr(status);
4012     }
4013     return true;
4014 }
4015
4016 static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
4017 {
4018     static gen_helper_gvec_3_ptr * const fns[2] = {
4019         gen_helper_gvec_fcmlah_idx,
4020         gen_helper_gvec_fcmlas_idx,
4021     };
4022
4023     tcg_debug_assert(a->esz == 1 || a->esz == 2);
4024     tcg_debug_assert(a->rd == a->ra);
4025     if (sve_access_check(s)) {
4026         unsigned vsz = vec_full_reg_size(s);
4027         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4028         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4029                            vec_full_reg_offset(s, a->rn),
4030                            vec_full_reg_offset(s, a->rm),
4031                            status, vsz, vsz,
4032                            a->index * 4 + a->rot,
4033                            fns[a->esz - 1]);
4034         tcg_temp_free_ptr(status);
4035     }
4036     return true;
4037 }
4038
4039 /*
4040  *** SVE Floating Point Unary Operations Predicated Group
4041  */
4042
4043 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4044                        bool is_fp16, gen_helper_gvec_3_ptr *fn)
4045 {
4046     if (sve_access_check(s)) {
4047         unsigned vsz = vec_full_reg_size(s);
4048         TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4049         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4050                            vec_full_reg_offset(s, rn),
4051                            pred_full_reg_offset(s, pg),
4052                            status, vsz, vsz, 0, fn);
4053         tcg_temp_free_ptr(status);
4054     }
4055     return true;
4056 }
4057
4058 static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
4059 {
4060     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
4061 }
4062
4063 static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
4064 {
4065     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4066 }
4067
4068 static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
4069 {
4070     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
4071 }
4072
4073 static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
4074 {
4075     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4076 }
4077
4078 static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
4079 {
4080     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4081 }
4082
4083 static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
4084 {
4085     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4086 }
4087
4088 static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
4089 {
4090     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4091 }
4092
4093 static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
4094 {
4095     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4096 }
4097
4098 static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
4099 {
4100     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4101 }
4102
4103 static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
4104 {
4105     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4106 }
4107
4108 static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
4109 {
4110     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4111 }
4112
4113 static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
4114 {
4115     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4116 }
4117
4118 static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
4119 {
4120     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4121 }
4122
4123 static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
4124 {
4125     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4126 }
4127
4128 static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
4129 {
4130     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4131 }
4132
4133 static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
4134 {
4135     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4136 }
4137
4138 static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
4139 {
4140     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4141 }
4142
4143 static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
4144 {
4145     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4146 }
4147
4148 static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
4149 {
4150     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4151 }
4152
4153 static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
4154 {
4155     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4156 }
4157
4158 static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4159     gen_helper_sve_frint_h,
4160     gen_helper_sve_frint_s,
4161     gen_helper_sve_frint_d
4162 };
4163
4164 static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
4165 {
4166     if (a->esz == 0) {
4167         return false;
4168     }
4169     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4170                       frint_fns[a->esz - 1]);
4171 }
4172
4173 static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
4174 {
4175     static gen_helper_gvec_3_ptr * const fns[3] = {
4176         gen_helper_sve_frintx_h,
4177         gen_helper_sve_frintx_s,
4178         gen_helper_sve_frintx_d
4179     };
4180     if (a->esz == 0) {
4181         return false;
4182     }
4183     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4184 }
4185
4186 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4187 {
4188     if (a->esz == 0) {
4189         return false;
4190     }
4191     if (sve_access_check(s)) {
4192         unsigned vsz = vec_full_reg_size(s);
4193         TCGv_i32 tmode = tcg_const_i32(mode);
4194         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4195
4196         gen_helper_set_rmode(tmode, tmode, status);
4197
4198         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4199                            vec_full_reg_offset(s, a->rn),
4200                            pred_full_reg_offset(s, a->pg),
4201                            status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4202
4203         gen_helper_set_rmode(tmode, tmode, status);
4204         tcg_temp_free_i32(tmode);
4205         tcg_temp_free_ptr(status);
4206     }
4207     return true;
4208 }
4209
4210 static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
4211 {
4212     return do_frint_mode(s, a, float_round_nearest_even);
4213 }
4214
4215 static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
4216 {
4217     return do_frint_mode(s, a, float_round_up);
4218 }
4219
4220 static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
4221 {
4222     return do_frint_mode(s, a, float_round_down);
4223 }
4224
4225 static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
4226 {
4227     return do_frint_mode(s, a, float_round_to_zero);
4228 }
4229
4230 static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
4231 {
4232     return do_frint_mode(s, a, float_round_ties_away);
4233 }
4234
4235 static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
4236 {
4237     static gen_helper_gvec_3_ptr * const fns[3] = {
4238         gen_helper_sve_frecpx_h,
4239         gen_helper_sve_frecpx_s,
4240         gen_helper_sve_frecpx_d
4241     };
4242     if (a->esz == 0) {
4243         return false;
4244     }
4245     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4246 }
4247
4248 static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
4249 {
4250     static gen_helper_gvec_3_ptr * const fns[3] = {
4251         gen_helper_sve_fsqrt_h,
4252         gen_helper_sve_fsqrt_s,
4253         gen_helper_sve_fsqrt_d
4254     };
4255     if (a->esz == 0) {
4256         return false;
4257     }
4258     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4259 }
4260
4261 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4262 {
4263     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4264 }
4265
4266 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4267 {
4268     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4269 }
4270
4271 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4272 {
4273     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4274 }
4275
4276 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4277 {
4278     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4279 }
4280
4281 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4282 {
4283     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4284 }
4285
4286 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4287 {
4288     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4289 }
4290
4291 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4292 {
4293     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4294 }
4295
4296 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4297 {
4298     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4299 }
4300
4301 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4302 {
4303     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4304 }
4305
4306 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4307 {
4308     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4309 }
4310
4311 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4312 {
4313     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4314 }
4315
4316 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4317 {
4318     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4319 }
4320
4321 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4322 {
4323     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4324 }
4325
4326 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4327 {
4328     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4329 }
4330
4331 /*
4332  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4333  */
4334
4335 /* Subroutine loading a vector register at VOFS of LEN bytes.
4336  * The load should begin at the address Rn + IMM.
4337  */
4338
4339 static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4340 {
4341     int len_align = QEMU_ALIGN_DOWN(len, 8);
4342     int len_remain = len % 8;
4343     int nparts = len / 8 + ctpop8(len_remain);
4344     int midx = get_mem_index(s);
4345     TCGv_i64 dirty_addr, clean_addr, t0, t1;
4346
4347     dirty_addr = tcg_temp_new_i64();
4348     tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4349     clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4350     tcg_temp_free_i64(dirty_addr);
4351
4352     /*
4353      * Note that unpredicated load/store of vector/predicate registers
4354      * are defined as a stream of bytes, which equates to little-endian
4355      * operations on larger quantities.
4356      * Attempt to keep code expansion to a minimum by limiting the
4357      * amount of unrolling done.
4358      */
4359     if (nparts <= 4) {
4360         int i;
4361
4362         t0 = tcg_temp_new_i64();
4363         for (i = 0; i < len_align; i += 8) {
4364             tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
4365             tcg_gen_st_i64(t0, cpu_env, vofs + i);
4366             tcg_gen_addi_i64(clean_addr, cpu_reg_sp(s, rn), 8);
4367         }
4368         tcg_temp_free_i64(t0);
4369     } else {
4370         TCGLabel *loop = gen_new_label();
4371         TCGv_ptr tp, i = tcg_const_local_ptr(0);
4372
4373         /* Copy the clean address into a local temp, live across the loop. */
4374         t0 = clean_addr;
4375         clean_addr = new_tmp_a64_local(s);
4376         tcg_gen_mov_i64(clean_addr, t0);
4377
4378         gen_set_label(loop);
4379
4380         t0 = tcg_temp_new_i64();
4381         tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
4382         tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4383
4384         tp = tcg_temp_new_ptr();
4385         tcg_gen_add_ptr(tp, cpu_env, i);
4386         tcg_gen_addi_ptr(i, i, 8);
4387         tcg_gen_st_i64(t0, tp, vofs);
4388         tcg_temp_free_ptr(tp);
4389         tcg_temp_free_i64(t0);
4390
4391         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4392         tcg_temp_free_ptr(i);
4393     }
4394
4395     /*
4396      * Predicate register loads can be any multiple of 2.
4397      * Note that we still store the entire 64-bit unit into cpu_env.
4398      */
4399     if (len_remain) {
4400         t0 = tcg_temp_new_i64();
4401         switch (len_remain) {
4402         case 2:
4403         case 4:
4404         case 8:
4405             tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
4406                                 MO_LE | ctz32(len_remain));
4407             break;
4408
4409         case 6:
4410             t1 = tcg_temp_new_i64();
4411             tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
4412             tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4413             tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
4414             tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4415             tcg_temp_free_i64(t1);
4416             break;
4417
4418         default:
4419             g_assert_not_reached();
4420         }
4421         tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4422         tcg_temp_free_i64(t0);
4423     }
4424 }
4425
4426 /* Similarly for stores.  */
4427 static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4428 {
4429     int len_align = QEMU_ALIGN_DOWN(len, 8);
4430     int len_remain = len % 8;
4431     int nparts = len / 8 + ctpop8(len_remain);
4432     int midx = get_mem_index(s);
4433     TCGv_i64 dirty_addr, clean_addr, t0;
4434
4435     dirty_addr = tcg_temp_new_i64();
4436     tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
4437     clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
4438     tcg_temp_free_i64(dirty_addr);
4439
4440     /* Note that unpredicated load/store of vector/predicate registers
4441      * are defined as a stream of bytes, which equates to little-endian
4442      * operations on larger quantities.  There is no nice way to force
4443      * a little-endian store for aarch64_be-linux-user out of line.
4444      *
4445      * Attempt to keep code expansion to a minimum by limiting the
4446      * amount of unrolling done.
4447      */
4448     if (nparts <= 4) {
4449         int i;
4450
4451         t0 = tcg_temp_new_i64();
4452         for (i = 0; i < len_align; i += 8) {
4453             tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4454             tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
4455             tcg_gen_addi_i64(clean_addr, cpu_reg_sp(s, rn), 8);
4456         }
4457         tcg_temp_free_i64(t0);
4458     } else {
4459         TCGLabel *loop = gen_new_label();
4460         TCGv_ptr tp, i = tcg_const_local_ptr(0);
4461
4462         /* Copy the clean address into a local temp, live across the loop. */
4463         t0 = clean_addr;
4464         clean_addr = new_tmp_a64_local(s);
4465         tcg_gen_mov_i64(clean_addr, t0);
4466
4467         gen_set_label(loop);
4468
4469         t0 = tcg_temp_new_i64();
4470         tp = tcg_temp_new_ptr();
4471         tcg_gen_add_ptr(tp, cpu_env, i);
4472         tcg_gen_ld_i64(t0, tp, vofs);
4473         tcg_gen_addi_ptr(i, i, 8);
4474         tcg_temp_free_ptr(tp);
4475
4476         tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
4477         tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4478         tcg_temp_free_i64(t0);
4479
4480         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4481         tcg_temp_free_ptr(i);
4482     }
4483
4484     /* Predicate register stores can be any multiple of 2.  */
4485     if (len_remain) {
4486         t0 = tcg_temp_new_i64();
4487         tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4488
4489         switch (len_remain) {
4490         case 2:
4491         case 4:
4492         case 8:
4493             tcg_gen_qemu_st_i64(t0, clean_addr, midx,
4494                                 MO_LE | ctz32(len_remain));
4495             break;
4496
4497         case 6:
4498             tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
4499             tcg_gen_addi_i64(clean_addr, clean_addr, 4);
4500             tcg_gen_shri_i64(t0, t0, 32);
4501             tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
4502             break;
4503
4504         default:
4505             g_assert_not_reached();
4506         }
4507         tcg_temp_free_i64(t0);
4508     }
4509 }
4510
4511 static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4512 {
4513     if (sve_access_check(s)) {
4514         int size = vec_full_reg_size(s);
4515         int off = vec_full_reg_offset(s, a->rd);
4516         do_ldr(s, off, size, a->rn, a->imm * size);
4517     }
4518     return true;
4519 }
4520
4521 static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4522 {
4523     if (sve_access_check(s)) {
4524         int size = pred_full_reg_size(s);
4525         int off = pred_full_reg_offset(s, a->rd);
4526         do_ldr(s, off, size, a->rn, a->imm * size);
4527     }
4528     return true;
4529 }
4530
4531 static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4532 {
4533     if (sve_access_check(s)) {
4534         int size = vec_full_reg_size(s);
4535         int off = vec_full_reg_offset(s, a->rd);
4536         do_str(s, off, size, a->rn, a->imm * size);
4537     }
4538     return true;
4539 }
4540
4541 static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4542 {
4543     if (sve_access_check(s)) {
4544         int size = pred_full_reg_size(s);
4545         int off = pred_full_reg_offset(s, a->rd);
4546         do_str(s, off, size, a->rn, a->imm * size);
4547     }
4548     return true;
4549 }
4550
4551 /*
4552  *** SVE Memory - Contiguous Load Group
4553  */
4554
4555 /* The memory mode of the dtype.  */
4556 static const MemOp dtype_mop[16] = {
4557     MO_UB, MO_UB, MO_UB, MO_UB,
4558     MO_SL, MO_UW, MO_UW, MO_UW,
4559     MO_SW, MO_SW, MO_UL, MO_UL,
4560     MO_SB, MO_SB, MO_SB, MO_Q
4561 };
4562
/* The MO_SIZE bits of the memory mode for DTYPE. */
#define dtype_msz(dtype)  (dtype_mop[dtype] & MO_SIZE)
4564
/*
 * The vector element size of dtype: one entry per dtype value 0..15.
 * NOTE(review): values 0..3 look like log2-of-bytes size codes; confirm
 * against the users of this table.
 */
static const uint8_t dtype_esz[16] = {
    [0]  = 0,
    [1]  = 1,
    [2]  = 2,
    [3]  = 3,
    [4]  = 3,
    [5]  = 1,
    [6]  = 2,
    [7]  = 3,
    [8]  = 3,
    [9]  = 2,
    [10] = 2,
    [11] = 3,
    [12] = 3,
    [13] = 2,
    [14] = 1,
    [15] = 3,
};
4572
4573 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4574                        int dtype, uint32_t mte_n, bool is_write,
4575                        gen_helper_gvec_mem *fn)
4576 {
4577     unsigned vsz = vec_full_reg_size(s);
4578     TCGv_ptr t_pg;
4579     TCGv_i32 t_desc;
4580     int desc = 0;
4581
4582     /*
4583      * For e.g. LD4, there are not enough arguments to pass all 4
4584      * registers as pointers, so encode the regno into the data field.
4585      * For consistency, do this even for LD1.
4586      */
4587     if (s->mte_active[0]) {
4588         int msz = dtype_msz(dtype);
4589
4590         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
4591         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4592         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4593         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
4594         desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
4595         desc = FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz);
4596         desc <<= SVE_MTEDESC_SHIFT;
4597     } else {
4598         addr = clean_data_tbi(s, addr);
4599     }
4600
4601     desc = simd_desc(vsz, vsz, zt | desc);
4602     t_desc = tcg_const_i32(desc);
4603     t_pg = tcg_temp_new_ptr();
4604
4605     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4606     fn(cpu_env, t_pg, addr, t_desc);
4607
4608     tcg_temp_free_ptr(t_pg);
4609     tcg_temp_free_i32(t_desc);
4610 }
4611
4612 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4613                       TCGv_i64 addr, int dtype, int nreg)
4614 {
4615     static gen_helper_gvec_mem * const fns[2][2][16][4] = {
4616         { /* mte inactive, little-endian */
4617           { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4618             gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4619             { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4620             { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4621             { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4622
4623             { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4624             { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4625               gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4626             { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4627             { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4628
4629             { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4630             { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4631             { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4632               gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4633             { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4634
4635             { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4636             { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4637             { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4638             { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4639               gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4640
4641           /* mte inactive, big-endian */
4642           { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4643               gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4644             { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4645             { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4646             { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4647
4648             { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4649             { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4650               gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4651             { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4652             { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4653
4654             { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4655             { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4656             { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4657               gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4658             { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4659
4660             { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4661             { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4662             { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4663             { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4664               gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
4665
4666         { /* mte active, little-endian */
4667           { { gen_helper_sve_ld1bb_r_mte,
4668               gen_helper_sve_ld2bb_r_mte,
4669               gen_helper_sve_ld3bb_r_mte,
4670               gen_helper_sve_ld4bb_r_mte },
4671             { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4672             { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4673             { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4674
4675             { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
4676             { gen_helper_sve_ld1hh_le_r_mte,
4677               gen_helper_sve_ld2hh_le_r_mte,
4678               gen_helper_sve_ld3hh_le_r_mte,
4679               gen_helper_sve_ld4hh_le_r_mte },
4680             { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
4681             { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
4682
4683             { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
4684             { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
4685             { gen_helper_sve_ld1ss_le_r_mte,
4686               gen_helper_sve_ld2ss_le_r_mte,
4687               gen_helper_sve_ld3ss_le_r_mte,
4688               gen_helper_sve_ld4ss_le_r_mte },
4689             { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
4690
4691             { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4692             { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4693             { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4694             { gen_helper_sve_ld1dd_le_r_mte,
4695               gen_helper_sve_ld2dd_le_r_mte,
4696               gen_helper_sve_ld3dd_le_r_mte,
4697               gen_helper_sve_ld4dd_le_r_mte } },
4698
4699           /* mte active, big-endian */
4700           { { gen_helper_sve_ld1bb_r_mte,
4701               gen_helper_sve_ld2bb_r_mte,
4702               gen_helper_sve_ld3bb_r_mte,
4703               gen_helper_sve_ld4bb_r_mte },
4704             { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
4705             { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
4706             { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
4707
4708             { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
4709             { gen_helper_sve_ld1hh_be_r_mte,
4710               gen_helper_sve_ld2hh_be_r_mte,
4711               gen_helper_sve_ld3hh_be_r_mte,
4712               gen_helper_sve_ld4hh_be_r_mte },
4713             { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
4714             { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
4715
4716             { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
4717             { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
4718             { gen_helper_sve_ld1ss_be_r_mte,
4719               gen_helper_sve_ld2ss_be_r_mte,
4720               gen_helper_sve_ld3ss_be_r_mte,
4721               gen_helper_sve_ld4ss_be_r_mte },
4722             { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
4723
4724             { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
4725             { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
4726             { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
4727             { gen_helper_sve_ld1dd_be_r_mte,
4728               gen_helper_sve_ld2dd_be_r_mte,
4729               gen_helper_sve_ld3dd_be_r_mte,
4730               gen_helper_sve_ld4dd_be_r_mte } } },
4731     };
4732     gen_helper_gvec_mem *fn
4733         = fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
4734
4735     /*
4736      * While there are holes in the table, they are not
4737      * accessible via the instruction encoding.
4738      */
4739     assert(fn != NULL);
4740     do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
4741 }
4742
4743 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4744 {
4745     if (a->rm == 31) {
4746         return false;
4747     }
4748     if (sve_access_check(s)) {
4749         TCGv_i64 addr = new_tmp_a64(s);
4750         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4751         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4752         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4753     }
4754     return true;
4755 }
4756
4757 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4758 {
4759     if (sve_access_check(s)) {
4760         int vsz = vec_full_reg_size(s);
4761         int elements = vsz >> dtype_esz[a->dtype];
4762         TCGv_i64 addr = new_tmp_a64(s);
4763
4764         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4765                          (a->imm * elements * (a->nreg + 1))
4766                          << dtype_msz(a->dtype));
4767         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4768     }
4769     return true;
4770 }
4771
/*
 * Translate LDFF1 (first-fault load) with scalar-plus-scalar addressing.
 *
 * The helper is looked up in fns[], which is indexed by
 * [s->mte_active[0]][s->be_data == MO_BE][a->dtype].  The byte-sized
 * helpers have no endian variants, so the same function appears in both
 * the little- and big-endian rows.
 *
 * Unlike trans_LD_zprr(), this function does not reject a->rm == 31.
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        /* addr = Xn|SP + (Xm << dtype_msz(dtype)) */
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        /* A single register is loaded (count 1); this is not a write. */
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
4869
/*
 * Translate LDNF1 with scalar-plus-immediate addressing.
 *
 * The helper is looked up in fns[], which is indexed by
 * [s->mte_active[0]][s->be_data == MO_BE][a->dtype].  The byte-sized
 * helpers have no endian variants, so the same function appears in both
 * the little- and big-endian rows.
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        /* Number of elements held by one full vector register. */
        int elements = vsz >> dtype_esz[a->dtype];
        /* Byte offset: the immediate counts whole vectors' worth of memory. */
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        /* A single register is loaded (count 1); this is not a write. */
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
4970
4971 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4972 {
4973     static gen_helper_gvec_mem * const fns[2][4] = {
4974         { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_le_r,
4975           gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
4976         { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_be_r,
4977           gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
4978     };
4979     unsigned vsz = vec_full_reg_size(s);
4980     TCGv_ptr t_pg;
4981     TCGv_i32 t_desc;
4982     int desc, poff;
4983
4984     /* Load the first quadword using the normal predicated load helpers.  */
4985     desc = simd_desc(16, 16, zt);
4986     t_desc = tcg_const_i32(desc);
4987
4988     poff = pred_full_reg_offset(s, pg);
4989     if (vsz > 16) {
4990         /*
4991          * Zero-extend the first 16 bits of the predicate into a temporary.
4992          * This avoids triggering an assert making sure we don't have bits
4993          * set within a predicate beyond VQ, but we have lowered VQ to 1
4994          * for this load operation.
4995          */
4996         TCGv_i64 tmp = tcg_temp_new_i64();
4997 #ifdef HOST_WORDS_BIGENDIAN
4998         poff += 6;
4999 #endif
5000         tcg_gen_ld16u_i64(tmp, cpu_env, poff);
5001
5002         poff = offsetof(CPUARMState, vfp.preg_tmp);
5003         tcg_gen_st_i64(tmp, cpu_env, poff);
5004         tcg_temp_free_i64(tmp);
5005     }
5006
5007     t_pg = tcg_temp_new_ptr();
5008     tcg_gen_addi_ptr(t_pg, cpu_env, poff);
5009
5010     fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
5011
5012     tcg_temp_free_ptr(t_pg);
5013     tcg_temp_free_i32(t_desc);
5014
5015     /* Replicate that first quadword.  */
5016     if (vsz > 16) {
5017         unsigned dofs = vec_full_reg_offset(s, zt);
5018         tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
5019     }
5020 }
5021
5022 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
5023 {
5024     if (a->rm == 31) {
5025         return false;
5026     }
5027     if (sve_access_check(s)) {
5028         int msz = dtype_msz(a->dtype);
5029         TCGv_i64 addr = new_tmp_a64(s);
5030         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5031         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5032         do_ldrq(s, a->rd, a->pg, addr, msz);
5033     }
5034     return true;
5035 }
5036
5037 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
5038 {
5039     if (sve_access_check(s)) {
5040         TCGv_i64 addr = new_tmp_a64(s);
5041         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
5042         do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
5043     }
5044     return true;
5045 }
5046
5047 /* Load and broadcast element.  */
5048 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
5049 {
5050     unsigned vsz = vec_full_reg_size(s);
5051     unsigned psz = pred_full_reg_size(s);
5052     unsigned esz = dtype_esz[a->dtype];
5053     unsigned msz = dtype_msz(a->dtype);
5054     TCGLabel *over;
5055     TCGv_i64 temp, clean_addr;
5056
5057     if (!sve_access_check(s)) {
5058         return true;
5059     }
5060
5061     over = gen_new_label();
5062
5063     /* If the guarding predicate has no bits set, no load occurs.  */
5064     if (psz <= 8) {
5065         /* Reduce the pred_esz_masks value simply to reduce the
5066          * size of the code generated here.
5067          */
5068         uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
5069         temp = tcg_temp_new_i64();
5070         tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
5071         tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
5072         tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
5073         tcg_temp_free_i64(temp);
5074     } else {
5075         TCGv_i32 t32 = tcg_temp_new_i32();
5076         find_last_active(s, t32, esz, a->pg);
5077         tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
5078         tcg_temp_free_i32(t32);
5079     }
5080
5081     /* Load the data.  */
5082     temp = tcg_temp_new_i64();
5083     tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
5084     clean_addr = gen_mte_check1(s, temp, false, true, msz);
5085
5086     tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
5087                         s->be_data | dtype_mop[a->dtype]);
5088
5089     /* Broadcast to *all* elements.  */
5090     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5091                          vsz, vsz, temp);
5092     tcg_temp_free_i64(temp);
5093
5094     /* Zero the inactive elements.  */
5095     gen_set_label(over);
5096     do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
5097     return true;
5098 }
5099
5100 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
5101                       int msz, int esz, int nreg)
5102 {
5103     static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
5104         { { { gen_helper_sve_st1bb_r,
5105               gen_helper_sve_st1bh_r,
5106               gen_helper_sve_st1bs_r,
5107               gen_helper_sve_st1bd_r },
5108             { NULL,
5109               gen_helper_sve_st1hh_le_r,
5110               gen_helper_sve_st1hs_le_r,
5111               gen_helper_sve_st1hd_le_r },
5112             { NULL, NULL,
5113               gen_helper_sve_st1ss_le_r,
5114               gen_helper_sve_st1sd_le_r },
5115             { NULL, NULL, NULL,
5116               gen_helper_sve_st1dd_le_r } },
5117           { { gen_helper_sve_st1bb_r,
5118               gen_helper_sve_st1bh_r,
5119               gen_helper_sve_st1bs_r,
5120               gen_helper_sve_st1bd_r },
5121             { NULL,
5122               gen_helper_sve_st1hh_be_r,
5123               gen_helper_sve_st1hs_be_r,
5124               gen_helper_sve_st1hd_be_r },
5125             { NULL, NULL,
5126               gen_helper_sve_st1ss_be_r,
5127               gen_helper_sve_st1sd_be_r },
5128             { NULL, NULL, NULL,
5129               gen_helper_sve_st1dd_be_r } } },
5130
5131         { { { gen_helper_sve_st1bb_r_mte,
5132               gen_helper_sve_st1bh_r_mte,
5133               gen_helper_sve_st1bs_r_mte,
5134               gen_helper_sve_st1bd_r_mte },
5135             { NULL,
5136               gen_helper_sve_st1hh_le_r_mte,
5137               gen_helper_sve_st1hs_le_r_mte,
5138               gen_helper_sve_st1hd_le_r_mte },
5139             { NULL, NULL,
5140               gen_helper_sve_st1ss_le_r_mte,
5141               gen_helper_sve_st1sd_le_r_mte },
5142             { NULL, NULL, NULL,
5143               gen_helper_sve_st1dd_le_r_mte } },
5144           { { gen_helper_sve_st1bb_r_mte,
5145               gen_helper_sve_st1bh_r_mte,
5146               gen_helper_sve_st1bs_r_mte,
5147               gen_helper_sve_st1bd_r_mte },
5148             { NULL,
5149               gen_helper_sve_st1hh_be_r_mte,
5150               gen_helper_sve_st1hs_be_r_mte,
5151               gen_helper_sve_st1hd_be_r_mte },
5152             { NULL, NULL,
5153               gen_helper_sve_st1ss_be_r_mte,
5154               gen_helper_sve_st1sd_be_r_mte },
5155             { NULL, NULL, NULL,
5156               gen_helper_sve_st1dd_be_r_mte } } },
5157     };
5158     static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
5159         { { { gen_helper_sve_st2bb_r,
5160               gen_helper_sve_st2hh_le_r,
5161               gen_helper_sve_st2ss_le_r,
5162               gen_helper_sve_st2dd_le_r },
5163             { gen_helper_sve_st3bb_r,
5164               gen_helper_sve_st3hh_le_r,
5165               gen_helper_sve_st3ss_le_r,
5166               gen_helper_sve_st3dd_le_r },
5167             { gen_helper_sve_st4bb_r,
5168               gen_helper_sve_st4hh_le_r,
5169               gen_helper_sve_st4ss_le_r,
5170               gen_helper_sve_st4dd_le_r } },
5171           { { gen_helper_sve_st2bb_r,
5172               gen_helper_sve_st2hh_be_r,
5173               gen_helper_sve_st2ss_be_r,
5174               gen_helper_sve_st2dd_be_r },
5175             { gen_helper_sve_st3bb_r,
5176               gen_helper_sve_st3hh_be_r,
5177               gen_helper_sve_st3ss_be_r,
5178               gen_helper_sve_st3dd_be_r },
5179             { gen_helper_sve_st4bb_r,
5180               gen_helper_sve_st4hh_be_r,
5181               gen_helper_sve_st4ss_be_r,
5182               gen_helper_sve_st4dd_be_r } } },
5183         { { { gen_helper_sve_st2bb_r_mte,
5184               gen_helper_sve_st2hh_le_r_mte,
5185               gen_helper_sve_st2ss_le_r_mte,
5186               gen_helper_sve_st2dd_le_r_mte },
5187             { gen_helper_sve_st3bb_r_mte,
5188               gen_helper_sve_st3hh_le_r_mte,
5189               gen_helper_sve_st3ss_le_r_mte,
5190               gen_helper_sve_st3dd_le_r_mte },
5191             { gen_helper_sve_st4bb_r_mte,
5192               gen_helper_sve_st4hh_le_r_mte,
5193               gen_helper_sve_st4ss_le_r_mte,
5194               gen_helper_sve_st4dd_le_r_mte } },
5195           { { gen_helper_sve_st2bb_r_mte,
5196               gen_helper_sve_st2hh_be_r_mte,
5197               gen_helper_sve_st2ss_be_r_mte,
5198               gen_helper_sve_st2dd_be_r_mte },
5199             { gen_helper_sve_st3bb_r_mte,
5200               gen_helper_sve_st3hh_be_r_mte,
5201               gen_helper_sve_st3ss_be_r_mte,
5202               gen_helper_sve_st3dd_be_r_mte },
5203             { gen_helper_sve_st4bb_r_mte,
5204               gen_helper_sve_st4hh_be_r_mte,
5205               gen_helper_sve_st4ss_be_r_mte,
5206               gen_helper_sve_st4dd_be_r_mte } } },
5207     };
5208     gen_helper_gvec_mem *fn;
5209     int be = s->be_data == MO_BE;
5210
5211     if (nreg == 0) {
5212         /* ST1 */
5213         fn = fn_single[s->mte_active[0]][be][msz][esz];
5214         nreg = 1;
5215     } else {
5216         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5217         assert(msz == esz);
5218         fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
5219     }
5220     assert(fn != NULL);
5221     do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
5222 }
5223
5224 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
5225 {
5226     if (a->rm == 31 || a->msz > a->esz) {
5227         return false;
5228     }
5229     if (sve_access_check(s)) {
5230         TCGv_i64 addr = new_tmp_a64(s);
5231         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5232         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5233         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5234     }
5235     return true;
5236 }
5237
5238 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5239 {
5240     if (a->msz > a->esz) {
5241         return false;
5242     }
5243     if (sve_access_check(s)) {
5244         int vsz = vec_full_reg_size(s);
5245         int elements = vsz >> a->esz;
5246         TCGv_i64 addr = new_tmp_a64(s);
5247
5248         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5249                          (a->imm * elements * (a->nreg + 1)) << a->msz);
5250         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5251     }
5252     return true;
5253 }
5254
5255 /*
5256  *** SVE gather loads / scatter stores
5257  */
5258
5259 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
5260                        int scale, TCGv_i64 scalar, int msz, bool is_write,
5261                        gen_helper_gvec_mem_scatter *fn)
5262 {
5263     unsigned vsz = vec_full_reg_size(s);
5264     TCGv_ptr t_zm = tcg_temp_new_ptr();
5265     TCGv_ptr t_pg = tcg_temp_new_ptr();
5266     TCGv_ptr t_zt = tcg_temp_new_ptr();
5267     TCGv_i32 t_desc;
5268     int desc = 0;
5269
5270     if (s->mte_active[0]) {
5271         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
5272         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
5273         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
5274         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
5275         desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
5276         desc <<= SVE_MTEDESC_SHIFT;
5277     }
5278     desc = simd_desc(vsz, vsz, scale);
5279     t_desc = tcg_const_i32(desc);
5280
5281     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5282     tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5283     tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
5284     fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
5285
5286     tcg_temp_free_ptr(t_zt);
5287     tcg_temp_free_ptr(t_zm);
5288     tcg_temp_free_ptr(t_pg);
5289     tcg_temp_free_i32(t_desc);
5290 }
5291
/*
 * Gather-load helpers for 32-bit vector elements.
 *
 * Indexed by [mte][be][ff][xs][u][msz].
 *
 *   mte: 0 = MTE inactive, 1 = MTE active (the *_mte helpers)
 *   be:  0 = little-endian, 1 = big-endian; the byte-sized helpers have
 *        no endian variants, so they repeat in both halves
 *   ff:  0 = normal load (ld*), 1 = first-fault load (ldff*)
 *   xs:  0 selects the *_zsu helpers, 1 the *_zss helpers
 *   u:   0 selects the ld?ss helpers, 1 the ld?su / ldss helpers
 *        (presumably sign- vs zero-extending -- confirm against the
 *        helper definitions)
 *   msz: 0 = byte, 1 = halfword, 2 = word
 *
 * The [u = 0][msz = 2] slot is NULL throughout: the table provides no
 * ld?ss-style helper for a full 32-bit memory element.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};
5408
5409 /* Note that we overload xs=2 to indicate 64-bit offset.  */
5410 static gen_helper_gvec_mem_scatter * const
5411 gather_load_fn64[2][2][2][3][2][4] = { /* [mte][be][ff][xs][u][msz] */
5412     { /* mte = 0: MTE inactive */
5413         { /* be = 0: little-endian; the ff = 0 group comes first */
5414             { { { gen_helper_sve_ldbds_zsu,     /* xs = 0, u = 0 */
5415                   gen_helper_sve_ldhds_le_zsu,
5416                   gen_helper_sve_ldsds_le_zsu,
5417                   NULL, },                      /* no u = 0 helper at msz = 3 */
5418                 { gen_helper_sve_ldbdu_zsu,     /* xs = 0, u = 1 */
5419                   gen_helper_sve_ldhdu_le_zsu,
5420                   gen_helper_sve_ldsdu_le_zsu,
5421                   gen_helper_sve_lddd_le_zsu, } },
5422               { { gen_helper_sve_ldbds_zss,     /* xs = 1, u = 0 */
5423                   gen_helper_sve_ldhds_le_zss,
5424                   gen_helper_sve_ldsds_le_zss,
5425                   NULL, },
5426                 { gen_helper_sve_ldbdu_zss,     /* xs = 1, u = 1 */
5427                   gen_helper_sve_ldhdu_le_zss,
5428                   gen_helper_sve_ldsdu_le_zss,
5429                   gen_helper_sve_lddd_le_zss, } },
5430               { { gen_helper_sve_ldbds_zd,      /* xs = 2, u = 0 */
5431                   gen_helper_sve_ldhds_le_zd,
5432                   gen_helper_sve_ldsds_le_zd,
5433                   NULL, },
5434                 { gen_helper_sve_ldbdu_zd,      /* xs = 2, u = 1 */
5435                   gen_helper_sve_ldhdu_le_zd,
5436                   gen_helper_sve_ldsdu_le_zd,
5437                   gen_helper_sve_lddd_le_zd, } } },
5438
5439             /* ff = 1: first-fault; same [xs][u][msz] layout as above */
5440             { { { gen_helper_sve_ldffbds_zsu,
5441                   gen_helper_sve_ldffhds_le_zsu,
5442                   gen_helper_sve_ldffsds_le_zsu,
5443                   NULL, },
5444                 { gen_helper_sve_ldffbdu_zsu,
5445                   gen_helper_sve_ldffhdu_le_zsu,
5446                   gen_helper_sve_ldffsdu_le_zsu,
5447                   gen_helper_sve_ldffdd_le_zsu, } },
5448               { { gen_helper_sve_ldffbds_zss,
5449                   gen_helper_sve_ldffhds_le_zss,
5450                   gen_helper_sve_ldffsds_le_zss,
5451                   NULL, },
5452                 { gen_helper_sve_ldffbdu_zss,
5453                   gen_helper_sve_ldffhdu_le_zss,
5454                   gen_helper_sve_ldffsdu_le_zss,
5455                   gen_helper_sve_ldffdd_le_zss, } },
5456               { { gen_helper_sve_ldffbds_zd,
5457                   gen_helper_sve_ldffhds_le_zd,
5458                   gen_helper_sve_ldffsds_le_zd,
5459                   NULL, },
5460                 { gen_helper_sve_ldffbdu_zd,
5461                   gen_helper_sve_ldffhdu_le_zd,
5462                   gen_helper_sve_ldffsdu_le_zd,
5463                   gen_helper_sve_ldffdd_le_zd, } } } },
5464         { /* be = 1: big-endian (byte helpers are shared with LE) */
5465             { { { gen_helper_sve_ldbds_zsu,
5466                   gen_helper_sve_ldhds_be_zsu,
5467                   gen_helper_sve_ldsds_be_zsu,
5468                   NULL, },
5469                 { gen_helper_sve_ldbdu_zsu,
5470                   gen_helper_sve_ldhdu_be_zsu,
5471                   gen_helper_sve_ldsdu_be_zsu,
5472                   gen_helper_sve_lddd_be_zsu, } },
5473               { { gen_helper_sve_ldbds_zss,
5474                   gen_helper_sve_ldhds_be_zss,
5475                   gen_helper_sve_ldsds_be_zss,
5476                   NULL, },
5477                 { gen_helper_sve_ldbdu_zss,
5478                   gen_helper_sve_ldhdu_be_zss,
5479                   gen_helper_sve_ldsdu_be_zss,
5480                   gen_helper_sve_lddd_be_zss, } },
5481               { { gen_helper_sve_ldbds_zd,
5482                   gen_helper_sve_ldhds_be_zd,
5483                   gen_helper_sve_ldsds_be_zd,
5484                   NULL, },
5485                 { gen_helper_sve_ldbdu_zd,
5486                   gen_helper_sve_ldhdu_be_zd,
5487                   gen_helper_sve_ldsdu_be_zd,
5488                   gen_helper_sve_lddd_be_zd, } } },
5489
5490             /* ff = 1: first-fault */
5491             { { { gen_helper_sve_ldffbds_zsu,
5492                   gen_helper_sve_ldffhds_be_zsu,
5493                   gen_helper_sve_ldffsds_be_zsu,
5494                   NULL, },
5495                 { gen_helper_sve_ldffbdu_zsu,
5496                   gen_helper_sve_ldffhdu_be_zsu,
5497                   gen_helper_sve_ldffsdu_be_zsu,
5498                   gen_helper_sve_ldffdd_be_zsu, } },
5499               { { gen_helper_sve_ldffbds_zss,
5500                   gen_helper_sve_ldffhds_be_zss,
5501                   gen_helper_sve_ldffsds_be_zss,
5502                   NULL, },
5503                 { gen_helper_sve_ldffbdu_zss,
5504                   gen_helper_sve_ldffhdu_be_zss,
5505                   gen_helper_sve_ldffsdu_be_zss,
5506                   gen_helper_sve_ldffdd_be_zss, } },
5507               { { gen_helper_sve_ldffbds_zd,
5508                   gen_helper_sve_ldffhds_be_zd,
5509                   gen_helper_sve_ldffsds_be_zd,
5510                   NULL, },
5511                 { gen_helper_sve_ldffbdu_zd,
5512                   gen_helper_sve_ldffhdu_be_zd,
5513                   gen_helper_sve_ldffsdu_be_zd,
5514                   gen_helper_sve_ldffdd_be_zd, } } } } },
5515     { /* mte = 1: MTE active (helpers with the _mte suffix) */
5516         { /* be = 0: little-endian */
5517             { { { gen_helper_sve_ldbds_zsu_mte,
5518                   gen_helper_sve_ldhds_le_zsu_mte,
5519                   gen_helper_sve_ldsds_le_zsu_mte,
5520                   NULL, },
5521                 { gen_helper_sve_ldbdu_zsu_mte,
5522                   gen_helper_sve_ldhdu_le_zsu_mte,
5523                   gen_helper_sve_ldsdu_le_zsu_mte,
5524                   gen_helper_sve_lddd_le_zsu_mte, } },
5525               { { gen_helper_sve_ldbds_zss_mte,
5526                   gen_helper_sve_ldhds_le_zss_mte,
5527                   gen_helper_sve_ldsds_le_zss_mte,
5528                   NULL, },
5529                 { gen_helper_sve_ldbdu_zss_mte,
5530                   gen_helper_sve_ldhdu_le_zss_mte,
5531                   gen_helper_sve_ldsdu_le_zss_mte,
5532                   gen_helper_sve_lddd_le_zss_mte, } },
5533               { { gen_helper_sve_ldbds_zd_mte,
5534                   gen_helper_sve_ldhds_le_zd_mte,
5535                   gen_helper_sve_ldsds_le_zd_mte,
5536                   NULL, },
5537                 { gen_helper_sve_ldbdu_zd_mte,
5538                   gen_helper_sve_ldhdu_le_zd_mte,
5539                   gen_helper_sve_ldsdu_le_zd_mte,
5540                   gen_helper_sve_lddd_le_zd_mte, } } },
5541
5542             /* ff = 1: first-fault */
5543             { { { gen_helper_sve_ldffbds_zsu_mte,
5544                   gen_helper_sve_ldffhds_le_zsu_mte,
5545                   gen_helper_sve_ldffsds_le_zsu_mte,
5546                   NULL, },
5547                 { gen_helper_sve_ldffbdu_zsu_mte,
5548                   gen_helper_sve_ldffhdu_le_zsu_mte,
5549                   gen_helper_sve_ldffsdu_le_zsu_mte,
5550                   gen_helper_sve_ldffdd_le_zsu_mte, } },
5551               { { gen_helper_sve_ldffbds_zss_mte,
5552                   gen_helper_sve_ldffhds_le_zss_mte,
5553                   gen_helper_sve_ldffsds_le_zss_mte,
5554                   NULL, },
5555                 { gen_helper_sve_ldffbdu_zss_mte,
5556                   gen_helper_sve_ldffhdu_le_zss_mte,
5557                   gen_helper_sve_ldffsdu_le_zss_mte,
5558                   gen_helper_sve_ldffdd_le_zss_mte, } },
5559               { { gen_helper_sve_ldffbds_zd_mte,
5560                   gen_helper_sve_ldffhds_le_zd_mte,
5561                   gen_helper_sve_ldffsds_le_zd_mte,
5562                   NULL, },
5563                 { gen_helper_sve_ldffbdu_zd_mte,
5564                   gen_helper_sve_ldffhdu_le_zd_mte,
5565                   gen_helper_sve_ldffsdu_le_zd_mte,
5566                   gen_helper_sve_ldffdd_le_zd_mte, } } } },
5567         { /* be = 1: big-endian */
5568             { { { gen_helper_sve_ldbds_zsu_mte,
5569                   gen_helper_sve_ldhds_be_zsu_mte,
5570                   gen_helper_sve_ldsds_be_zsu_mte,
5571                   NULL, },
5572                 { gen_helper_sve_ldbdu_zsu_mte,
5573                   gen_helper_sve_ldhdu_be_zsu_mte,
5574                   gen_helper_sve_ldsdu_be_zsu_mte,
5575                   gen_helper_sve_lddd_be_zsu_mte, } },
5576               { { gen_helper_sve_ldbds_zss_mte,
5577                   gen_helper_sve_ldhds_be_zss_mte,
5578                   gen_helper_sve_ldsds_be_zss_mte,
5579                   NULL, },
5580                 { gen_helper_sve_ldbdu_zss_mte,
5581                   gen_helper_sve_ldhdu_be_zss_mte,
5582                   gen_helper_sve_ldsdu_be_zss_mte,
5583                   gen_helper_sve_lddd_be_zss_mte, } },
5584               { { gen_helper_sve_ldbds_zd_mte,
5585                   gen_helper_sve_ldhds_be_zd_mte,
5586                   gen_helper_sve_ldsds_be_zd_mte,
5587                   NULL, },
5588                 { gen_helper_sve_ldbdu_zd_mte,
5589                   gen_helper_sve_ldhdu_be_zd_mte,
5590                   gen_helper_sve_ldsdu_be_zd_mte,
5591                   gen_helper_sve_lddd_be_zd_mte, } } },
5592
5593             /* ff = 1: first-fault */
5594             { { { gen_helper_sve_ldffbds_zsu_mte,
5595                   gen_helper_sve_ldffhds_be_zsu_mte,
5596                   gen_helper_sve_ldffsds_be_zsu_mte,
5597                   NULL, },
5598                 { gen_helper_sve_ldffbdu_zsu_mte,
5599                   gen_helper_sve_ldffhdu_be_zsu_mte,
5600                   gen_helper_sve_ldffsdu_be_zsu_mte,
5601                   gen_helper_sve_ldffdd_be_zsu_mte, } },
5602               { { gen_helper_sve_ldffbds_zss_mte,
5603                   gen_helper_sve_ldffhds_be_zss_mte,
5604                   gen_helper_sve_ldffsds_be_zss_mte,
5605                   NULL, },
5606                 { gen_helper_sve_ldffbdu_zss_mte,
5607                   gen_helper_sve_ldffhdu_be_zss_mte,
5608                   gen_helper_sve_ldffsdu_be_zss_mte,
5609                   gen_helper_sve_ldffdd_be_zss_mte, } },
5610               { { gen_helper_sve_ldffbds_zd_mte,
5611                   gen_helper_sve_ldffhds_be_zd_mte,
5612                   gen_helper_sve_ldffsds_be_zd_mte,
5613                   NULL, },
5614                 { gen_helper_sve_ldffbdu_zd_mte,
5615                   gen_helper_sve_ldffhdu_be_zd_mte,
5616                   gen_helper_sve_ldffsdu_be_zd_mte,
5617                   gen_helper_sve_ldffdd_be_zd_mte, } } } } },
5618 };
5619
5620 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
5621 {
5622     /* Gather load of the form rn + zm[x]; the helper depends on esz. */
5623     const bool is_be = (s->be_data == MO_BE);
5624     const bool mte_on = s->mte_active[0];
5625     gen_helper_gvec_mem_scatter *helper = NULL;
5626
5627     if (sve_access_check(s)) {
5628         if (a->esz == MO_32) {
5629             helper =
5630                 gather_load_fn32[mte_on][is_be][a->ff][a->xs][a->u][a->msz];
5631         } else if (a->esz == MO_64) {
5632             helper =
5633                 gather_load_fn64[mte_on][is_be][a->ff][a->xs][a->u][a->msz];
5634         }
5635         /* Any other esz, or a NULL table slot, trips this assertion. */
5636         assert(helper != NULL);
5637
5638         do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5639                    cpu_reg_sp(s, a->rn), a->msz, false, helper);
5640     }
5641     /* True is returned even when the access check fails. */
5642     return true;
5643 }
5644
5645 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
5646 {
5647     const bool is_be = (s->be_data == MO_BE);
5648     const bool mte_on = s->mte_active[0];
5649     gen_helper_gvec_mem_scatter *helper = NULL;
5650     TCGv_i64 base;
5651
5652     /* Accept esz > msz always, and esz == msz only when u is nonzero. */
5653     if (!(a->esz > a->msz || (a->esz == a->msz && a->u))) {
5654         return false;
5655     }
5656     if (!sve_access_check(s)) {
5657         return true;
5658     }
5659
5660     /* Fixed xs index: 0 for 32-bit elements, 2 for 64-bit elements. */
5661     if (a->esz == MO_32) {
5662         helper = gather_load_fn32[mte_on][is_be][a->ff][0][a->u][a->msz];
5663     } else if (a->esz == MO_64) {
5664         helper = gather_load_fn64[mte_on][is_be][a->ff][2][a->u][a->msz];
5665     }
5666     assert(helper != NULL);
5667
5668     /*
5669      * LD1_zpiz (zn[x] + imm) is handled like LD1_zprz (rn + zm[x]):
5670      * the immediate, shifted left by msz, becomes the scalar operand.
5671      */
5672     base = tcg_const_i64(a->imm << a->msz);
5673     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, base, a->msz, false, helper);
5674     tcg_temp_free_i64(base);
5675     return true;
5676 }
5677
5678 /* Indexed by [mte][be][xs][msz]; msz is 0..2 (the b, h, s helpers).  */
5679 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
5680     { /* mte = 0: MTE inactive */
5681         { /* be = 0: little-endian */
5682             { gen_helper_sve_stbs_zsu,           /* xs = 0 */
5683               gen_helper_sve_sths_le_zsu,
5684               gen_helper_sve_stss_le_zsu, },
5685             { gen_helper_sve_stbs_zss,           /* xs = 1 */
5686               gen_helper_sve_sths_le_zss,
5687               gen_helper_sve_stss_le_zss, } },
5688         { /* be = 1: big-endian (byte helpers are shared with LE) */
5689             { gen_helper_sve_stbs_zsu,
5690               gen_helper_sve_sths_be_zsu,
5691               gen_helper_sve_stss_be_zsu, },
5692             { gen_helper_sve_stbs_zss,
5693               gen_helper_sve_sths_be_zss,
5694               gen_helper_sve_stss_be_zss, } } },
5695     { /* mte = 1: MTE active (helpers with the _mte suffix) */
5696         { /* be = 0: little-endian */
5697             { gen_helper_sve_stbs_zsu_mte,
5698               gen_helper_sve_sths_le_zsu_mte,
5699               gen_helper_sve_stss_le_zsu_mte, },
5700             { gen_helper_sve_stbs_zss_mte,
5701               gen_helper_sve_sths_le_zss_mte,
5702               gen_helper_sve_stss_le_zss_mte, } },
5703         { /* be = 1: big-endian */
5704             { gen_helper_sve_stbs_zsu_mte,
5705               gen_helper_sve_sths_be_zsu_mte,
5706               gen_helper_sve_stss_be_zsu_mte, },
5707             { gen_helper_sve_stbs_zss_mte,
5708               gen_helper_sve_sths_be_zss_mte,
5709               gen_helper_sve_stss_be_zss_mte, } } },
5710 };
5711
5712 /* Indexed by [mte][be][xs][msz]; xs=2 is overloaded for 64-bit offsets.  */
5713 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
5714     { /* mte = 0: MTE inactive */
5715          { /* be = 0: little-endian */
5716              { gen_helper_sve_stbd_zsu,          /* xs = 0 */
5717                gen_helper_sve_sthd_le_zsu,
5718                gen_helper_sve_stsd_le_zsu,
5719                gen_helper_sve_stdd_le_zsu, },
5720              { gen_helper_sve_stbd_zss,          /* xs = 1 */
5721                gen_helper_sve_sthd_le_zss,
5722                gen_helper_sve_stsd_le_zss,
5723                gen_helper_sve_stdd_le_zss, },
5724              { gen_helper_sve_stbd_zd,           /* xs = 2 */
5725                gen_helper_sve_sthd_le_zd,
5726                gen_helper_sve_stsd_le_zd,
5727                gen_helper_sve_stdd_le_zd, } },
5728          { /* be = 1: big-endian (byte helpers are shared with LE) */
5729              { gen_helper_sve_stbd_zsu,
5730                gen_helper_sve_sthd_be_zsu,
5731                gen_helper_sve_stsd_be_zsu,
5732                gen_helper_sve_stdd_be_zsu, },
5733              { gen_helper_sve_stbd_zss,
5734                gen_helper_sve_sthd_be_zss,
5735                gen_helper_sve_stsd_be_zss,
5736                gen_helper_sve_stdd_be_zss, },
5737              { gen_helper_sve_stbd_zd,
5738                gen_helper_sve_sthd_be_zd,
5739                gen_helper_sve_stsd_be_zd,
5740                gen_helper_sve_stdd_be_zd, } } },
5741     { /* mte = 1: MTE active (helpers with the _mte suffix) */
5742          { /* be = 0: little-endian */
5743              { gen_helper_sve_stbd_zsu_mte,
5744                gen_helper_sve_sthd_le_zsu_mte,
5745                gen_helper_sve_stsd_le_zsu_mte,
5746                gen_helper_sve_stdd_le_zsu_mte, },
5747              { gen_helper_sve_stbd_zss_mte,
5748                gen_helper_sve_sthd_le_zss_mte,
5749                gen_helper_sve_stsd_le_zss_mte,
5750                gen_helper_sve_stdd_le_zss_mte, },
5751              { gen_helper_sve_stbd_zd_mte,
5752                gen_helper_sve_sthd_le_zd_mte,
5753                gen_helper_sve_stsd_le_zd_mte,
5754                gen_helper_sve_stdd_le_zd_mte, } },
5755          { /* be = 1: big-endian */
5756              { gen_helper_sve_stbd_zsu_mte,
5757                gen_helper_sve_sthd_be_zsu_mte,
5758                gen_helper_sve_stsd_be_zsu_mte,
5759                gen_helper_sve_stdd_be_zsu_mte, },
5760              { gen_helper_sve_stbd_zss_mte,
5761                gen_helper_sve_sthd_be_zss_mte,
5762                gen_helper_sve_stsd_be_zss_mte,
5763                gen_helper_sve_stdd_be_zss_mte, },
5764              { gen_helper_sve_stbd_zd_mte,
5765                gen_helper_sve_sthd_be_zd_mte,
5766                gen_helper_sve_stsd_be_zd_mte,
5767                gen_helper_sve_stdd_be_zd_mte, } } },
5768 };
5769
5770 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
5771 {
5772     const bool is_be = (s->be_data == MO_BE);
5773     const bool mte_on = s->mte_active[0];
5774     gen_helper_gvec_mem_scatter *helper;
5775
5776     /* Reject msz > esz, and a scaled offset when msz == 0. */
5777     if (a->msz > a->esz || (a->scale && a->msz == 0)) {
5778         return false;
5779     }
5780     if (!sve_access_check(s)) {
5781         return true;
5782     }
5783     if (a->esz == MO_32) {
5784         helper = scatter_store_fn32[mte_on][is_be][a->xs][a->msz];
5785     } else if (a->esz == MO_64) {
5786         helper = scatter_store_fn64[mte_on][is_be][a->xs][a->msz];
5787     } else {
5788         g_assert_not_reached();
5789     }
5790
5791     /* Scatter store of the form rn + zm[x]; loads pass false here. */
5792     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5793                cpu_reg_sp(s, a->rn), a->msz, true, helper);
5794     return true;
5795 }
5796
5797 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
5798 {
5799     const bool is_be = (s->be_data == MO_BE);
5800     const bool mte_on = s->mte_active[0];
5801     gen_helper_gvec_mem_scatter *helper = NULL;
5802     TCGv_i64 base;
5803
5804     /* A memory size larger than the element size is rejected. */
5805     if (a->msz > a->esz) {
5806         return false;
5807     }
5808     if (!sve_access_check(s)) {
5809         return true;
5810     }
5811
5812     /* Fixed xs index: 0 for 32-bit elements, 2 for 64-bit elements. */
5813     if (a->esz == MO_32) {
5814         helper = scatter_store_fn32[mte_on][is_be][0][a->msz];
5815     } else if (a->esz == MO_64) {
5816         helper = scatter_store_fn64[mte_on][is_be][2][a->msz];
5817     }
5818     assert(helper != NULL);
5819
5820     /*
5821      * ST1_zpiz (zn[x] + imm) is handled like ST1_zprz (rn + zm[x]):
5822      * the immediate, shifted left by msz, becomes the scalar operand.
5823      */
5824     base = tcg_const_i64(a->imm << a->msz);
5825     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, base, a->msz, true, helper);
5826     tcg_temp_free_i64(base);
5827     return true;
5828 }
5829
5830 /*
5831  * Prefetches
5832  */
5833
5834 static bool trans_PRF(DisasContext *s, arg_PRF *a)
5835 {
5836     /* Prefetch is a nop within QEMU: only the access check is performed. */
5837     (void)sve_access_check(s); /* result intentionally discarded */
5838     return true;
5839 }
5840
5841 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
5842 {
5843     /* rm == 31 is rejected; otherwise only the access check is run. */
5844     if (a->rm != 31) {
5845         (void)sve_access_check(s);
5846         return true;
5847     }
5848     return false;
5849 }
5850
5851 /*
5852  * Move Prefix
5853  *
5854  * TODO: The implementation so far could handle predicated merging movprfx.
5855  * The helper functions as written take an extra source register to
5856  * use in the operation, but the result is only written when predication
5857  * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
5858  * to allow the final write back to the destination to be unconditional.
5859  * For predicated zeroing movprfx, we need to rearrange the helpers to
5860  * allow the final write back to zero inactives.
5861  *
5862  * In the meantime, just emit the moves.
5863  */
5864
5865 static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
5866 {
5867     return do_mov_z(s, a->rd, a->rn); /* no predicate: just emit a move */
5868 }
5869
5870 static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
5871 {
5872     if (sve_access_check(s)) { /* emit code only if the check passes */
5873         do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz); /* rd given twice */
5874     }
5875     return true; /* returned whether or not anything was emitted */
5876 }
5877
5878 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
5879 {
5880     if (sve_access_check(s)) { /* emit code only if the check passes */
5881         do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz); /* predicated by pg */
5882     }
5883     return true; /* returned whether or not anything was emitted */
5884 }